diff --git a/.gitignore b/.gitignore index b952b68..08febc6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,17 @@ tmp/ coverage/ jscpd-report/ bench/ +.uv-cache/ +.venv/ +.venv-harrier/ .claude/ CLAUDE.md RESULTS-fast-path-all-commands.md PLAN-fast-path-all-commands.md +__pycache__/ +*.pyc +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.coverage +htmlcov/ diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 50551da..cbdcced 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -2,13 +2,13 @@ // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -49,6 +49,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -79,6 +84,12 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlIdent(name) { + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`); + } + return name; +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); @@ -96,6 +107,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -103,7 +130,7 @@ var MAX_CONCURRENCY = 5; var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); } function isTimeoutError(error) { const name = error instanceof Error ? 
error.name.toLowerCase() : ""; @@ -136,7 +163,7 @@ var Semaphore = class { this.active++; return; } - await new Promise((resolve) => this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -199,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -225,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -284,6 +315,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -381,24 +435,257 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
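// A minimal sketch (not part of this diff) of the idempotent ensure-table
// pattern that the ensure* methods above repeat: create-if-missing, then
// tolerant forward migrations for columns added in later builds. It assumes
// only a generic query(sql) helper; ensureTable, columns, and newColumns are
// illustrative names, not identifiers from this changeset.
async function ensureTable(query, name, columns, newColumns) {
  // IF NOT EXISTS keeps concurrent hook processes from racing on creation.
  await query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`);
  for (const [column, ddl] of newColumns) {
    try {
      // Best-effort migration for tables created by older builds; errors are
      // swallowed, mirroring the empty catch around ALTER TABLE in this hunk.
      await query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`);
    } catch {
    }
  }
}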
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; -// dist/src/utils/session-path.js -function buildSessionPath(config, sessionId) { - const workspace = config.workspaceId ?? 
"default"; - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${workspace}_${sessionId}.jsonl`; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } } // dist/src/hooks/summary-state.js @@ -525,46 +812,75 @@ function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { throw e; } } -function releaseLock(sessionId) { - try { - unlinkSync(lockPath(sessionId)); - } catch (e) { - if (e?.code !== "ENOENT") { - dlog(`releaseLock unlink failed for ${sessionId}: ${e.message}`); - } - } -} // dist/src/hooks/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; -import { fileURLToPath } from "node:url"; -import { dirname, join as join6 } from "node:path"; -import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync4 } from "node:fs"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname, join as join5 } from "node:path"; +import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join5 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join5(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync3(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } - } - }; -} +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID3 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. 
+- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID4 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; // dist/src/hooks/spawn-wiki-worker.js var HOME = homedir4(); -var wikiLogger = makeWikiLogger(join6(HOME, ".claude", "hooks")); -var WIKI_LOG = wikiLogger.path; -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +var WIKI_LOG = join5(HOME, ".claude", "hooks", "deeplake-wiki.log"); +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. 
SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -578,58 +894,75 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format. The header fields (Source, Project) are pre-filled \u2014 copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. The Source and Project fields above are already correct \u2014 do not change them. LENGTH LIMIT: Keep the total summary under 4000 characters. Be dense and concise \u2014 prioritize facts over prose. 
If a session is short, the summary should be short too.`; -var wikiLog = wikiLogger.log; +function wikiLog(msg) { + try { + mkdirSync3(join5(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${utcTimestamp()}] ${msg} +`); + } catch { + } +} function findClaudeBin() { try { return execSync("which claude 2>/dev/null", { encoding: "utf-8" }).trim(); } catch { - return join6(HOME, ".claude", "local", "claude"); + return join5(HOME, ".claude", "local", "claude"); } } function spawnWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join6(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync4(tmpDir, { recursive: true }); - const configFile = join6(tmpDir, "config.json"); + const tmpDir = join5(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync3(tmpDir, { recursive: true }); + const configFile = join5(tmpDir, "config.json"); writeFileSync3(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, @@ -637,17 +970,24 @@ function spawnWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, tmpDir, claudeBin: findClaudeBin(), wikiLog: WIKI_LOG, - hooksDir: join6(HOME, ".claude", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + hooksDir: join5(HOME, ".claude", "hooks"), + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join6(bundleDir, "wiki-worker.js"); + const workerPath = join5(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -655,24 +995,343 @@ function spawnWikiWorker(opts) { wikiLog(`${reason}: spawned summary worker for ${sessionId}`); } function bundleDirFromImportMeta(importMetaUrl) { - return dirname(fileURLToPath(importMetaUrl)); + return dirname(fileURLToPath2(importMetaUrl)); } -// dist/src/hooks/capture.js -var log3 = (msg) => log("capture", msg); -var CAPTURE = process.env.HIVEMIND_CAPTURE !== "false"; -async function main() { - if (!CAPTURE) +// dist/src/hooks/session-queue.js +import { appendFileSync as appendFileSync3, closeSync as closeSync2, existsSync as existsSync4, mkdirSync as mkdirSync4, openSync as openSync2, readFileSync as readFileSync4, readdirSync, renameSync as renameSync2, rmSync, statSync, writeFileSync as writeFileSync4 } from "node:fs"; +import { dirname as dirname2, join as join6 } from "node:path"; +import { homedir as homedir5 } from "node:os"; +var DEFAULT_QUEUE_DIR = join6(homedir5(), ".deeplake", "queue"); +var DEFAULT_MAX_BATCH_ROWS = 50; +var DEFAULT_STALE_INFLIGHT_MS = 6e4; +var DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 6e4; +var BUSY_WAIT_STEP_MS = 100; +var SessionWriteDisabledError = class extends Error { + constructor(message) { + super(message); + this.name = "SessionWriteDisabledError"; + } +}; +function buildSessionPath(config, sessionId) { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} +function 
buildQueuedSessionRow(args) { + const structured = extractStructuredSessionFields(args.line, args.sessionId); + return { + id: crypto.randomUUID(), + path: args.sessionPath, + filename: args.sessionPath.split("/").pop() ?? "", + message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, + author: args.userName, + sizeBytes: Buffer.byteLength(args.line, "utf-8"), + project: args.projectName, + description: args.description, + agent: args.agent, + creationDate: args.timestamp, + lastUpdateDate: args.timestamp + }; +} +function appendQueuedSessionRow(row, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync4(queueDir, { recursive: true }); + const sessionId = extractSessionId(row.path); + const queuePath = getQueuePath(queueDir, sessionId); + appendFileSync3(queuePath, `${JSON.stringify(row)} +`); + return queuePath; +} +function buildSessionInsertSql(sessionsTable, rows) { + if (rows.length === 0) + throw new Error("buildSessionInsertSql: rows must not be empty"); + const table = sqlIdent(sessionsTable); + const values = rows.map((row) => { + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + }).join(", "); + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; +} +function coerceJsonbPayload(message) { + try { + return JSON.stringify(JSON.parse(message)); + } catch { + return JSON.stringify({ + type: "raw_message", + content: message + }); + } +} +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +function extractString(value) { + return typeof value === "string" ? value : value == null ? "" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? 
"assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} +async function flushSessionQueue(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; + const staleInflightMs = opts.staleInflightMs ?? DEFAULT_STALE_INFLIGHT_MS; + const waitIfBusyMs = opts.waitIfBusyMs ?? 0; + const drainAll = opts.drainAll ?? false; + mkdirSync4(queueDir, { recursive: true }); + const queuePath = getQueuePath(queueDir, opts.sessionId); + const inflightPath = getInflightPath(queueDir, opts.sessionId); + if (isSessionWriteDisabled(opts.sessionsTable, queueDir)) { + return existsSync4(queuePath) || existsSync4(inflightPath) ? { status: "disabled", rows: 0, batches: 0 } : { status: "empty", rows: 0, batches: 0 }; + } + let totalRows = 0; + let totalBatches = 0; + let flushedAny = false; + while (true) { + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + if (existsSync4(inflightPath)) { + if (waitIfBusyMs > 0) { + await waitForInflightToClear(inflightPath, waitIfBusyMs); + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + } + if (existsSync4(inflightPath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "busy", rows: 0, batches: 0 }; + } + } + if (!existsSync4(queuePath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + try { + renameSync2(queuePath, inflightPath); + } catch (e) { + if (e?.code === "ENOENT") { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + throw e; + } + try { + const { rows, batches } = await flushInflightFile(api, opts.sessionsTable, inflightPath, maxBatchRows); + totalRows += rows; + totalBatches += batches; + flushedAny = flushedAny || rows > 0; + } catch (e) { + requeueInflight(queuePath, inflightPath); + if (e instanceof SessionWriteDisabledError) { + return { status: "disabled", rows: totalRows, batches: totalBatches }; + } + throw e; + } + if (!drainAll) { + return { status: "flushed", rows: totalRows, batches: totalBatches }; + } + } +} +function getQueuePath(queueDir, sessionId) { + return join6(queueDir, `${sessionId}.jsonl`); +} +function getInflightPath(queueDir, sessionId) { + return join6(queueDir, `${sessionId}.inflight`); +} +function extractSessionId(sessionPath) { + const filename = sessionPath.split("/").pop() ?? ""; + return filename.replace(/\.jsonl$/, "").split("_").pop() ?? 
filename; +} +async function flushInflightFile(api, sessionsTable, inflightPath, maxBatchRows) { + const rows = readQueuedRows(inflightPath); + if (rows.length === 0) { + rmSync(inflightPath, { force: true }); + return { rows: 0, batches: 0 }; + } + let ensured = false; + let batches = 0; + const queueDir = dirname2(inflightPath); + for (let i = 0; i < rows.length; i += maxBatchRows) { + const chunk = rows.slice(i, i + maxBatchRows); + const sql = buildSessionInsertSql(sessionsTable, chunk); + try { + await api.query(sql); + } catch (e) { + if (isSessionWriteAuthError(e)) { + markSessionWriteDisabled(sessionsTable, errorMessage(e), queueDir); + throw new SessionWriteDisabledError(errorMessage(e)); + } + if (!ensured && isEnsureSessionsTableRetryable(e)) { + try { + await api.ensureSessionsTable(sessionsTable); + } catch (ensureError) { + if (isSessionWriteAuthError(ensureError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(ensureError), queueDir); + throw new SessionWriteDisabledError(errorMessage(ensureError)); + } + throw ensureError; + } + ensured = true; + try { + await api.query(sql); + } catch (retryError) { + if (isSessionWriteAuthError(retryError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(retryError), queueDir); + throw new SessionWriteDisabledError(errorMessage(retryError)); + } + throw retryError; + } + } else { + throw e; + } + } + batches += 1; + } + clearSessionWriteDisabled(sessionsTable, queueDir); + rmSync(inflightPath, { force: true }); + return { rows: rows.length, batches }; +} +function readQueuedRows(path) { + const raw = readFileSync4(path, "utf-8"); + return raw.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => JSON.parse(line)); +} +function requeueInflight(queuePath, inflightPath) { + if (!existsSync4(inflightPath)) return; - const input = await readStdin(); - const config = loadConfig(); - if (!config) { - log3("no config"); + const inflight = readFileSync4(inflightPath, "utf-8"); + appendFileSync3(queuePath, inflight); + rmSync(inflightPath, { force: true }); +} +function recoverStaleInflight(queuePath, inflightPath, staleInflightMs) { + if (!existsSync4(inflightPath) || !isStale(inflightPath, staleInflightMs)) return; + requeueInflight(queuePath, inflightPath); +} +function isStale(path, staleInflightMs) { + return Date.now() - statSync(path).mtimeMs >= staleInflightMs; +} +function isEnsureSessionsTableRetryable(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("does not exist") || message.includes("doesn't exist") || message.includes("relation") || message.includes("not found"); +} +function isSessionWriteAuthError(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("403") || message.includes("401") || message.includes("forbidden") || message.includes("unauthorized"); +} +function markSessionWriteDisabled(sessionsTable, reason, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync4(queueDir, { recursive: true }); + writeFileSync4(getSessionWriteDisabledPath(queueDir, sessionsTable), JSON.stringify({ + disabledAt: (/* @__PURE__ */ new Date()).toISOString(), + reason, + sessionsTable + })); +} +function clearSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR) { + rmSync(getSessionWriteDisabledPath(queueDir, sessionsTable), { force: true }); +} +function isSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, ttlMs = DEFAULT_AUTH_FAILURE_TTL_MS) { + const path = getSessionWriteDisabledPath(queueDir, sessionsTable); + if 
(!existsSync4(path)) + return false; + try { + const raw = readFileSync4(path, "utf-8"); + const state = JSON.parse(raw); + const ageMs = Date.now() - new Date(state.disabledAt).getTime(); + if (Number.isNaN(ageMs) || ageMs >= ttlMs) { + rmSync(path, { force: true }); + return false; + } + return true; + } catch { + rmSync(path, { force: true }); + return false; + } +} +function getSessionWriteDisabledPath(queueDir, sessionsTable) { + return join6(queueDir, `.${sessionsTable}.disabled.json`); +} +function errorMessage(error) { + return error instanceof Error ? error.message : String(error); +} +async function waitForInflightToClear(inflightPath, waitIfBusyMs) { + const startedAt = Date.now(); + while (existsSync4(inflightPath) && Date.now() - startedAt < waitIfBusyMs) { + await sleep2(BUSY_WAIT_STEP_MS); + } +} +function sleep2(ms) { + return new Promise((resolve2) => setTimeout(resolve2, ms)); +} + +// dist/src/hooks/query-cache.js +import { mkdirSync as mkdirSync5, readFileSync as readFileSync5, rmSync as rmSync2, statSync as statSync2, writeFileSync as writeFileSync5 } from "node:fs"; +import { join as join7 } from "node:path"; +import { homedir as homedir6 } from "node:os"; +var log3 = (msg) => log("query-cache", msg); +var DEFAULT_CACHE_ROOT = join7(homedir6(), ".deeplake", "query-cache"); +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; +function getSessionQueryCacheDir(sessionId, deps = {}) { + const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; + return join7(cacheRoot, sessionId); +} +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + rmSync2(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); } - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); - const ts = (/* @__PURE__ */ new Date()).toISOString(); +} + +// dist/src/hooks/capture.js +var log4 = (msg) => log("capture", msg); +var CAPTURE = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false"; +function buildCaptureEntry(input, timestamp) { const meta = { session_id: input.session_id, transcript_path: input.transcript_path, @@ -681,20 +1340,18 @@ async function main() { hook_event_name: input.hook_event_name, agent_id: input.agent_id, agent_type: input.agent_type, - timestamp: ts + timestamp }; - let entry; if (input.prompt !== void 0) { - log3(`user session=${input.session_id}`); - entry = { + return { id: crypto.randomUUID(), ...meta, type: "user_message", content: input.prompt }; - } else if (input.tool_name !== void 0) { - log3(`tool=${input.tool_name} session=${input.session_id}`); - entry = { + } + if (input.tool_name !== void 0) { + return { id: crypto.randomUUID(), ...meta, type: "tool_call", @@ -703,75 +1360,104 @@ async function main() { tool_input: JSON.stringify(input.tool_input), tool_response: JSON.stringify(input.tool_response) }; - } else if (input.last_assistant_message !== void 0) { - log3(`assistant session=${input.session_id}`); - entry = { + } + if (input.last_assistant_message !== void 0) { + return { id: crypto.randomUUID(), ...meta, type: "assistant_message", content: input.last_assistant_message, ...input.agent_transcript_path ? 
{ agent_transcript_path: input.agent_transcript_path } : {} }; - } else { - log3("unknown event, skipping"); - return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log3(`writing to ${sessionPath}`); - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = line.replace(/'/g, "''"); - const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; - try { - await api.query(insertSql); - } catch (e) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log3("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } - } - log3("capture ok \u2192 cloud"); - maybeTriggerPeriodicSummary(input.session_id, input.cwd ?? "", config); + return null; } -function maybeTriggerPeriodicSummary(sessionId, cwd, config) { - if (process.env.HIVEMIND_WIKI_WORKER === "1") +function maybeTriggerPeriodicSummary(sessionId, cwd, config, deps = {}) { + const { bundleDir = bundleDirFromImportMeta(import.meta.url), wikiWorker = process.env.HIVEMIND_WIKI_WORKER === "1", logFn = log4, bumpTotalCountFn = bumpTotalCount, loadTriggerConfigFn = loadTriggerConfig, shouldTriggerFn = shouldTrigger, tryAcquireLockFn = tryAcquireLock, wikiLogFn = wikiLog, spawnWikiWorkerFn = spawnWikiWorker } = deps; + if (wikiWorker) return; try { - const state = bumpTotalCount(sessionId); - const cfg = loadTriggerConfig(); - if (!shouldTrigger(state, cfg)) + const state = bumpTotalCountFn(sessionId); + const cfg = loadTriggerConfigFn(); + if (!shouldTriggerFn(state, cfg)) return; - if (!tryAcquireLock(sessionId)) { - log3(`periodic trigger suppressed (lock held) session=${sessionId}`); + if (!tryAcquireLockFn(sessionId)) { + logFn(`periodic trigger suppressed (lock held) session=${sessionId}`); return; } - wikiLog(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); - try { - spawnWikiWorker({ - config, - sessionId, - cwd, - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Periodic" - }); - } catch (e) { - log3(`periodic spawn failed: ${e.message}`); - try { - releaseLock(sessionId); - } catch (releaseErr) { - log3(`releaseLock after periodic spawn failure also failed: ${releaseErr.message}`); - } - throw e; - } + wikiLogFn(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); + spawnWikiWorkerFn({ + config, + sessionId, + cwd, + bundleDir, + reason: "Periodic" + }); } catch (e) { - log3(`periodic trigger error: ${e.message}`); + logFn(`periodic trigger error: ${e.message}`); } } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +async function runCaptureHook(input, deps = {}) { + const { captureEnabled = CAPTURE, config = loadConfig(), now = () => (/* @__PURE__ */ new Date()).toISOString(), createApi = (activeConfig) => new 
DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, activeConfig.sessionsTableName), appendQueuedSessionRowFn = appendQueuedSessionRow, buildQueuedSessionRowFn = buildQueuedSessionRow, flushSessionQueueFn = flushSessionQueue, clearSessionQueryCacheFn = clearSessionQueryCache, maybeTriggerPeriodicSummaryFn = maybeTriggerPeriodicSummary, logFn = log4 } = deps; + if (!captureEnabled) + return { status: "disabled" }; + if (!config) { + logFn("no config"); + return { status: "no_config" }; + } + const ts = now(); + const entry = buildCaptureEntry(input, ts); + if (!entry) { + logFn("unknown event, skipping"); + return { status: "ignored" }; + } + if (input.prompt !== void 0) + logFn(`user session=${input.session_id}`); + else if (input.tool_name !== void 0) + logFn(`tool=${input.tool_name} session=${input.session_id}`); + else + logFn(`assistant session=${input.session_id}`); + if (input.hook_event_name === "UserPromptSubmit") { + clearSessionQueryCacheFn(input.session_id); + } + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: input.hook_event_name ?? "", + agent: "claude_code", + timestamp: ts + })); + logFn(`queued ${input.hook_event_name ?? "event"} for ${sessionPath}`); + maybeTriggerPeriodicSummaryFn(input.session_id, input.cwd ?? "", config); + if (input.hook_event_name === "Stop" || input.hook_event_name === "SubagentStop") { + const result = await flushSessionQueueFn(createApi(config), { + sessionId: input.session_id, + sessionsTable: config.sessionsTableName, + drainAll: true + }); + logFn(`flush ${result.status}: rows=${result.rows} batches=${result.batches}`); + return { status: "queued", entry, flushStatus: result.status }; + } + return { status: "queued", entry }; +} +async function main() { + const input = await readStdin(); + await runCaptureHook(input); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log4(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + buildCaptureEntry, + maybeTriggerPeriodicSummary, + runCaptureHook +}; diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index 064f11e..8ecda06 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -233,6 +233,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? 
join2(home, ".deeplake", "memory") }; } @@ -277,6 +282,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -380,10 +401,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -406,9 +427,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -465,6 +490,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -562,17 +610,242 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
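// A minimal sketch (not part of this diff) of the retry policy this file
// wraps around query() in the hunks above: exponential backoff with jitter,
// applied to the retryable statuses {429, 500, 502, 503, 504} and the same
// constants (MAX_RETRIES = 3, BASE_DELAY_MS = 500). runWithRetry is an
// illustrative name, not an identifier from this changeset.
async function runWithRetry(fn, maxRetries = 3, baseDelayMs = 500) {
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (e) {
      lastError = e;
      if (attempt === maxRetries) break;
      // Delays grow 500ms, 1s, 2s, ... with up to 200ms of random jitter.
      const delay = baseDelayMs * Math.pow(2, attempt) + Math.random() * 200;
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw lastError;
}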
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 5076674..a51a6f8 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -2,8 +2,8 @@ // dist/src/hooks/pre-tool-use.js import { existsSync as existsSync3, mkdirSync as mkdirSync3, writeFileSync as writeFileSync3 } from "node:fs"; -import { homedir as homedir5 } from "node:os"; import { join as join6, dirname, sep } from "node:path"; +import { homedir as homedir5 } from "node:os"; import { fileURLToPath as fileURLToPath2 } from "node:url"; // dist/src/utils/stdin.js @@ -38,12 +38,12 @@ function loadConfig() { return null; } } - const env = process.env; - if 
(!env.HIVEMIND_TOKEN && env.DEEPLAKE_TOKEN) { + const env2 = process.env; + if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env.HIVEMIND_TOKEN ?? env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env.HIVEMIND_ORG_ID ?? env.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -51,11 +51,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env.HIVEMIND_WORKSPACE_ID ?? env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? 
new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -290,6 +315,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -387,17 +435,242 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
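+      // Illustrative read path over this graph schema (a sketch only, assuming
+      // the deeplake SQL dialect supports this join shape; 'node-123' is a
+      // placeholder id, not part of this bundle): edges reference nodes through
+      // source_node_id/target_node_id, which is what the node_id and
+      // (source_node_id, target_node_id, relation) lookup indexes ensured below
+      // exist to serve:
+      //   SELECT e.relation, n.canonical_name, n.summary
+      //   FROM "graph_edges" e
+      //   JOIN "graph_nodes" n ON n.node_id = e.target_node_id
+      //   WHERE e.source_node_id = 'node-123';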
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; @@ -415,7 +688,309 @@ function isDirectRun(metaUrl) { } } +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 
0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? 
"cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i = 0; i < dedupedTextRows.length; i++) { + const row = dedupedTextRows[i]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i] ?? 0) + }); + } + for (let i = 0; i < dedupedVectorRows.length; i++) { + const row = dedupedVectorRows[i]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i] ?? 0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a, b) => b.fusedScore - a.fusedScore || b.vectorScore - a.vectorScore || b.textScore - a.textScore || a.sourceOrder - b.sourceOrder || a.creationDate.localeCompare(b.creationDate) || a.path.localeCompare(b.path)).slice(0, Math.max(0, limit)); +} + +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? 
""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -578,24 +1153,9 @@ function normalizeContent(path, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t) => { - const sp = String(t?.speaker ?? t?.name ?? "?").trim(); - const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t) => { const i = t.indexOf(""); @@ -639,14 +1199,70 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 
100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const sessionsOnly = isSessionsOnlyMode(); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? semanticQueryText).trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content + })); + } + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -667,6 +1283,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -693,13 +1313,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -727,33 +1348,201 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? 
Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { + if (!pattern) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -787,12 +1576,12 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { } return output; } -async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { +async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, forceMultiFilePrefix) { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } // dist/src/utils/output-cap.js @@ -918,7 +1707,7 @@ function parseBashGrep(cmd) { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns = []; let ti = 1; while (ti < tokens.length) { @@ -1010,6 +1799,8 @@ function parseBashGrep(cmd) { break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -1051,6 +1842,7 @@ function parseBashGrep(cmd) { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, filesOnly, @@ -1073,44 +1865,241 @@ async function handleGrepDirect(api, table, sessionsTable, params) { invertMatch: params.invertMatch, fixedString: params.fixedString }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, params.recursive ? true : void 0); const joined = output.join("\n") || "(no matches)"; return capOutputForClaude(joined, { kind: "grep" }); } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename(path) { + const trimmed = path.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? 
extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts = new Date(parsed); + const yyyy = ts.getUTCFullYear(); + const mm = String(ts.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts.getUTCDate()).padStart(2, "0"); + const hh = String(ts.getUTCHours()).padStart(2, "0"); + const min = String(ts.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path = typeof row.path === "string" ? row.path : ""; + if (!path) + return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? 
structuredBlurb : truncate(description, 220); + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + // dist/src/hooks/virtual-table-query.js function normalizeSessionPart(path, content) { return normalizeContent(path, content); } -function buildVirtualIndexContent(summaryRows, sessionRows = []) { - const total = summaryRows.length + sessionRows.length; +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); const lines = [ "# Memory Index", "", - `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`, + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", "" ]; - if (summaryRows.length > 0) { - lines.push("## Summaries", ""); - for (const row of summaryRows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? 
`[${project}]` : ""} ${description}`); - } + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); lines.push(""); } - if (sessionRows.length > 0) { - lines.push("## Sessions", ""); - for (const row of sessionRows) { - const path = row["path"]; - const description = (row["description"] || "").slice(0, 120); - lines.push(`- [${path}](${path}) ${description}`); - } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry) { + const session = entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} function buildUnionQuery(memoryQuery, sessionsQuery) { return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; } @@ -1121,10 +2110,13 @@ function buildDirFilter(dirs) { const cleaned = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; if (cleaned.length === 0 || cleaned.includes("/")) return ""; - const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`); + const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%'`); return ` WHERE ${clauses.join(" OR ")}`; } async function queryUnionRows(api, memoryQuery, sessionsQuery) { + if (isSessionsOnlyMode()) { + return api.query(`SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date`); + } const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -1141,7 +2133,13 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP const result = new Map(uniquePaths.map((path) => [path, null])); if (uniquePaths.length === 0) return result; - const inList = buildInList(uniquePaths); + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } + const queryPaths = isIndexDisabled() ? uniquePaths.filter((path) => path !== "/index.md") : uniquePaths; + if (queryPaths.length === 0) + return result; + const inList = buildInList(queryPaths); const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); const memoryHits = /* @__PURE__ */ new Map(); const sessionHits = /* @__PURE__ */ new Map(); @@ -1159,7 +2157,7 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP sessionHits.set(path, current); } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? 
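// Precedence per requested path: a memory-table hit (summary text) wins and the
// loop continues; only paths without a summary fall through to the session parts,
// which are joined below in source order. The ?? merely normalizes a missing map
// value to null.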
null); continue; @@ -1169,12 +2167,9 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP result.set(path, sessionParts.join("\n")); } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { - const [summaryRows, sessionRows] = await Promise.all([ - api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []), - api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => []) - ]); - result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const rows2 = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`).catch(() => []); + result.set("/index.md", buildVirtualIndexContent(rows2)); } return result; } @@ -1211,7 +2206,7 @@ async function listVirtualPathRows(api, memoryTable, sessionsTable, dir) { async function findVirtualPaths(api, memoryTable, sessionsTable, dir, filenamePattern) { const normalizedDir = dir.replace(/\/+$/, "") || "/"; const likePath = `${sqlLike(normalizedDir === "/" ? "" : normalizedDir)}/%`; - const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`); + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`); return [...new Set(rows.map((row) => row["path"]).filter((value) => typeof value === "string" && value.length > 0))]; } function dedupeRowsByPath(rows) { @@ -1228,6 +2223,56 @@ function dedupeRowsByPath(rows) { } // dist/src/hooks/bash-command-compiler.js +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID2 = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT2 = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT2 = 0.3; +var summaryRetrievalEmbedder = null; +function envString2(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag2(...names) { + const raw = envString2(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber2(fallback, ...names) { + const raw = envString2(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getSummaryRetrievalEmbedder() { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString2("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? 
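// Embedder configuration resolves HIVEMIND_* env vars first, then the DEEPLAKE_*
// twins, then (for the model id only) the legacy *_HARRIER_MODEL_ID names, before
// the bundled default model id on the next line; device defaults to "cpu".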
DEFAULT_EMBED_RETRIEVAL_MODEL_ID2, + device: envString2("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString2("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString2("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return summaryRetrievalEmbedder; +} +function sqlFloat4Array2(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function quoteShellToken(token) { + if (token === "") + return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) + return token; + return `'${token.replace(/'/g, `'"'"'`)}'`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -1235,11 +2280,19 @@ function splitTopLevel(input, operators) { const parts = []; let current = ""; let quote = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === '"') + escaped = true; current += ch; continue; } @@ -1248,6 +2301,11 @@ function splitTopLevel(input, operators) { current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { const trimmed2 = current.trim(); @@ -1259,7 +2317,7 @@ function splitTopLevel(input, operators) { } current += ch; } - if (quote) + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) @@ -1323,8 +2381,8 @@ function expandBraceToken(token) { return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); } function stripAllowedModifiers(segment) { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); - const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "").replace(/\s2>&1(?=\s*(?:\||$))/g, "").trim(); return { clean, ignoreMissing }; } function hasUnsupportedRedirection(segment) { @@ -1393,7 +2451,7 @@ function isValidPipelineHeadTailStage(stage) { return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); return false; } -function parseFindNamePatterns(tokens) { +function parseFindSpec(tokens) { const patterns = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -1411,9 +2469,600 @@ function parseFindNamePatterns(tokens) { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) + return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if (terminator !== "\\;" && terminator !== ";" || target !== "{}") + return null; + return { + patterns, + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" ") + }; + } + return null; + } + return patterns.length > 0 ? 
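// parseFindSpec collects the find command's -name patterns and optionally one
// trailing "-exec grep ... {} \;" stage; the exec tokens (minus the ; terminator,
// keeping the {} target) are re-quoted into a grep command string so the
// find_grep branch can hand it to parseBashGrep. Illustrative shape (assumed,
// not verbatim from this patch):
//   find /summaries -name '*.md' -exec grep -l alice {} \;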
{ patterns, execGrepCmd: null } : null; +} +function extractPsqlQuery(tokens) { + let query = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} +function extractPsqlQueryFromCommand(cmd) { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") + return null; + return extractPsqlQuery(tokens); +} +function normalizeSqlRef(ref) { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} +function deriveSiblingTableName(tableName, expectedBase, targetBase) { + if (tableName === expectedBase) + return null; + if (!tableName.startsWith(expectedBase)) + return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} +function resolveInterceptedTableNames(memoryTable, sessionsTable) { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links") + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links") + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? "graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? 
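// Table-name precedence: explicit HIVEMIND_*/DEEPLAKE_* env vars, then siblings
// derived from a suffixed base table (deriveSiblingTableName maps e.g. a memory
// table named "memory_v2" to "graph_nodes_v2"), then the bare defaults such as
// the "fact_entity_links" literal completing this ?? chain.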
"fact_entity_links" + }; +} +function getInterceptedSqlRefs() { + if (isFactsSessionsOnlyPsqlMode()) { + return /* @__PURE__ */ new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); + } + return /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); +} +function extractSqlTableRefs(query) { + const refs = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) + refs.push(normalizeSqlRef(match[1])); + } + return refs; +} +function queryReferencesInterceptedTables(query) { + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); +} +function queryUsesOnlyInterceptedTables(query) { + const refs = extractSqlTableRefs(query); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => interceptedRefs.has(ref)); +} +function parsePsqlSegment(pipeline, tokens) { + if (tokens[0] !== "psql" || !isPsqlMode()) + return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) + tuplesOnly = true; + continue; + } + } + if (!query || !queryUsesOnlyInterceptedTables(query)) return null; + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql.replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM 
"${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); } - return patterns.length > 0 ? 
patterns : null; + return sql; +} +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = /* @__PURE__ */ new Set([ + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable + ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new Error("psql query must reference an intercepted hivemind memory table"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} +function decodeSqlLiteral(value) { + return value.replace(/''/g, "'").trim(); +} +function cleanSearchTerm(value) { + return decodeSqlLiteral(value).replace(/^%+|%+$/g, "").replace(/^_+|_+$/g, "").trim(); +} +function extractSqlSearchTerms(query) { + const terms = []; + const push = (value) => { + const cleaned = cleanSearchTerm(value); + if (!cleaned) + return; + if (cleaned.startsWith("/")) + return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) + return; + if (!terms.includes(cleaned)) + terms.push(cleaned); + }; + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} +function chooseEntityTerms(terms) { + const entityLike = terms.filter((term) => /[A-Z]/.test(term) && !/^\d+$/.test(term) && term.split(/\s+/).length <= 4); + return (entityLike.length > 0 ? 
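// Entity-likeness heuristic: keep terms containing an uppercase letter, not
// purely numeric, and at most four words long (so "Alice Smith" qualifies while
// "database migration" does not; illustrative terms); when nothing qualifies,
// all terms are kept. Either way the slice caps the result at two terms.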
entityLike : terms).slice(0, 2); +} +function escapeRegex2(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`); + const nodeTextClauses = topicTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeEntityClauses = entityTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeTopicClauses = topicTerms.map((term) => `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${nodeTextClauses.join(" OR ")})` : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${edgeTopicClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")})` : "FALSE"; + const sql = `WITH node_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphNodesTable}" WHERE ${nodeWhere} ORDER BY score DESC LIMIT 8), edge_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphEdgesTable}" WHERE ${edgeWhere} ORDER BY score DESC LIMIT 8) SELECT source_session_id, source_path, search_text, score FROM ( SELECT source_session_id, source_path, search_text, score FROM node_candidates UNION ALL SELECT source_session_id, source_path, search_text, score FROM edge_candidates ) AS graph_candidates ORDER BY score ASC LIMIT 12`; + const rows = await api.query(sql); + const expanded = []; + const seen = /* @__PURE__ */ new Set(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...searchText.match(/conv_\d+_session_\d+/g) ?? [], + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "" + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? [], + typeof row["source_path"] === "string" ? 
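// Each graph hit expands into (sessionId, sourcePath) pairs: conv_*_session_* ids
// and /sessions/*.json paths are mined from search_text, the row's own
// source_session_id / source_path columns are appended, and pairs are deduped on
// a "sessionId@@sourcePath" key with a hard cap of 12 candidates.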
row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`) + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) + continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) + continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) + return expanded; + } + } + return expanded; +} +function splitDelimitedField(value) { + if (typeof value !== "string") + return []; + return value.split(",").map((item) => item.trim()).filter(Boolean); +} +function extractSessionIdFromPath(value) { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} +function extractSummarySourcePath(summary) { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} +function addHybridCandidate(map, candidate) { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? ""; + if (!sessionId && !sourcePath) + return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: /* @__PURE__ */ new Set([candidate.signal]) + }); +} +async function fetchEntityResolution(api, entitiesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`).join(" OR "); + const sql = `SELECT entity_id, source_session_ids, source_paths, search_text, search_text <#> '${phrase}' AS score FROM "${entitiesTable}" WHERE ${where} ORDER BY score ASC LIMIT 8`; + const rows = await api.query(sql); + const entityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) + entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? 
`/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity" + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} +async function fetchFactCandidates(api, factsTable, terms, entityIds) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms).map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 ? `(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? `(${topicClauses.join(" OR ")})` : "" + ].filter(Boolean); + if (whereParts.length === 0) + return { entityIds: [], candidates: [] }; + const sql = `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score FROM "${factsTable}" WHERE ${whereParts.join(" AND ")} ORDER BY score ASC LIMIT 16`; + const rows = await api.query(sql); + const relatedEntityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"]) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) + relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ? row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact" + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} +async function fetchSummaryCandidates(api, memoryTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + let rows = []; + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) + return []; + const queryVectorSql = sqlFloat4Array2(queryEmbedding); + const vectorSql = `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? 
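// Retrieval-mode switch: "embedding" ranks summaries by vector score alone (this
// branch), "hybrid" fuses the vector ranking with a <#> text ranking through
// fuseRetrievalRows (default weights 0.7 vector / 0.3 text), and any other mode
// falls back to ILIKE matching, with a heuristic scoring query if <#> errors.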
row["summary"] : "" + })); + } else { + const textSql = `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score FROM "${memoryTable}" ORDER BY score DESC LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)) + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8 + }).map((row) => ({ + path: row.path, + summary: row.content + })); + } + } else { + const phraseSql = sqlStr(phrase); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); + } + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = row.path; + const summary = row.summary; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? `/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary" + }); + } + return [...candidateMap.values()]; +} +function buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase) { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END` + ]; + return `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; +} +function mapSummaryRows(rows) { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? 
Number(row["score"]) : 0 + })); +} +function prependCtes(sql, ctes) { + if (ctes.length === 0) + return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} +function rewriteQueryWithRestrictedTables(sql, aliases) { + let rewritten = sql; + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(aliases.memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(aliases.sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex2(aliases.factsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); + } + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex2(aliases.entitiesTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); + } + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex2(aliases.factEntityLinksTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"), `FROM "${aliases.restrictedLinksAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); + } + return rewritten; +} +async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory2 = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); + const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory2 && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) + return sql; + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) + return sql; + const candidateMap = /* @__PURE__ */ new Map(); + const graphCandidates = await 
fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + const candidateEntityIds = [.../* @__PURE__ */ new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()].sort((a, b) => b.score - a.score || b.signals.size - a.signals.size).slice(0, 12); + if (candidates.length === 0) + return sql; + if (candidates.length > 16) + return sql; + const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})` + ]; + let restrictedMemoryAlias = null; + let restrictedSessionsAlias = null; + let restrictedFactsAlias = null; + let restrictedEntitiesAlias = null; + let restrictedLinksAlias = null; + if (candidateEntityIds.length > 0) { + ctes.push(`__hm_entity_candidates(entity_id) AS (VALUES ${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`); + } + if (touchesMemory2) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); + } + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push(`"${restrictedFactsAlias}" AS ( SELECT * FROM "${factsTable}" f WHERE ( f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? 
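// Candidate pairs become a __hm_graph_candidates VALUES CTE (plus
// __hm_entity_candidates when entity ids were resolved); each touched table is
// then shadowed by a filtered __hm_* alias and rewriteQueryWithRestrictedTables
// repoints the query's FROM/JOIN clauses at those aliases. The ternary continuing
// below only widens the facts filter with entity-id predicates when candidate
// entity ids exist.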
` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates) OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + ` ))`); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push(`"${restrictedEntitiesAlias}" AS ( SELECT * FROM "${entitiesTable}" e WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates))`); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push(`"${restrictedLinksAlias}" AS ( SELECT * FROM "${factEntityLinksTable}" l WHERE ( l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + (touchesFacts ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` : "") + ` ))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias + }), ctes); +} +function formatPsqlValue(value) { + if (value === null || value === void 0) + return ""; + if (typeof value === "string") + return value; + if (typeof value === "number" || typeof value === "boolean") + return String(value); + return JSON.stringify(value); +} +function formatPsqlRows(rows, tuplesOnly, fieldSeparator) { + if (rows.length === 0) + return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? {}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) + return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); } function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); @@ -1425,6 +3074,9 @@ function parseCompiledSegment(segment) { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) + return psqlSegment; if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -1500,15 +3152,32 @@ function parseCompiledSegment(segment) { const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams2 = parseBashGrep(execGrepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") @@ 
-1678,6 +3347,16 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, outputs.push(limited.join("\n") || "(no matches)"); continue; } + if (segment.kind === "psql") { + const { graphNodesTable, graphEdgesTable } = resolveInterceptedTableNames(memoryTable, sessionsTable); + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const rows = await api.query(prepared); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) @@ -1694,20 +3373,35 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, statSync, writeFileSync as writeFileSync2 } from "node:fs"; import { join as join4 } from "node:path"; import { homedir as homedir3 } from "node:os"; var log3 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", "query-cache"); var INDEX_CACHE_FILE = "index.md"; +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join4(cacheRoot, sessionId); } +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + rmSync(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } +} function readCachedIndexContent(sessionId, deps = {}) { const { logFn = log3 } = deps; try { - return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if (Date.now() - stats.mtimeMs > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync3(cachePath, "utf-8"); } catch (e) { if (e?.code === "ENOENT") return null; @@ -1789,6 +3483,7 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", @@ -1825,11 +3520,66 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "case", "esac" ]); +function splitSafeStages(cmd) { + const stages = []; + let current = ""; + let quote = null; + let escaped = false; + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + current += ch; + continue; + } + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) + stages.push(current.trim()); + 
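// splitSafeStages mirrors splitTopLevel but only separates the pipeline/sequence
// operators |, ;, newline, && and ||, honoring quotes and backslash escapes;
// returning null on an unterminated quote or trailing escape makes isSafe reject
// the command outright rather than misread a quoted operator as a stage break.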
current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) + stages.push(current.trim()); + current = ""; + continue; + } + current += ch; + } + if (quote || escaped) + return null; + if (current.trim()) + stages.push(current.trim()); + return stages; +} function isSafe(cmd) { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) + return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? ""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) @@ -1845,10 +3595,44 @@ function rewritePaths(cmd) { } // dist/src/hooks/pre-tool-use.js +var READ_CACHE_ROOT = join6(homedir5(), ".deeplake", "query-cache"); +function touchesVirtualMemoryPath(value) { + const rewritten = rewritePaths(value).trim(); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/") || /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten); +} +function touchesAnyMemoryPath(value) { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} +function isAnyPsqlCommand(cmd) { + return /^\s*psql\b/.test(cmd.trim()); +} +function isHivemindPsqlCommand(cmd) { + if (!isPsqlMode()) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} +function needsHivemindPsqlRewrite(cmd) { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} +function buildPsqlOnlyGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} +function buildPsqlSchemaGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } + return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; +} var log4 = (msg) => log("pre", msg); var __bundleDir = dirname(fileURLToPath2(import.meta.url)); var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? 
join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); -var READ_CACHE_ROOT = join6(homedir5(), ".deeplake", "query-cache"); function writeReadCacheFile(sessionId, virtualPath, content, deps = {}) { const { cacheRoot = READ_CACHE_ROOT } = deps; const safeSessionId = sessionId.replace(/[^a-zA-Z0-9._-]/g, "_") || "unknown"; @@ -1880,7 +3664,9 @@ function getShellCommand(toolName, toolInput) { switch (toolName) { case "Grep": { const p = toolInput.path; - if (p && touchesMemory(p)) { + if (isPsqlMode() && p && touchesAnyMemoryPath(p)) + return null; + if (p && touchesAnyMemoryPath(p)) { const pattern = toolInput.pattern ?? ""; const flags = ["-r"]; if (toolInput["-i"]) @@ -1893,7 +3679,9 @@ function getShellCommand(toolName, toolInput) { } case "Read": { const fp = getReadTargetPath(toolInput); - if (fp && touchesMemory(fp)) { + if (isPsqlMode() && fp && touchesAnyMemoryPath(fp)) + return null; + if (fp && touchesAnyMemoryPath(fp)) { const rewritten = rewritePaths(fp) || "/"; return `${isLikelyDirectoryPath(rewritten) ? "ls" : "cat"} ${rewritten}`; } @@ -1901,7 +3689,13 @@ function getShellCommand(toolName, toolInput) { } case "Bash": { const cmd = toolInput.command; - if (!cmd || !touchesMemory(cmd)) + if (!cmd) + break; + if (isHivemindPsqlCommand(cmd)) + return cmd.trim(); + if (isPsqlMode() && (touchesAnyMemoryPath(cmd) || needsHivemindPsqlRewrite(cmd))) + return null; + if (!touchesAnyMemoryPath(cmd)) break; const rewritten = rewritePaths(cmd); if (!isSafe(rewritten)) { @@ -1912,7 +3706,9 @@ function getShellCommand(toolName, toolInput) { } case "Glob": { const p = toolInput.path; - if (p && touchesMemory(p)) + if (isPsqlMode() && p && touchesAnyMemoryPath(p)) + return null; + if (p && touchesAnyMemoryPath(p)) return "ls /"; break; } @@ -1928,6 +3724,7 @@ function extractGrepParams(toolName, toolInput, shellCmd) { return { pattern: toolInput.pattern ?? "", targetPath: rewritePaths(toolInput.path ?? "") || "/", + recursive: true, ignoreCase: !!toolInput["-i"], wordMatch: false, filesOnly: outputMode === "files_with_matches", @@ -1949,22 +3746,27 @@ async function processPreToolUse(input, deps = {}) { const cmd = input.tool_input.command ?? ""; const shellCmd = getShellCommand(input.tool_name, input.tool_input); const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; - if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { - const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + const psqlRewriteNeeded = needsHivemindPsqlRewrite(cmd); + if (!shellCmd && (touchesAnyMemoryPath(cmd) || touchesAnyMemoryPath(toolPath) || psqlRewriteNeeded)) { + const guidance = isPsqlMode() ? psqlRewriteNeeded ? buildPsqlSchemaGuidance() : buildPsqlOnlyGuidance() : `[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. 
For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'.`; logFn(`unsupported command, returning guidance: ${cmd}`); - return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); + return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, isPsqlMode() ? "[DeepLake SQL] unsupported command \u2014 rewrite using psql over memory/sessions" : "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); } if (!shellCmd) return null; - if (!config) + if (!config) { + if (isHivemindPsqlCommand(shellCmd)) { + return buildAllowDecision(`echo ${JSON.stringify("[RETRY REQUIRED] Hivemind SQL mode is unavailable because Deeplake credentials are missing.")}`, "[DeepLake SQL] unavailable"); + } return buildFallbackDecision(shellCmd, shellBundle); + } const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; const api = createApi(table, config); const readVirtualPathContentsWithCache = async (cachePaths) => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; const remainingPaths = cachedIndex === null ? uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); if (cachedIndex !== null) { result.set("/index.md", cachedIndex); @@ -2054,10 +3856,14 @@ async function processPreToolUse(input, deps = {}) { } if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; + let content = !isIndexDisabled() && virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { + const idxRows = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`); + content = buildVirtualIndexContent(idxRows); + } if (content !== null) { if (virtualPath === "/index.md") { writeCachedIndexContentFn(input.session_id, content); @@ -2139,6 +3945,9 @@ async function processPreToolUse(input, deps = {}) { } catch (e) { logFn(`direct query failed, falling back to shell: ${e.message}`); } + if (isHivemindPsqlCommand(shellCmd)) { + return buildAllowDecision(`echo ${JSON.stringify("[RETRY REQUIRED] Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.")}`, "[DeepLake SQL] query rewrite required"); + } return buildFallbackDecision(shellCmd, shellBundle); } async function main() { diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index c10f5db..584553a 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -49,6 +49,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? 
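// The "guidance via echo" pattern used in pre-tool-use above: rather than denying
// the tool call, the hook rewrites it into an echo of a [RETRY REQUIRED] message,
// so the model sees the correction as command output and retries. JSON.stringify
// doubles as a cheap quoting step for this controlled guidance text (a sketch,
// not a general shell escaper -- $ and backticks inside double quotes would still
// expand in a real shell):
//
//   const guidance = "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode.";
//   const cmd = `echo ${JSON.stringify(guidance)}`;
//   // -> echo "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode."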
"memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -72,33 +77,71 @@ function log(tag, msg) { // dist/src/hooks/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join4 } from "node:path"; -import { writeFileSync, mkdirSync as mkdirSync2 } from "node:fs"; +import { dirname, join as join3 } from "node:path"; +import { writeFileSync, mkdirSync, appendFileSync as appendFileSync2 } from "node:fs"; import { homedir as homedir3, tmpdir } from "node:os"; -// dist/src/utils/wiki-log.js -import { mkdirSync, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join3 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join3(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } - } - }; -} +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; // dist/src/hooks/spawn-wiki-worker.js var HOME = homedir3(); -var wikiLogger = makeWikiLogger(join4(HOME, ".claude", "hooks")); -var WIKI_LOG = wikiLogger.path; -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +var WIKI_LOG = join3(HOME, ".claude", "hooks", "deeplake-wiki.log"); +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -112,58 +155,75 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. 
Write the summary file at the path above with this EXACT format. The header fields (Source, Project) are pre-filled \u2014 copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. The Source and Project fields above are already correct \u2014 do not change them. LENGTH LIMIT: Keep the total summary under 4000 characters. Be dense and concise \u2014 prioritize facts over prose. 
If a session is short, the summary should be short too.`; -var wikiLog = wikiLogger.log; +function wikiLog(msg) { + try { + mkdirSync(join3(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${utcTimestamp()}] ${msg} +`); + } catch { + } +} function findClaudeBin() { try { return execSync("which claude 2>/dev/null", { encoding: "utf-8" }).trim(); } catch { - return join4(HOME, ".claude", "local", "claude"); + return join3(HOME, ".claude", "local", "claude"); } } function spawnWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join4(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync2(tmpDir, { recursive: true }); - const configFile = join4(tmpDir, "config.json"); + const tmpDir = join3(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync(tmpDir, { recursive: true }); + const configFile = join3(tmpDir, "config.json"); writeFileSync(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, @@ -171,17 +231,24 @@ function spawnWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, tmpDir, claudeBin: findClaudeBin(), wikiLog: WIKI_LOG, - hooksDir: join4(HOME, ".claude", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + hooksDir: join3(HOME, ".claude", "hooks"), + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join4(bundleDir, "wiki-worker.js"); + const workerPath = join3(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -193,17 +260,17 @@ function bundleDirFromImportMeta(importMetaUrl) { } // dist/src/hooks/summary-state.js -import { readFileSync as readFileSync2, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync3, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; +import { readFileSync as readFileSync2, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync2, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; import { homedir as homedir4 } from "node:os"; -import { join as join5 } from "node:path"; +import { join as join4 } from "node:path"; var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join5(homedir4(), ".claude", "hooks", "summary-state"); +var STATE_DIR = join4(homedir4(), ".claude", "hooks", "summary-state"); var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); function lockPath(sessionId) { - return join5(STATE_DIR, `${sessionId}.lock`); + return join4(STATE_DIR, `${sessionId}.lock`); } function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync3(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const p = lockPath(sessionId); if (existsSync2(p)) { try { diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index c0f05cc..128978c 100755 --- a/claude-code/bundle/session-start-setup.js +++ 
b/claude-code/bundle/session-start-setup.js @@ -1,10 +1,11 @@ #!/usr/bin/env node // dist/src/hooks/session-start-setup.js -import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join7 } from "node:path"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname as dirname3, join as join7 } from "node:path"; +import { mkdirSync as mkdirSync5, appendFileSync as appendFileSync3 } from "node:fs"; import { execSync as execSync2 } from "node:child_process"; -import { homedir as homedir4 } from "node:os"; +import { homedir as homedir6 } from "node:os"; // dist/src/commands/auth.js import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from "node:fs"; @@ -60,6 +61,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -90,6 +96,12 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlIdent(name) { + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`); + } + return name; +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); @@ -107,6 +119,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -114,7 +142,7 @@ var MAX_CONCURRENCY = 5; var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); } function isTimeoutError(error) { const name = error instanceof Error ? 
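// sqlStr and sqlIdent split the injection surface in two: sqlStr escapes values
// interpolated inside single-quoted SQL literals, while sqlIdent guards
// identifiers (table/column names), where escaping is not enough and only a
// strict whitelist is safe:
//
//   sqlStr("O'Brien")        // -> "O''Brien"   (safe inside '...')
//   sqlIdent("memory_facts") // -> "memory_facts"
//   sqlIdent('x"; DROP')     // -> throws: Invalid SQL identifier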
error.name.toLowerCase() : ""; @@ -147,7 +175,7 @@ var Semaphore = class { this.active++; return; } - await new Promise((resolve) => this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -210,10 +238,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -236,9 +264,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -295,6 +327,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -392,29 +447,254 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
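// The ensure*Table methods all follow the same additive-migration shape as
// ensureSessionsTable above: CREATE TABLE IF NOT EXISTS for fresh installs, then
// per-column ALTER TABLE ... ADD COLUMN IF NOT EXISTS wrapped in try/catch for
// tables created by older bundles, then the lookup indexes. Condensed sketch
// (table and column names here are illustrative only):
//
//   await api.query(`CREATE TABLE IF NOT EXISTS "t" (id TEXT NOT NULL DEFAULT '') USING deeplake`);
//   try {
//     await api.query(`ALTER TABLE "t" ADD COLUMN IF NOT EXISTS "new_col" TEXT NOT NULL DEFAULT ''`);
//   } catch { /* servers without IF NOT EXISTS support: treat as already present */ }
//   await api.ensureLookupIndex("t", "id", `("id")`);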
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -423,140 +703,546 @@ function readStdin() { }); } -// dist/src/utils/version-check.js -import { readFileSync as readFileSync4 } from "node:fs"; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function 
isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + +// dist/src/hooks/session-queue.js +import { appendFileSync as appendFileSync2, closeSync, existsSync as existsSync4, mkdirSync as mkdirSync3, openSync, readFileSync as readFileSync4, readdirSync, renameSync, rmSync, statSync, writeFileSync as writeFileSync3 } from "node:fs"; import { dirname, join as join5 } from "node:path"; -var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +import { homedir as homedir4 } from "node:os"; +var DEFAULT_QUEUE_DIR = join5(homedir4(), ".deeplake", "queue"); +var DEFAULT_MAX_BATCH_ROWS = 50; +var DEFAULT_STALE_INFLIGHT_MS = 6e4; +var DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 6e4; +var DEFAULT_DRAIN_LOCK_STALE_MS = 3e4; +var BUSY_WAIT_STEP_MS = 100; +var SessionWriteDisabledError = class extends Error { + constructor(message) { + super(message); + this.name = "SessionWriteDisabledError"; + } +}; +function buildSessionInsertSql(sessionsTable, rows) { + if (rows.length === 0) + throw new Error("buildSessionInsertSql: rows must not be empty"); + const table = sqlIdent(sessionsTable); + const values = rows.map((row) => { + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + }).join(", "); + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; +} +function coerceJsonbPayload(message) { + try { + return JSON.stringify(JSON.parse(message)); + } catch { + return JSON.stringify({ + type: "raw_message", + content: message + }); + } +} +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +async function flushSessionQueue(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; + const staleInflightMs = opts.staleInflightMs ?? DEFAULT_STALE_INFLIGHT_MS; + const waitIfBusyMs = opts.waitIfBusyMs ?? 0; + const drainAll = opts.drainAll ?? false; + mkdirSync3(queueDir, { recursive: true }); + const queuePath = getQueuePath(queueDir, opts.sessionId); + const inflightPath = getInflightPath(queueDir, opts.sessionId); + if (isSessionWriteDisabled(opts.sessionsTable, queueDir)) { + return existsSync4(queuePath) || existsSync4(inflightPath) ? 
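// Every queued row's message lands in a JSONB column, so coerceJsonbPayload makes
// sure a malformed payload can never break the INSERT: valid JSON is re-serialized
// in compact form, anything else is wrapped in a raw_message envelope.
//
//   coerceJsonbPayload('{"a": 1}')  // -> '{"a":1}'
//   coerceJsonbPayload("not json")  // -> '{"type":"raw_message","content":"not json"}'
//
// escapeJsonbLiteral then only needs to double single quotes and strip NULs,
// because the value is guaranteed to be JSON.stringify output by that point.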
{ status: "disabled", rows: 0, batches: 0 } : { status: "empty", rows: 0, batches: 0 }; + } + let totalRows = 0; + let totalBatches = 0; + let flushedAny = false; + while (true) { + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + if (existsSync4(inflightPath)) { + if (waitIfBusyMs > 0) { + await waitForInflightToClear(inflightPath, waitIfBusyMs); + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + } + if (existsSync4(inflightPath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "busy", rows: 0, batches: 0 }; + } + } + if (!existsSync4(queuePath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + try { + renameSync(queuePath, inflightPath); + } catch (e) { + if (e?.code === "ENOENT") { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + throw e; + } + try { + const { rows, batches } = await flushInflightFile(api, opts.sessionsTable, inflightPath, maxBatchRows); + totalRows += rows; + totalBatches += batches; + flushedAny = flushedAny || rows > 0; + } catch (e) { + requeueInflight(queuePath, inflightPath); + if (e instanceof SessionWriteDisabledError) { + return { status: "disabled", rows: totalRows, batches: totalBatches }; + } + throw e; + } + if (!drainAll) { + return { status: "flushed", rows: totalRows, batches: totalBatches }; + } + } +} +async function drainSessionQueues(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + mkdirSync3(queueDir, { recursive: true }); + const sessionIds = listQueuedSessionIds(queueDir, opts.staleInflightMs ?? 
DEFAULT_STALE_INFLIGHT_MS); + let flushedSessions = 0; + let rows = 0; + let batches = 0; + for (const sessionId of sessionIds) { + const result = await flushSessionQueue(api, { + sessionId, + sessionsTable: opts.sessionsTable, + queueDir, + maxBatchRows: opts.maxBatchRows, + allowStaleInflight: true, + staleInflightMs: opts.staleInflightMs, + drainAll: true + }); + if (result.status === "flushed") { + flushedSessions += 1; + rows += result.rows; + batches += result.batches; + } + } + return { + queuedSessions: sessionIds.length, + flushedSessions, + rows, + batches + }; +} +function tryAcquireSessionDrainLock(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, staleMs = DEFAULT_DRAIN_LOCK_STALE_MS) { + mkdirSync3(queueDir, { recursive: true }); + const lockPath = getSessionDrainLockPath(queueDir, sessionsTable); + for (let attempt = 0; attempt < 2; attempt++) { + try { + const fd = openSync(lockPath, "wx"); + closeSync(fd); + return () => rmSync(lockPath, { force: true }); + } catch (e) { + if (e?.code !== "EEXIST") + throw e; + if (existsSync4(lockPath) && isStale(lockPath, staleMs)) { + rmSync(lockPath, { force: true }); + continue; + } + return null; + } + } + return null; +} +function getQueuePath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.jsonl`); +} +function getInflightPath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.inflight`); +} +async function flushInflightFile(api, sessionsTable, inflightPath, maxBatchRows) { + const rows = readQueuedRows(inflightPath); + if (rows.length === 0) { + rmSync(inflightPath, { force: true }); + return { rows: 0, batches: 0 }; + } + let ensured = false; + let batches = 0; + const queueDir = dirname(inflightPath); + for (let i = 0; i < rows.length; i += maxBatchRows) { + const chunk = rows.slice(i, i + maxBatchRows); + const sql = buildSessionInsertSql(sessionsTable, chunk); + try { + await api.query(sql); + } catch (e) { + if (isSessionWriteAuthError(e)) { + markSessionWriteDisabled(sessionsTable, errorMessage(e), queueDir); + throw new SessionWriteDisabledError(errorMessage(e)); + } + if (!ensured && isEnsureSessionsTableRetryable(e)) { + try { + await api.ensureSessionsTable(sessionsTable); + } catch (ensureError) { + if (isSessionWriteAuthError(ensureError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(ensureError), queueDir); + throw new SessionWriteDisabledError(errorMessage(ensureError)); + } + throw ensureError; + } + ensured = true; + try { + await api.query(sql); + } catch (retryError) { + if (isSessionWriteAuthError(retryError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(retryError), queueDir); + throw new SessionWriteDisabledError(errorMessage(retryError)); + } + throw retryError; + } + } else { + throw e; + } + } + batches += 1; + } + clearSessionWriteDisabled(sessionsTable, queueDir); + rmSync(inflightPath, { force: true }); + return { rows: rows.length, batches }; +} +function readQueuedRows(path) { + const raw = readFileSync4(path, "utf-8"); + return raw.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => JSON.parse(line)); +} +function requeueInflight(queuePath, inflightPath) { + if (!existsSync4(inflightPath)) + return; + const inflight = readFileSync4(inflightPath, "utf-8"); + appendFileSync2(queuePath, inflight); + rmSync(inflightPath, { force: true }); +} +function recoverStaleInflight(queuePath, inflightPath, staleInflightMs) { + if (!existsSync4(inflightPath) || !isStale(inflightPath, staleInflightMs)) + return; + requeueInflight(queuePath, inflightPath); 
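// Crash-safety of the queue hinges on the rename handshake: flushSessionQueue
// claims work by renaming <session>.jsonl to <session>.inflight (atomic within a
// filesystem), deletes the inflight file only after every batch succeeds, and on
// any error appends its bytes back onto the queue via requeueInflight. A stale
// .inflight therefore always means a crashed flusher, and recovery is the same
// requeue. Lifecycle sketch:
//
//   <id>.jsonl  --renameSync-->  <id>.inflight       claim
//   <id>.inflight  --INSERT batches ok-->  rmSync    done
//   <id>.inflight  --error-->  append to <id>.jsonl, rmSync inflight, retry later
//
// Note a mid-run failure requeues the whole inflight file, so batches that already
// committed may be re-inserted on retry; presumably downstream reads tolerate
// duplicate rows keyed by id (an assumption, not shown in this diff).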
+} +function isStale(path, staleInflightMs) { + return Date.now() - statSync(path).mtimeMs >= staleInflightMs; +} +function listQueuedSessionIds(queueDir, staleInflightMs) { + const sessionIds = /* @__PURE__ */ new Set(); + for (const name of readdirSync(queueDir)) { + if (name.endsWith(".jsonl")) { + sessionIds.add(name.slice(0, -".jsonl".length)); + } else if (name.endsWith(".inflight")) { + const path = join5(queueDir, name); + if (isStale(path, staleInflightMs)) { + sessionIds.add(name.slice(0, -".inflight".length)); + } + } + } + return [...sessionIds].sort(); +} +function isEnsureSessionsTableRetryable(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("does not exist") || message.includes("doesn't exist") || message.includes("relation") || message.includes("not found"); +} +function isSessionWriteAuthError(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("403") || message.includes("401") || message.includes("forbidden") || message.includes("unauthorized"); +} +function markSessionWriteDisabled(sessionsTable, reason, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync3(queueDir, { recursive: true }); + writeFileSync3(getSessionWriteDisabledPath(queueDir, sessionsTable), JSON.stringify({ + disabledAt: (/* @__PURE__ */ new Date()).toISOString(), + reason, + sessionsTable + })); +} +function clearSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR) { + rmSync(getSessionWriteDisabledPath(queueDir, sessionsTable), { force: true }); +} +function isSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, ttlMs = DEFAULT_AUTH_FAILURE_TTL_MS) { + const path = getSessionWriteDisabledPath(queueDir, sessionsTable); + if (!existsSync4(path)) + return false; + try { + const raw = readFileSync4(path, "utf-8"); + const state = JSON.parse(raw); + const ageMs = Date.now() - new Date(state.disabledAt).getTime(); + if (Number.isNaN(ageMs) || ageMs >= ttlMs) { + rmSync(path, { force: true }); + return false; + } + return true; + } catch { + rmSync(path, { force: true }); + return false; + } +} +function getSessionWriteDisabledPath(queueDir, sessionsTable) { + return join5(queueDir, `.${sessionsTable}.disabled.json`); +} +function getSessionDrainLockPath(queueDir, sessionsTable) { + return join5(queueDir, `.${sessionsTable}.drain.lock`); +} +function errorMessage(error) { + return error instanceof Error ? 
error.message : String(error); +} +async function waitForInflightToClear(inflightPath, waitIfBusyMs) { + const startedAt = Date.now(); + while (existsSync4(inflightPath) && Date.now() - startedAt < waitIfBusyMs) { + await sleep2(BUSY_WAIT_STEP_MS); + } +} +function sleep2(ms) { + return new Promise((resolve2) => setTimeout(resolve2, ms)); +} + +// dist/src/hooks/version-check.js +import { existsSync as existsSync5, mkdirSync as mkdirSync4, readFileSync as readFileSync5, writeFileSync as writeFileSync4 } from "node:fs"; +import { dirname as dirname2, join as join6 } from "node:path"; +import { homedir as homedir5 } from "node:os"; +var DEFAULT_VERSION_CACHE_PATH = join6(homedir5(), ".deeplake", ".version-check.json"); +var DEFAULT_VERSION_CACHE_TTL_MS = 60 * 60 * 1e3; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); + const pluginJson = join6(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync5(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join5(dir, "package.json"); + const candidate = join6(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync5(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { } - const parent = dirname(dir); + const parent = dirname2(dir); if (parent === dir) break; dir = parent; } return null; } -async function getLatestVersion(timeoutMs = 3e3) { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(timeoutMs) }); - if (!res.ok) - return null; - const pkg = await res.json(); - return pkg.version ?? 
null; - } catch { - return null; - } -} function isNewer(latest, current) { - const parse = (v) => v.split(".").map(Number); + const parse = (v) => v.replace(/-.*$/, "").split(".").map(Number); const [la, lb, lc] = parse(latest); const [ca, cb, cc] = parse(current); return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; } - -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join6 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join6(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync3(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } +function readVersionCache(cachePath = DEFAULT_VERSION_CACHE_PATH) { + if (!existsSync5(cachePath)) + return null; + try { + const parsed = JSON.parse(readFileSync5(cachePath, "utf-8")); + if (parsed && typeof parsed.checkedAt === "number" && typeof parsed.url === "string" && (typeof parsed.latest === "string" || parsed.latest === null)) { + return parsed; } - }; + } catch { + } + return null; +} +function writeVersionCache(entry, cachePath = DEFAULT_VERSION_CACHE_PATH) { + mkdirSync4(dirname2(cachePath), { recursive: true }); + writeFileSync4(cachePath, JSON.stringify(entry)); +} +function readFreshCachedLatestVersion(url, ttlMs = DEFAULT_VERSION_CACHE_TTL_MS, cachePath = DEFAULT_VERSION_CACHE_PATH, nowMs = Date.now()) { + const cached = readVersionCache(cachePath); + if (!cached || cached.url !== url) + return void 0; + if (nowMs - cached.checkedAt > ttlMs) + return void 0; + return cached.latest; +} +async function getLatestVersionCached(opts) { + const ttlMs = opts.ttlMs ?? DEFAULT_VERSION_CACHE_TTL_MS; + const cachePath = opts.cachePath ?? DEFAULT_VERSION_CACHE_PATH; + const nowMs = opts.nowMs ?? Date.now(); + const fetchImpl = opts.fetchImpl ?? fetch; + const fresh = readFreshCachedLatestVersion(opts.url, ttlMs, cachePath, nowMs); + if (fresh !== void 0) + return fresh; + const stale = readVersionCache(cachePath); + try { + const res = await fetchImpl(opts.url, { signal: AbortSignal.timeout(opts.timeoutMs) }); + const latest = res.ok ? (await res.json()).version ?? null : stale?.latest ?? null; + writeVersionCache({ + checkedAt: nowMs, + latest, + url: opts.url + }, cachePath); + return latest; + } catch { + const latest = stale?.latest ?? 
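// Cache policy at this point: a fresh hit (same URL, within TTL) skips the network
// entirely; a fetch failure or non-OK response falls back to the stale cached
// value; and the cache row is rewritten either way, so each check opens a new TTL
// window. Read-path sketch:
//
//   readFreshCachedLatestVersion(GITHUB_RAW_PKG, 60 * 60 * 1000)
//   //   "1.2.3"    fresh hit: latest known version, skip fetch
//   //   null       fresh "no version known" hit, also skip fetch
//   //   undefined  miss / expired / URL changed -> fall through to fetch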
null; + writeVersionCache({ + checkedAt: nowMs, + latest, + url: opts.url + }, cachePath); + return latest; + } } // dist/src/hooks/session-start-setup.js var log3 = (msg) => log("session-setup", msg); -var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var { log: wikiLog } = makeWikiLogger(join7(homedir4(), ".claude", "hooks")); -async function main() { - if (process.env.HIVEMIND_WIKI_WORKER === "1") +var __bundleDir = dirname3(fileURLToPath2(import.meta.url)); +var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +var VERSION_CHECK_TIMEOUT = 3e3; +var HOME = homedir6(); +var WIKI_LOG = join7(HOME, ".claude", "hooks", "deeplake-wiki.log"); +function wikiLog(msg) { + try { + mkdirSync5(join7(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync3(WIKI_LOG, `[${utcTimestamp()}] ${msg} +`); + } catch { + } +} +async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; + const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); return; - const input = await readStdin(); - const creds = loadCredentials(); + } + const now = (/* @__PURE__ */ new Date()).toISOString(); + const projectName = cwd.split("/").pop() || "unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "" + ].join("\n"); + const filename = `${sessionId}.md`; + await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')`); + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} +async function runSessionStartSetup(input, deps = {}) { + const { wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", creds = loadCredentials(), saveCredentialsFn = saveCredentials, config = loadConfig(), createApi = (activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, activeConfig.tableName), captureEnabled = (process.env.HIVEMIND_CAPTURE ?? 
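// runSessionStartSetup receives every side-effecting collaborator as an
// overridable default in deps, so tests can stub credentials, the API client,
// queue draining, and even execSync without touching module state. Sketch of
// driving it that way (the stub shapes are illustrative, not the real types):
//
//   await runSessionStartSetup({ session_id: "s1", cwd: "/tmp/proj" }, {
//     creds: { token: "t", userName: "dev" },
//     config: null,                       // skip table setup entirely
//     getInstalledVersionFn: () => null,  // skip the update check
//     logFn: (m) => console.log("[test]", m),
//   });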
process.env.DEEPLAKE_CAPTURE) !== "false", drainSessionQueuesFn = drainSessionQueues, isSessionWriteDisabledFn = isSessionWriteDisabled, isSessionWriteAuthErrorFn = isSessionWriteAuthError, markSessionWriteDisabledFn = markSessionWriteDisabled, tryAcquireSessionDrainLockFn = tryAcquireSessionDrainLock, createPlaceholderFn = createPlaceholder, getInstalledVersionFn = getInstalledVersion, getLatestVersionCachedFn = getLatestVersionCached, isNewerFn = isNewer, execSyncFn = execSync2, logFn = log3, wikiLogFn = wikiLog } = deps; + if (wikiWorker) + return { status: "skipped" }; if (!creds?.token) { - log3("no credentials"); - return; + logFn("no credentials"); + return { status: "no_credentials" }; } if (!creds.userName) { try { const { userInfo: userInfo2 } = await import("node:os"); creds.userName = userInfo2().username ?? "unknown"; - saveCredentials(creds); - log3(`backfilled userName: ${creds.userName}`); + saveCredentialsFn(creds); + logFn(`backfilled userName: ${creds.userName}`); } catch { } } - if (input.session_id) { + if (input.session_id && config) { try { - const config = loadConfig(); - if (config) { - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); - await api.ensureTable(); - await api.ensureSessionsTable(config.sessionsTableName); - log3("setup complete"); + const api = createApi(config); + await api.ensureTable(); + if (captureEnabled) { + if (isSessionWriteDisabledFn(config.sessionsTableName)) { + logFn(`sessions table disabled, skipping setup for "${config.sessionsTableName}"`); + } else { + const releaseDrainLock = tryAcquireSessionDrainLockFn(config.sessionsTableName); + if (!releaseDrainLock) { + logFn(`sessions drain already in progress, skipping duplicate setup for "${config.sessionsTableName}"`); + } else { + try { + await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); + const drain = await drainSessionQueuesFn(api, { + sessionsTable: config.sessionsTableName + }); + if (drain.flushedSessions > 0) { + logFn(`drained ${drain.flushedSessions} queued session(s), rows=${drain.rows}, batches=${drain.batches}`); + } + } catch (e) { + if (isSessionWriteAuthErrorFn(e)) { + markSessionWriteDisabledFn(config.sessionsTableName, e.message); + logFn(`sessions table unavailable, skipping setup: ${e.message}`); + } else { + throw e; + } + } finally { + releaseDrainLock(); + } + } + } + await createPlaceholderFn(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); } + logFn("setup complete"); } catch (e) { - log3(`setup failed: ${e.message}`); - wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + logFn(`setup failed: ${e.message}`); + wikiLogFn(`SessionSetup: failed for ${input.session_id}: ${e.message}`); } } const autoupdate = creds.autoupdate !== false; try { - const current = getInstalledVersion(__bundleDir, ".claude-plugin"); + const current = getInstalledVersionFn(__bundleDir, ".claude-plugin"); if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { + const latest = await getLatestVersionCachedFn({ + url: GITHUB_RAW_PKG, + timeoutMs: VERSION_CHECK_TIMEOUT + }); + if (latest && isNewerFn(latest, current)) { if (autoupdate) { - log3(`autoupdate: updating ${current} \u2192 ${latest}`); + logFn(`autoupdate: updating ${current} \u2192 ${latest}`); try { const scopes = ["user", "project", "local", "managed"]; const cmd = scopes.map((s) => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`).join("; "); - execSync2(cmd, { stdio: "ignore", timeout: 6e4 }); + execSyncFn(cmd, { stdio: "ignore", timeout: 6e4 }); process.stderr.write(`\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply. `); - log3(`autoupdate succeeded: ${current} \u2192 ${latest}`); + logFn(`autoupdate succeeded: ${current} \u2192 ${latest}`); } catch (e) { process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed \u2014 run /hivemind:update to upgrade manually. `); - log3(`autoupdate failed: ${e.message}`); + logFn(`autoupdate failed: ${e.message}`); } } else { process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade. 
`); - log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); + logFn(`update available (autoupdate off): ${current} \u2192 ${latest}`); } } else { - log3(`version up to date: ${current}`); + logFn(`version up to date: ${current}`); } } } catch (e) { - log3(`version check failed: ${e.message}`); + logFn(`version check failed: ${e.message}`); } + return { status: "complete" }; } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +async function main() { + const input = await readStdin(); + await runSessionStartSetup(input); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + createPlaceholder, + runSessionStartSetup, + wikiLog +}; diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index 1f815ee..484d175 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -1,11 +1,8 @@ #!/usr/bin/env node // dist/src/hooks/session-start.js -import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join7 } from "node:path"; -import { readdirSync, rmSync } from "node:fs"; -import { execSync as execSync2 } from "node:child_process"; -import { homedir as homedir4 } from "node:os"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname as dirname2, join as join4 } from "node:path"; // dist/src/commands/auth.js import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from "node:fs"; @@ -29,57 +26,29 @@ function saveCredentials(creds) { writeFileSync(CREDS_PATH, JSON.stringify({ ...creds, savedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), { mode: 384 }); } -// dist/src/config.js -import { readFileSync as readFileSync2, existsSync as existsSync2 } from "node:fs"; -import { join as join2 } from "node:path"; -import { homedir as homedir2, userInfo } from "node:os"; -function loadConfig() { - const home = homedir2(); - const credPath = join2(home, ".deeplake", "credentials.json"); - let creds = null; - if (existsSync2(credPath)) { - try { - creds = JSON.parse(readFileSync2(credPath, "utf-8")); - } catch { - return null; - } - } - const env = process.env; - if (!env.HIVEMIND_TOKEN && env.DEEPLAKE_TOKEN) { - process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); - } - const token = env.HIVEMIND_TOKEN ?? env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env.HIVEMIND_ORG_ID ?? env.DEEPLAKE_ORG_ID ?? creds?.orgId; - if (!token || !orgId) - return null; - return { - token, - orgId, - orgName: creds?.orgName ?? orgId, - userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env.HIVEMIND_WORKSPACE_ID ?? env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? 
join2(home, ".deeplake", "memory") - }; +// dist/src/utils/stdin.js +function readStdin() { + return new Promise((resolve2, reject) => { + let data = ""; + process.stdin.setEncoding("utf-8"); + process.stdin.on("data", (chunk) => data += chunk); + process.stdin.on("end", () => { + try { + resolve2(JSON.parse(data)); + } catch (err) { + reject(new Error(`Failed to parse hook input: ${err}`)); + } + }); + process.stdin.on("error", reject); + }); } -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; -import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs"; -import { join as join4 } from "node:path"; -import { tmpdir } from "node:os"; - // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; +import { join as join2 } from "node:path"; +import { homedir as homedir2 } from "node:os"; var DEBUG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); -function utcTimestamp(d = /* @__PURE__ */ new Date()) { - return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; -} +var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; @@ -87,360 +56,57 @@ function log(tag, msg) { `); } -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } } -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -function summarizeSql(sql, maxLen = 220) { - const compact = sql.replace(/\s+/g, " ").trim(); - return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; -} -function traceSql(msg) { - const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; - if (!traceEnabled) - return; - process.stderr.write(`[deeplake-sql] ${msg} -`); - const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; - if (debugFileLog) - log2(msg); -} -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); -var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -function isTimeoutError(error) { - const name = error instanceof Error ? error.name.toLowerCase() : ""; - const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); - return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? 
""; + return /^(1|true|yes|on)$/i.test(raw.trim()); } -function isDuplicateIndexError(error) { - const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); - return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); } -function isSessionInsertQuery(sql) { - return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); } -function isTransientHtml403(text) { - const body = text.toLowerCase(); - return body.includes(" this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - _tablesCache = null; - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - const startedAt = Date.now(); - const summary = summarizeSql(sql); - traceSql(`query start: ${summary}`); - await this._sem.acquire(); - try { - const rows = await this._queryWithRetry(sql); - traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); - return rows; - } catch (e) { - const message = e instanceof Error ? e.message : String(e); - traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); - throw e; - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - const signal = AbortSignal.timeout(QUERY_TIMEOUT_MS); - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - signal, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); - throw lastError; - } - lastError = e instanceof Error ? 
e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); - if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - buildLookupIndexName(table, suffix) { - return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); - } - getLookupIndexMarkerPath(table, suffix) { - const markerKey = [ - this.workspaceId, - this.orgId, - table, - suffix - ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); - return join4(getIndexMarkerDir(), `${markerKey}.json`); - } - hasFreshLookupIndexMarker(table, suffix) { - const markerPath = this.getLookupIndexMarkerPath(table, suffix); - if (!existsSync3(markerPath)) - return false; - try { - const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); - const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; - if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) - return false; - return true; - } catch { - return false; - } - } - markLookupIndexReady(table, suffix) { - mkdirSync2(getIndexMarkerDir(), { recursive: true }); - writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); - } - async ensureLookupIndex(table, suffix, columnsSql) { - if (this.hasFreshLookupIndexMarker(table, suffix)) - return; - const indexName = this.buildLookupIndexName(table, suffix); - try { - await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); - this.markLookupIndexReady(table, suffix); - } catch (e) { - if (isDuplicateIndexError(e)) { - this.markLookupIndexReady(table, suffix); - return; - } - log2(`index "${indexName}" skipped: ${e.message}`); - } - } - /** List all tables in the workspace (with retry). */ - async listTables(forceRefresh = false) { - if (!forceRefresh && this._tablesCache) - return [...this._tablesCache]; - const { tables, cacheable } = await this._fetchTables(); - if (cacheable) - this._tablesCache = [...tables]; - return tables; - } - async _fetchTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return { - tables: (data.tables ?? []).map((t) => t.table_name), - cacheable: true - }; - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return { tables: [], cacheable: false }; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return { tables: [], cacheable: false }; - } - } - return { tables: [], cacheable: false }; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? 
this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - if (!tables.includes(tbl)) - this._tablesCache = [...tables, tbl]; - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - if (!tables.includes(name)) - this._tablesCache = [...tables, name]; - } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); - } -}; -// dist/src/utils/stdin.js -function readStdin() { - return new Promise((resolve, reject) => { - let data = ""; - process.stdin.setEncoding("utf-8"); - process.stdin.on("data", (chunk) => data += chunk); - process.stdin.on("end", () => { - try { - resolve(JSON.parse(data)); - } catch (err) { - reject(new Error(`Failed to parse hook input: ${err}`)); - } - }); - process.stdin.on("error", reject); - }); -} - -// dist/src/utils/version-check.js -import { readFileSync as readFileSync4 } from "node:fs"; -import { dirname, join as join5 } from "node:path"; -var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +// dist/src/hooks/version-check.js +import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; +import { dirname, join as join3 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var DEFAULT_VERSION_CACHE_PATH = join3(homedir3(), ".deeplake", ".version-check.json"); +var DEFAULT_VERSION_CACHE_TTL_MS = 60 * 60 * 1e3; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); + const pluginJson = join3(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync2(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join5(dir, "package.json"); + const candidate = join3(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync2(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return 
pkg.version; } catch { @@ -452,47 +118,38 @@ function getInstalledVersion(bundleDir, pluginManifestDir) { } return null; } -async function getLatestVersion(timeoutMs = 3e3) { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(timeoutMs) }); - if (!res.ok) - return null; - const pkg = await res.json(); - return pkg.version ?? null; - } catch { - return null; - } -} function isNewer(latest, current) { - const parse = (v) => v.split(".").map(Number); + const parse = (v) => v.replace(/-.*$/, "").split(".").map(Number); const [la, lb, lc] = parse(latest); const [ca, cb, cc] = parse(current); return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; } - -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join6 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join6(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync3(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } +function readVersionCache(cachePath = DEFAULT_VERSION_CACHE_PATH) { + if (!existsSync2(cachePath)) + return null; + try { + const parsed = JSON.parse(readFileSync2(cachePath, "utf-8")); + if (parsed && typeof parsed.checkedAt === "number" && typeof parsed.url === "string" && (typeof parsed.latest === "string" || parsed.latest === null)) { + return parsed; } - }; + } catch { + } + return null; +} +function readFreshCachedLatestVersion(url, ttlMs = DEFAULT_VERSION_CACHE_TTL_MS, cachePath = DEFAULT_VERSION_CACHE_PATH, nowMs = Date.now()) { + const cached = readVersionCache(cachePath); + if (!cached || cached.url !== url) + return void 0; + if (nowMs - cached.checkedAt > ttlMs) + return void 0; + return cached.latest; } // dist/src/hooks/session-start.js -var log3 = (msg) => log("session-start", msg); -var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var AUTH_CMD = join7(__bundleDir, "commands", "auth-login.js"); -var context = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: +var log2 = (msg) => log("session-start", msg); +var __bundleDir = dirname2(fileURLToPath2(import.meta.url)); +var AUTH_CMD = join4(__bundleDir, "commands", "auth-login.js"); +var CLAUDE_SESSION_START_CONTEXT = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: 1. Your built-in memory (~/.claude/) \u2014 personal per-project notes 2. Deeplake global memory (~/.deeplake/memory/) \u2014 global memory shared across all sessions, users, and agents in the org @@ -500,9 +157,19 @@ var context = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH w Deeplake memory structure: - ~/.deeplake/memory/index.md \u2014 START HERE, table of all sessions - ~/.deeplake/memory/summaries/username/*.md \u2014 AI-generated wiki summaries per session -- ~/.deeplake/memory/sessions/username/*.jsonl \u2014 raw session data (last resort) +- ~/.deeplake/memory/sessions/{author}/* \u2014 raw session data (last resort) -SEARCH STRATEGY: Always read index.md first. Then read specific summaries. Only read raw JSONL if summaries don't have enough detail. Do NOT jump straight to JSONL files. +SEARCH STRATEGY: Always read index.md first. Then read specific summaries. Only read raw session files if summaries don't have enough detail. 
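A minimal sketch (editorial, not part of the patch) of how the prerelease-aware isNewer and the TTL-gated version cache added just above are expected to compose; the cache values and URL here are illustrative, not taken from a real run:

// Sketch only: mirrors the isNewer / readFreshCachedLatestVersion semantics above.
const parse = (v) => v.replace(/-.*$/, "").split(".").map(Number); // "1.4.0-rc.1" -> [1, 4, 0], no NaN components
const isNewer = (latest, current) => {
  const [la, lb, lc] = parse(latest);
  const [ca, cb, cc] = parse(current);
  return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc;
};
const TTL_MS = 60 * 60 * 1000; // matches DEFAULT_VERSION_CACHE_TTL_MS (one hour)
const cached = { checkedAt: Date.now() - 30 * 60 * 1000, url: "https://example.invalid/package.json", latest: "1.4.0" }; // hypothetical cache file contents
const fresh = Date.now() - cached.checkedAt <= TTL_MS; // true: checked 30 minutes ago, within TTL
console.log(fresh, isNewer("1.4.0", "1.3.9"), isNewer("1.4.0-beta.1", "1.4.0")); // true true false — the prerelease suffix is stripped before comparison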
Do NOT jump straight to raw session files. +When index.md points to likely candidate files, read those exact summaries or session files directly before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked index.md plus at least one likely summary or raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found."

Search command: Grep pattern="keyword" path="~/.deeplake/memory"

@@ -522,142 +189,282 @@ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to

LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying.

Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; -var HOME = homedir4(); -var { log: wikiLog } = makeWikiLogger(join7(HOME, ".claude", "hooks")); -async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; - const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; +var CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) \u2014 personal per-project notes +2. 
Deeplake global memory (~/.deeplake/memory/) \u2014 shared org memory, currently exposed in SESSIONS-ONLY mode for benchmark comparison + +Deeplake memory structure available in this mode: +- ~/.deeplake/memory/sessions/{author}/* \u2014 raw session data + +SEARCH STRATEGY: Search raw session files directly. In this mode, do NOT start with index.md or summaries and do NOT assume those paths exist. +Open the most likely session file directly before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use that session's date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." + +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management \u2014 each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login \u2014 SSO login +- node "HIVEMIND_AUTH_CMD" whoami \u2014 show current user/org +- node "HIVEMIND_AUTH_CMD" org list \u2014 list organizations +- node "HIVEMIND_AUTH_CMD" org switch \u2014 switch organization +- node "HIVEMIND_AUTH_CMD" workspaces \u2014 list workspaces +- node "HIVEMIND_AUTH_CMD" workspace \u2014 switch workspace +- node "HIVEMIND_AUTH_CMD" invite \u2014 invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members \u2014 list members +- node "HIVEMIND_AUTH_CMD" remove \u2014 remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). 
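A minimal sketch (editorial, not part of the patch) of how the truthy-flag helpers added in dist/src/utils/retrieval-mode.js earlier in this diff are expected to gate which of these context templates is emitted; the env values and mode labels here are illustrative:

// Sketch only: same /^(1|true|yes|on)$/i convention as isPsqlMode / isSessionsOnlyMode / isIndexDisabled above.
const isTruthyFlag = (raw) => /^(1|true|yes|on)$/i.test((raw ?? "").trim());
const env = { HIVEMIND_SESSIONS_ONLY: "yes", HIVEMIND_PSQL_MODE: "0" }; // hypothetical environment
const mode = isTruthyFlag(env.HIVEMIND_PSQL_MODE) ? "psql"
  : isTruthyFlag(env.HIVEMIND_SESSIONS_ONLY) ? "sessions-only"
  : isTruthyFlag(env.HIVEMIND_DISABLE_INDEX) ? "no-index"
  : "default";
console.log(mode); // "sessions-only" — psql mode takes precedence, then sessions-only, then no-index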
+ +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) \u2014 personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) \u2014 global memory shared across all sessions, users, and agents in the org + +Deeplake memory structure in this mode: +- ~/.deeplake/memory/summaries/username/*.md \u2014 AI-generated wiki summaries per session +- ~/.deeplake/memory/sessions/{author}/* \u2014 raw session data (last resort) + +INDEX MODE: /index.md is intentionally unavailable for this run. Do NOT try to read it or rely on it. +SEARCH STRATEGY: Start by grepping summaries for the named person, topic, or keyword. Then read the specific matching summaries. Only read raw session files if the summaries don't have enough detail. Do NOT jump straight to raw session files. +If a summary points to a likely source session, open that exact raw session before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file for the named person when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." 
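A minimal sketch (editorial, not part of the patch) of the HIVEMIND_DEBUG=1 logging path these prompts point at, mirroring the dist/src/utils/debug.js wiring earlier in this diff; the entry format is illustrative, since the appendFileSync call body is elided in the hunk above:

// Sketch only: gated file logger; assumes ~/.deeplake already exists.
import { appendFileSync } from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";
const DEBUG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1";
const LOG = join(homedir(), ".deeplake", "hook-debug.log");
function log(tag, msg) {
  if (!DEBUG) return; // no-op unless the flag is exactly "1"
  appendFileSync(LOG, `[${tag}] ${msg}\n`); // format illustrative
}
log("session-start", "example entry"); // written only when HIVEMIND_DEBUG=1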
+ +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management \u2014 each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login \u2014 SSO login +- node "HIVEMIND_AUTH_CMD" whoami \u2014 show current user/org +- node "HIVEMIND_AUTH_CMD" org list \u2014 list organizations +- node "HIVEMIND_AUTH_CMD" org switch \u2014 switch organization +- node "HIVEMIND_AUTH_CMD" workspaces \u2014 list workspaces +- node "HIVEMIND_AUTH_CMD" workspace \u2014 switch workspace +- node "HIVEMIND_AUTH_CMD" invite \u2014 invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members \u2014 list members +- node "HIVEMIND_AUTH_CMD" remove \u2014 remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with targeted SELECTs against memory to find likely sessions or summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. After finding candidate summary rows, re-query memory by exact path. +5. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. +6. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary before concluding the data is absent. +9. 
Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over a paraphrased summary label. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". + +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. 
+- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. +2. Expand connected facts through fact_entity_links and memory_facts. +3. 
Use memory_facts to identify the small set of likely source sessions. +4. Ground every final answer on sessions rows from those source sessions. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +10. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. + +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by name/topic: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. 
+- For list or profile questions, aggregate across the small set of candidate sessions before answering. + +IMPORTANT: Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +function buildSessionStartAdditionalContext(args) { + const template = isPsqlMode() ? isFactsSessionsOnlyPsqlMode() ? CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY : CLAUDE_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX : CLAUDE_SESSION_START_CONTEXT; + const resolvedContext = template.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); + let updateNotice = ""; + if (args.currentVersion) { + if (args.latestVersion && isNewer(args.latestVersion, args.currentVersion)) { + updateNotice = ` + +\u2B06\uFE0F Hivemind update available: ${args.currentVersion} \u2192 ${args.latestVersion}.`; + } else { + updateNotice = ` + +\u2705 Hivemind v${args.currentVersion}`; + } } - const now = (/* @__PURE__ */ new Date()).toISOString(); - const projectName = cwd.split("/").pop() ?? "unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "" - ].join("\n"); - const filename = `${sessionId}.md`; - await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')`); - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); + return args.creds?.token ? `${resolvedContext} + +Logged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${updateNotice}` : `${resolvedContext} + +\u26A0\uFE0F Not logged in to Deeplake. Memory search will not work. Ask the user to run /hivemind:login to authenticate.${updateNotice}`; } -async function main() { - if (process.env.HIVEMIND_WIKI_WORKER === "1") - return; - const input = await readStdin(); - let creds = loadCredentials(); +async function runSessionStartHook(_input, deps = {}) { + const { wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", creds = loadCredentials(), saveCredentialsFn = saveCredentials, currentVersion = getInstalledVersion(__bundleDir, ".claude-plugin"), latestVersion = currentVersion ? readFreshCachedLatestVersion(GITHUB_RAW_PKG, DEFAULT_VERSION_CACHE_TTL_MS) ?? null : null, authCommand = AUTH_CMD, logFn = log2 } = deps; + if (wikiWorker) + return null; if (!creds?.token) { - log3("no credentials found \u2014 run /hivemind:login to authenticate"); + logFn("no credentials found \u2014 run /hivemind:login to authenticate"); } else { - log3(`credentials loaded: org=${creds.orgName ?? 
creds.orgId}`); + logFn(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); if (creds.token && !creds.userName) { try { - const { userInfo: userInfo2 } = await import("node:os"); - creds.userName = userInfo2().username ?? "unknown"; - saveCredentials(creds); - log3(`backfilled and persisted userName: ${creds.userName}`); + const { userInfo } = await import("node:os"); + creds.userName = userInfo().username ?? "unknown"; + saveCredentialsFn(creds); + logFn(`backfilled and persisted userName: ${creds.userName}`); } catch { } } } - const captureEnabled = process.env.HIVEMIND_CAPTURE !== "false"; - if (input.session_id && creds?.token) { - try { - const config = loadConfig(); - if (config) { - const table = config.tableName; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - await api.ensureTable(); - await api.ensureSessionsTable(sessionsTable); - if (captureEnabled) { - await createPlaceholder(api, table, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); - log3("placeholder created"); - } else { - log3("placeholder skipped (HIVEMIND_CAPTURE=false)"); - } - } - } catch (e) { - log3(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } - } - const autoupdate = creds?.autoupdate !== false; - let updateNotice = ""; - try { - const current = getInstalledVersion(__bundleDir, ".claude-plugin"); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log3(`autoupdate: updating ${current} \u2192 ${latest}`); - try { - const scopes = ["user", "project", "local", "managed"]; - const cmd = scopes.map((s) => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null || true`).join("; "); - execSync2(cmd, { stdio: "ignore", timeout: 6e4 }); - try { - const cacheParent = join7(homedir4(), ".claude", "plugins", "cache", "hivemind", "hivemind"); - const entries = readdirSync(cacheParent, { withFileTypes: true }); - for (const e of entries) { - if (e.isDirectory() && e.name !== latest) { - rmSync(join7(cacheParent, e.name), { recursive: true, force: true }); - log3(`cache cleanup: removed old version ${e.name}`); - } - } - } catch (e) { - log3(`cache cleanup failed: ${e.message}`); - } - updateNotice = ` - -\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply.`; - process.stderr.write(`\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply. -`); - log3(`autoupdate succeeded: ${current} \u2192 ${latest}`); - } catch (e) { - updateNotice = ` - -\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed \u2014 run /hivemind:update to upgrade manually.`; - process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed \u2014 run /hivemind:update to upgrade manually. -`); - log3(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = ` - -\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade.`; - process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade. 
-`); - log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); - } - } else { - log3(`version up to date: ${current}`); - updateNotice = ` - -\u2705 Hivemind v${current} (up to date)`; - } - } - } catch (e) { - log3(`version check failed: ${e.message}`); - } - const resolvedContext = context.replace(/HIVEMIND_AUTH_CMD/g, AUTH_CMD); - const additionalContext = creds?.token ? `${resolvedContext} - -Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` : `${resolvedContext} - -\u26A0\uFE0F Not logged in to Deeplake. Memory search will not work. Ask the user to run /hivemind:login to authenticate.${updateNotice}`; - console.log(JSON.stringify({ + return { hookSpecificOutput: { hookEventName: "SessionStart", - additionalContext + additionalContext: buildSessionStartAdditionalContext({ + authCommand, + creds, + currentVersion, + latestVersion + }) } - })); + }; +} +async function main() { + await readStdin(); + const result = await runSessionStartHook({}); + if (result) + console.log(JSON.stringify(result)); } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log2(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + CLAUDE_SESSION_START_CONTEXT, + CLAUDE_SESSION_START_CONTEXT_NO_INDEX, + CLAUDE_SESSION_START_CONTEXT_PSQL, + CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY, + CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY, + buildSessionStartAdditionalContext, + runSessionStartHook +}; diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 0793149..b627405 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66735,12 +66735,12 @@ function loadConfig() { return null; } } - const env2 = process.env; - if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { + const env3 = process.env; + if (!env3.HIVEMIND_TOKEN && env3.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env3.HIVEMIND_TOKEN ?? env3.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env3.HIVEMIND_ORG_ID ?? env3.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -66748,11 +66748,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") + workspaceId: env3.HIVEMIND_WORKSPACE_ID ?? env3.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env3.HIVEMIND_API_URL ?? env3.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env3.HIVEMIND_TABLE ?? env3.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env3.HIVEMIND_SESSIONS_TABLE ?? env3.DEEPLAKE_SESSIONS_TABLE ?? 
"sessions", + graphNodesTableName: env3.HIVEMIND_GRAPH_NODES_TABLE ?? env3.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env3.HIVEMIND_GRAPH_EDGES_TABLE ?? env3.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env3.HIVEMIND_FACTS_TABLE ?? env3.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env3.HIVEMIND_ENTITIES_TABLE ?? env3.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env3.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env3.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env3.HIVEMIND_MEMORY_PATH ?? env3.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") }; } @@ -66799,6 +66804,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66902,10 +66923,10 @@ var DeeplakeApi = class { }); } catch (e6) { if (isTimeoutError(e6)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e6 }); throw lastError; } - lastError = e6 instanceof Error ? e6 : new Error(String(e6)); + lastError = e6 instanceof Error ? new DeeplakeQueryError(e6.message, { sql, cause: e6 }) : new DeeplakeQueryError(String(e6), { sql, cause: e6 }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -66928,9 +66949,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -66987,6 +67012,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e6; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -67084,25 +67132,544 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases 
TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL 
DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", 
"entity_id", "entity_role")`); } }; // dist/src/shell/deeplake-fs.js -import { basename as basename4, posix } from "node:path"; +import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? 
"cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i11 = 0; i11 < dedupedTextRows.length; i11++) { + const row = dedupedTextRows[i11]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i11] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i11] ?? 0) + }); + } + for (let i11 = 0; i11 < dedupedVectorRows.length; i11++) { + const row = dedupedVectorRows[i11]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i11] ?? 0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a15, b26) => b26.fusedScore - a15.fusedScore || b26.vectorScore - a15.vectorScore || b26.textScore - a15.textScore || a15.sourceOrder - b26.sourceOrder || a15.creationDate.localeCompare(b26.creationDate) || a15.path.localeCompare(b26.path)).slice(0, Math.max(0, limit)); +} + +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
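/*
  Raw scores from the two retrieval channels are not comparable, so
  fuseRetrievalRows() softmax-normalizes each channel before mixing.
  softmaxNormalizeScores() subtracts the max score before exponentiating, the
  standard stability trick that leaves the result unchanged because
  exp(s - m) / sum_i exp(s_i - m) equals exp(s) / sum_i exp(s_i). Worked
  example with assumed raw scores:

    softmaxNormalizeScores([2, 1, 0])   // [0.665, 0.245, 0.090] (approx.)
    // fused = vectorWeight * vectorMass + textWeight * textMass, with the
    // weights first re-normalized by normalizeWeights() so they sum to 1.
*/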
500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -67265,24 +67832,9 @@ function normalizeContent(path2, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t6) => { const i11 = t6.indexOf(""); @@ -67326,14 +67878,70 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; + const limit = opts.limit ?? 
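/*
  getRetrievalEmbedder() above builds a single lazily cached HarrierEmbedder
  from environment configuration. An assumed fully-local setup, using only
  variables the function actually reads:

    export HIVEMIND_EMBED_RETRIEVAL_MODEL_ID=onnx-community/harrier-oss-v1-270m-ONNX
    export HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY=1
    export HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR=$HOME/.cache/hivemind-models
*/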
DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const sessionsOnly = isSessionsOnlyMode(); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? semanticQueryText).trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? 
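/*
  searchDeeplakeTables() dispatches on HIVEMIND_GREP_RETRIEVAL_MODE:
  "embedding" ranks by vector similarity alone, "hybrid" fuses vector and
  lexical scores, and anything else keeps the classic LIKE/regex scan with an
  optional BM25 pass over summary. Assumed shell configuration:

    export HIVEMIND_GREP_RETRIEVAL_MODE=hybrid
    export HIVEMIND_HYBRID_VECTOR_WEIGHT=0.8   # re-normalized against the text weight
    export HIVEMIND_HYBRID_TEXT_WEIGHT=0.2
*/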
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content + })); + } + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67364,6 +67972,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i11 + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i11++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -67390,13 +68002,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
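/*
  The prefilter extractors trade regex power for an indexable LIKE: a safe
  literal run inside the pattern becomes a '%literal%' database pre-filter,
  and exact matching happens afterwards in refineGrepMatches(). Expected
  behavior, illustratively, given the escape handling above (boundary escapes
  such as \b or \< contribute no characters; class escapes such as \d abort):

    extractRegexLiteralPrefilter("foo\\d+bar")        // null
    extractRegexLiteralPrefilter("\\bdeeplake\\b")    // "deeplake"
    extractRegexAlternationPrefilters("(cat|dog)")    // ["cat", "dog"]
*/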
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i11 = 0; i11 < pattern.length; i11++) { - const ch = pattern[i11]; + for (let i11 = 0; i11 < unwrapped.length; i11++) { + const ch = unwrapped[i11]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -67424,33 +68037,201 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
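/*
  buildSummaryBm25QueryText() above reduces the grep pattern to a bag of
  unique tokens for the BM25 pass: boundary markers and regex metacharacters
  become spaces, tokens shorter than two characters are dropped, and the raw
  pattern survives only for fixed strings. Traced examples:

    buildSummaryBm25QueryText("(retry|backoff)", false, null, ["retry", "backoff"])
    // -> "retry backoff"
    buildSummaryBm25QueryText("x", true, null, null)
    // -> null (one character is too short even as a fixed string)
*/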
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? 
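/*
  buildHeuristicLexicalQuery() above is the fallback when the BM25 operator is
  unavailable: each of up to eight distinct terms contributes one point via
  ILIKE, and a whole-phrase hit adds up to four more, so exact phrases outrank
  scattered term matches. For queryText "retry backoff" the score expression
  comes out roughly as (contentExpr standing in for summary::text or
  message::text):

    CASE WHEN contentExpr ILIKE '%retry%' THEN 1 ELSE 0 END
      + CASE WHEN contentExpr ILIKE '%backoff%' THEN 1 ELSE 0 END
      + CASE WHEN contentExpr ILIKE '%retry backoff%' THEN 2 ELSE 0 END
*/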
Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { + if (!pattern) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i11 !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i11++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i11)) { + out += "("; + i11 += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i11))) { + const named = pattern.slice(i11).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i11 += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i11 + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
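/*
  translateRegexPatternToSql() above maps JavaScript escapes onto POSIX-style
  classes the SQL engine understands, and returns null for constructs it
  cannot translate, so buildRegexPredicate() drops the regex filter rather
  than mistranslating it. Expected mappings, per the switch above:

    "\\d+-\\w+"       // -> "[[:digit:]]+-[[:alnum:]_]+"
    "\\bfoo\\b"       // -> "\\yfoo\\y"  (\y is the SQL-side word boundary)
    "(?=lookahead)x"  // -> null, predicate omitted
*/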
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -67485,6 +68266,234 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename4(path2) { + const trimmed = path2.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re9 = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function extractHeaderField(text, field) { + const re9 = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts3 = new Date(parsed); + const yyyy = ts3.getUTCFullYear(); + const mm = String(ts3.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts3.getUTCDate()).padStart(2, "0"); + const hh = String(ts3.getUTCHours()).padStart(2, "0"); + const min = String(ts3.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path2 = typeof row.path === "string" ? 
row.path : ""; + if (!path2) + return null; + if (path2.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path2)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename4(path2) || path2; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path: path2, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + +// dist/src/hooks/virtual-table-query.js +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a15, b26) => (b26.sortDate || "").localeCompare(a15.sortDate || "") || a15.path.localeCompare(b26.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. 
Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); + } + return lines.join("\n"); +} +function formatEntryLink(entry) { + const session = entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a15, b26) => b26[1] - a15[1] || a15[0].localeCompare(b26[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
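/*
  The catalog pipeline runs entirely off the summary-format helpers above:
  header bullets and sections are parsed out of the summary markdown, folded
  into a blurb, then aggregated into the People and Projects directories,
  which bucket entries by participant or project, count them, keep a couple of
  recent links, and rank by count with the name as tiebreaker. Illustratively:

    const md = "- **Participants**: Ada, Grace\n## Key Facts\n- shipped v2\n";
    extractSummaryParticipants(md)   // "Ada, Grace"
    splitMetadataList("Ada, Grace")  // ["Ada", "Grace"]
    buildSummaryBlurb(md)            // "Ada, Grace | shipped v2"
*/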
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} + // dist/src/shell/deeplake-fs.js var BATCH_SIZE = 10; var PREFETCH_BATCH_SIZE = 50; @@ -67542,6 +68551,8 @@ var DeeplakeFs = class _DeeplakeFs { // Paths that live in the sessions table (multi-row, read by concatenation) sessionPaths = /* @__PURE__ */ new Set(); sessionsTable = null; + sessionsOnly = false; + indexDisabled = false; constructor(client, table, mountPoint) { this.client = client; this.table = table; @@ -67553,9 +68564,11 @@ var DeeplakeFs = class _DeeplakeFs { static async create(client, table, mount = "/memory", sessionsTable) { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; + fs3.sessionsOnly = isSessionsOnlyMode(); + fs3.indexDisabled = isIndexDisabled(); await client.ensureTable(); let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs3.sessionsOnly ? Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -67611,7 +68624,7 @@ var DeeplakeFs = class _DeeplakeFs { this.pending.delete(filePath); this.flushed.delete(filePath); const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + this.dirs.get(parent)?.delete(basename5(filePath)); } // ── flush / write batching ──────────────────────────────────────────────── scheduleFlush() { @@ -67674,46 +68687,8 @@ var DeeplakeFs = class _DeeplakeFs { } // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } - } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) - continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? 
`[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + const rows = await this.client.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC`); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── /** @@ -67783,7 +68758,7 @@ var DeeplakeFs = class _DeeplakeFs { return buf2; } if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -67802,7 +68777,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); if (this.dirs.has(p22) && !this.files.has(p22)) throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !this.files.has(p22)) { const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); if (realRows.length > 0 && realRows[0]["summary"]) { const text2 = realRows[0]["summary"]; @@ -67821,7 +68796,7 @@ var DeeplakeFs = class _DeeplakeFs { if (pend) return pend.contentText; if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -67847,13 +68822,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length, @@ -67872,13 +68847,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? 
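/*
  Session documents are materialized on read rather than stored whole: every
  row for the path is fetched ordered by creation_date and then the new
  turn_index column (so events sharing a timestamp keep a stable order), and
  joined back into one file. Condensed from the read path above:

    const rows = await client.query(
      `SELECT message FROM "${sessionsTable}" WHERE path = '${sqlStr(p)}' ` +
      `ORDER BY creation_date ASC, turn_index ASC`
    );
    const text = joinSessionMessages(p, rows.map((row) => row["message"]));
*/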
content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length @@ -67910,7 +68885,7 @@ var DeeplakeFs = class _DeeplakeFs { // ── IFileSystem: metadata ───────────────────────────────────────────────── async exists(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return true; return this.files.has(p22) || this.dirs.has(p22); } @@ -67918,7 +68893,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); const isFile = this.files.has(p22); const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, @@ -67958,7 +68933,7 @@ var DeeplakeFs = class _DeeplakeFs { } async realpath(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return p22; if (!this.files.has(p22) && !this.dirs.has(p22)) throw fsErr("ENOENT", "no such file or directory", p22); @@ -67983,14 +68958,14 @@ var DeeplakeFs = class _DeeplakeFs { const parent = parentOf(p22); if (!this.dirs.has(parent)) this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); + this.dirs.get(parent).add(basename5(p22)); } async readdir(path2) { const p22 = normPath(path2); if (!this.dirs.has(p22)) throw fsErr("ENOTDIR", "not a directory", p22); const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -68002,7 +68977,7 @@ var DeeplakeFs = class _DeeplakeFs { const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || !this.sessionsOnly && !this.indexDisabled && child === "/index.md") && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false }; @@ -68038,7 +69013,7 @@ var DeeplakeFs = class _DeeplakeFs { for (const fp of safeToDelete) this.removeFromTree(fp); this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + this.dirs.get(parentOf(p22))?.delete(basename5(p22)); if (safeToDelete.length > 0) { const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); @@ -68713,8 +69688,8 @@ var YargsParser = class { if (typeof envPrefix === "undefined") return; const prefix = typeof envPrefix === "string" ? 
envPrefix : ""; - const env2 = mixin.env(); - Object.keys(env2).forEach(function(envVar) { + const env3 = mixin.env(); + Object.keys(env3).forEach(function(envVar) { if (prefix === "" || envVar.lastIndexOf(prefix, 0) === 0) { const keys = envVar.split("__").map(function(key, i11) { if (i11 === 0) { @@ -68723,7 +69698,7 @@ var YargsParser = class { return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { - setArg(keys.join("."), env2[envVar]); + setArg(keys.join("."), env3[envVar]); } } }); @@ -69034,12 +70009,12 @@ if (nodeVersion) { throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -var env = process ? process.env : {}; +var env2 = process ? process.env : {}; var require2 = createRequire ? createRequire(import.meta.url) : void 0; var parser = new YargsParser({ cwd: process.cwd, env: () => { - return env; + return env2; }, format, normalize, @@ -69112,8 +70087,7 @@ function createGrepCommand(client, fs3, table, sessionsTable) { try { const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), - pathFilter: buildPathFilterForTargets(targets), - limit: 100 + pathFilter: buildPathFilterForTargets(targets) }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions), @@ -69136,7 +70110,8 @@ function createGrepCommand(client, fs3, table, sessionsTable) { } } const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : void 0; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? output.join("\n") + "\n" : "", stderr: "", diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index 02468a3..35c7cf5 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -107,12 +107,60 @@ function releaseLock(sessionId) { // dist/src/hooks/upload-summary.js import { randomUUID } from "node:crypto"; + +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? 
extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} + +// dist/src/hooks/upload-summary.js function esc(s) { return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } function extractDescription(text) { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } async function uploadSummary(query2, params) { const { tableName, vpath, fname, userName, project, agent, text } = params; @@ -130,6 +178,554 @@ async function uploadSummary(query2, params) { return { path: "insert", sql, descLength: desc.length, summaryLength: text.length }; } +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString(value) { + return typeof value === "string" ? 
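A note on how these pieces compose, with an illustrative snippet (invented sample input; assumes the helpers above are in scope): buildSummaryBlurb prefers the header fields and fact bullets, and only falls back to the "What Happened" prose when everything else is missing.

// Hypothetical summary markdown; field and heading names match what the extractors expect.
const sample = [
  "- **Participants**: Caroline, Melanie",
  "- **Topics**: camping, book club",
  "## Searchable Facts",
  "- Melanie planned a camping trip",
  "- Caroline joined a support group",
  "## What Happened",
  "They caught up on summer plans.",
].join("\n");

extractHeaderField(sample, "Participants"); // "Caroline, Melanie"
extractBullets(extractSection(sample, "Searchable Facts")); // both fact bullets, compacted
buildSummaryBlurb(sample);
// "Caroline, Melanie | camping, book club | Melanie planned a camping trip; Caroline joined a support group"
// (parts joined with " | ", capped at 300 chars; "completed" when nothing extractable remains)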
value.trim() : ""; +} +function normalizeAliasList(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function parseGraphExtraction(raw) { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned); + const nodes = Array.isArray(parsed["nodes"]) ? parsed["nodes"] : []; + const edges = Array.isArray(parsed["edges"]) ? parsed["edges"] : []; + return { + nodes: nodes.map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]) + })).filter((node) => node.name), + edges: edges.map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]) + })).filter((edge) => edge.source && edge.target && edge.relation) + }; +} +function slugify(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildGraphNodeId(name, _type = "other") { + return `entity:${slugify(name)}`; +} +function buildNodeSearchText(node) { + return [ + node.name, + node.type ?? "other", + ...node.aliases ?? [], + node.summary ?? "" + ].filter(Boolean).join(" | "); +} +function buildEdgeSearchText(edge, sourceNodeId, targetNodeId) { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId + ].filter(Boolean).join(" | "); +} +function buildKnowledgeGraphPrompt(args) { + return (args.template ?? GRAPH_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +function wrapGraphPhaseError(error, args) { + const wrapped = new Error(`graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +async function replaceSessionGraph(params) { + const ts = params.ts ?? 
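A quick sketch of the canonicalization above (illustrative only; note that buildGraphNodeId ignores its `_type` argument, so every entity type shares a single ID namespace):

buildGraphNodeId("Dr. Seuss");          // "entity:dr_seuss"
buildGraphNodeId("dr seuss", "person"); // "entity:dr_seuss" (same key, so repeated mentions merge)
slugify("  LGBTQ Support Group! ");     // "lgbtq_support_group"
slugify("???");                         // "item" (fallback when every character is stripped)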
(/* @__PURE__ */ new Date()).toISOString(); + const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`; + const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`; + const nodeFilename = `${params.sessionId}.jsonl`; + const edgeFilename = `${params.sessionId}.jsonl`; + const nodeMap = /* @__PURE__ */ new Map(); + for (const node of params.graph.nodes) { + const key = buildGraphNodeId(node.name, node.type); + nodeMap.set(key, { + name: node.name, + type: node.type || "other", + summary: node.summary || "", + aliases: node.aliases || [] + }); + } + for (const edge of params.graph.edges) { + const sourceKey = buildGraphNodeId(edge.source); + const targetKey = buildGraphNodeId(edge.target); + if (!nodeMap.has(sourceKey)) + nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] }); + if (!nodeMap.has(targetKey)) + nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] }); + } + const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: deleteNodesSql + }); + } + try { + await params.query(deleteEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: deleteEdgesSql + }); + } + const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => { + const summary = node.summary || buildSummaryBlurb(`# Graph Node + +${node.name}`); + const aliases = (node.aliases ?? 
[]).join(", "); + const searchText = buildNodeSearchText(node); + return `('${randomUUID2()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', '${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', '${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (nodeRows.length > 0) { + const insertNodesSql = `INSERT INTO "${params.nodesTable}" (id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${nodeRows.join(", ")}`; + try { + await params.query(insertNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: insertNodesSql + }); + } + } + const edgeRows = params.graph.edges.map((edge) => { + const sourceNodeId = buildGraphNodeId(edge.source); + const targetNodeId = buildGraphNodeId(edge.target); + const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId); + const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`; + const evidence = edge.evidence || ""; + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + return `('${randomUUID2()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', '${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (edgeRows.length > 0) { + const insertEdgesSql = `INSERT INTO "${params.edgesTable}" (id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${edgeRows.join(", ")}`; + try { + await params.query(insertEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: insertEdgesSql + }); + } + } + return { nodes: nodeRows.length, edges: edgeRows.length }; +} + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences2(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString2(value) { + return typeof value === "string" ? value.trim() : ""; +} +function normalizeAliases(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString2).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function normalizeFactType(value) { + return normalizeString2(value) || "other"; +} +function normalizeConfidence(value) { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return Math.max(0, Math.min(1, parsed)); + } + return void 0; +} +function slugify2(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildFactId(sessionId, fact, index) { + return [ + "fact", + slugify2(sessionId), + String(index + 1), + slugify2(fact.subject), + slugify2(fact.predicate), + slugify2(fact.object) + ].join(":"); +} +function buildFactSearchText(fact) { + return [ + fact.subject, + ...fact.subjectAliases ?? [], + fact.predicate, + fact.object, + ...fact.objectAliases ?? 
[], + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? "" + ].filter(Boolean).join(" | "); +} +function buildEntitySearchText(entity) { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries + ].filter(Boolean).join(" | "); +} +function mergeDelimited(existing, nextValues) { + const merged = new Set(existing.split(",").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} +function mergePipeDelimited(existing, nextValues, maxItems = 8) { + const merged = new Set(existing.split("|").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + if (merged.has(trimmed)) + continue; + if (merged.size >= maxItems) + break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} +function wrapFactsPhaseError(error, args) { + const wrapped = new Error(`facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +function buildEntityAggregate(entityMap, args) { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) + existing.aliases.add(alias); + if (args.summary) + existing.summaries.add(args.summary); + if (args.searchText) + existing.searchTerms.add(args.searchText); + return existing; + } + const created = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? 
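The two merge helpers act as set unions over delimited strings; their semantics in brief (illustrative calls, assuming the definitions above):

mergeDelimited("Caroline, Mel", ["Mel", "Melanie"]);
// "Caroline, Mel, Melanie" (comma-delimited union, duplicates dropped)

mergePipeDelimited("a | b", ["b", "c", "d"], 3);
// "a | b | c" (pipe-delimited union capped at maxItems, so "d" is not appended)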
[args.searchText] : []) + }; + entityMap.set(entityId, created); + return created; +} +async function upsertEntities(params) { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query(`SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`); + if (existingRows.length === 0) { + const insertSql = `INSERT INTO "${params.entitiesTable}" (id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${randomUUID3()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', '${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? ""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? ""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? ""), [params.sourcePath]); + const existingType = normalizeString2(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = `UPDATE "${params.entitiesTable}" SET canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} +function parseMemoryFactExtraction(raw) { + const cleaned = stripCodeFences2(raw); + const parsed = JSON.parse(cleaned); + const facts = Array.isArray(parsed["facts"]) ? 
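In short, upsertEntities is a per-entity read-modify-write loop rather than a true atomic upsert:

// For each aggregated entity:
//   1. SELECT ... WHERE entity_id = '<id>' LIMIT 1
//   2. no existing row -> INSERT a fresh entity row
//   3. existing row    -> merge aliases / summary / search_text / source ids,
//                         keep a non-"other" existing entity_type, then UPDATE
// Note: the SELECT-then-write sequence is not atomic, so two workers upserting
// the same entity concurrently could still insert duplicate rows.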
parsed["facts"] : []; + const dedupe = /* @__PURE__ */ new Set(); + return { + facts: facts.map((fact) => ({ + subject: normalizeString2(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString2(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString2(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString2(fact["summary"]), + evidence: normalizeString2(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString2(fact["valid_at"]), + validFrom: normalizeString2(fact["valid_from"]), + validTo: normalizeString2(fact["valid_to"]) + })).filter((fact) => fact.subject && fact.predicate && fact.object).filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) + return false; + dedupe.add(key); + return true; + }) + }; +} +function buildMemoryFactTranscript(rows) { + const normalized = rows.map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? row.turnIndex : 0, + speaker: normalizeString2(row.speaker), + text: normalizeString2(row.text), + eventType: normalizeString2(row.eventType) || "message", + turnSummary: normalizeString2(row.turnSummary), + sourceDateTime: normalizeString2(row.sourceDateTime) || normalizeString2(row.creationDate) + })).filter((row) => row.text || row.turnSummary); + if (normalized.length === 0) + return "(no transcript rows)"; + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}` + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} +function buildMemoryFactPrompt(args) { + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +async function replaceSessionFacts(params) { + const ts = params.ts ?? 
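A sketch of what the fact parser tolerates (payload invented for illustration): fenced model output is unwrapped, strings trimmed, predicates snake_cased, and exact subject/predicate/object repeats dropped.

const raw = [
  "```json",
  '{"facts":[',
  '  {"subject":" Caroline ","predicate":"Home Country","object":"Sweden"},',
  '  {"subject":"Caroline","predicate":"home_country","object":"Sweden"}',
  "]}",
  "```",
].join("\n");

parseMemoryFactExtraction(raw).facts;
// [{ subject: "Caroline", predicate: "home_country", object: "Sweden", ... }]
// One fact survives: the code fence is stripped, whitespace trimmed, and the duplicate deduped.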
(/* @__PURE__ */ new Date()).toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql + }); + } + const entityMap = /* @__PURE__ */ new Map(); + const factRows = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? [], + summary, + searchText + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? 
"" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "" + }; + }); + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}` + }); + } + if (factRows.length > 0) { + const values = factRows.map((row) => `('${randomUUID3()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', '${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', '${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', '${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertFactsSql = `INSERT INTO "${params.factsTable}" (id, path, filename, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql + }); + } + } + const linkRows = factRows.flatMap((row) => [ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject" + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object" + } + ]); + if (linkRows.length > 0) { + const values = linkRows.map((row) => `('${randomUUID3()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', '${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, '${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertLinksSql = `INSERT INTO "${params.linksTable}" (id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql + }); + } + } + return { + facts: factRows.length, + entities: entityMap.size, + links: linkRows.length + }; +} + // dist/src/hooks/wiki-worker.js var dlog2 = (msg) => log("wiki-worker", msg); var cfg = 
JSON.parse(readFileSync2(process.argv[2], "utf-8")); @@ -186,7 +782,7 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + const rows = await query(`SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; @@ -246,6 +842,90 @@ async function main() { text }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate + }); + const graphRaw = execFileSync(cfg.claudeBin, [ + "-p", + graphPrompt, + "--no-session-persistence", + "--model", + "haiku", + "--permission-mode", + "bypassPermissions" + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + graph + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e) { + wlog(`graph update failed: ${e.message}`); + } + try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
row["creation_date"] : "" + }))); + const factPrompt = buildMemoryFactPrompt({ + transcriptText, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate + }); + const factsRaw = execFileSync(cfg.claudeBin, [ + "-p", + factPrompt, + "--no-session-persistence", + "--model", + "haiku", + "--permission-mode", + "bypassPermissions" + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + extraction + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e) { + wlog(`fact update failed: ${e.message}`); + } try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index 3bb90a7..b7c7114 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -10,6 +10,16 @@ import { tokenizeShellWords, } from "../../src/hooks/bash-command-compiler.js"; +const originalPsqlMode = process.env.HIVEMIND_PSQL_MODE; +const originalFactsSessionsOnlyPsqlMode = process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + +function restorePsqlMode(): void { + if (originalPsqlMode === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = originalPsqlMode; + if (originalFactsSessionsOnlyPsqlMode === undefined) delete process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + else process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = originalFactsSessionsOnlyPsqlMode; +} + describe("bash-command-compiler parsing", () => { it("splits top-level sequences while respecting quotes", () => { expect(splitTopLevel("cat /a && echo 'x && y' ; ls /b", ["&&", ";"])).toEqual([ @@ -61,6 +71,10 @@ describe("bash-command-compiler parsing", () => { clean: "cat /a", ignoreMissing: true, }); + expect(stripAllowedModifiers("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10")).toEqual({ + clean: "find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; | head -10", + ignoreMissing: true, + }); expect(stripAllowedModifiers("cat /a 2>&1 | head -2")).toEqual({ clean: "cat /a | head -2", ignoreMissing: false, @@ -70,6 +84,7 @@ describe("bash-command-compiler parsing", () => { }); it("parses supported read-only segments", () => { + restorePsqlMode(); expect(parseCompiledSegment("echo ---")).toEqual({ kind: "echo", text: "---" }); expect(parseCompiledSegment("cat /a /b | head -2")).toEqual({ kind: "cat", @@ -161,6 +176,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: "/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -176,6 +192,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: "/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -191,6 +208,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: 
"/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -208,6 +226,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "launch", targetPath: "/", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: true, @@ -225,6 +244,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "launch", targetPath: "/", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: true, @@ -235,9 +255,136 @@ describe("bash-command-compiler parsing", () => { }, lineLimit: 1, }); + expect(parseCompiledSegment("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10")).toEqual({ + kind: "find_grep", + dir: "/sessions", + patterns: ["*.json"], + params: { + pattern: "Melanie", + targetPath: "{}", + recursive: false, + ignoreCase: false, + wordMatch: false, + filesOnly: true, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 10, + }); + expect(parseCompiledSegment("find /sessions -name '*.json' -exec grep -Eli 'support group|lgbtq support' {} \\; | head -10")).toEqual({ + kind: "find_grep", + dir: "/sessions", + patterns: ["*.json"], + params: { + pattern: "support group|lgbtq support", + targetPath: "{}", + recursive: false, + ignoreCase: true, + wordMatch: false, + filesOnly: true, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 10, + }); + expect(parseCompiledSegment("grep -i 'age\\|birthday\\|born.*19\\|born.*20' /sessions/*.json 2>/dev/null | head -3")).toEqual({ + kind: "grep", + params: { + pattern: "age\\|birthday\\|born.*19\\|born.*20", + targetPath: "/sessions/*.json", + recursive: false, + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 3, + }); + }); + + it("parses psql segments only when psql mode is enabled", () => { + delete process.env.HIVEMIND_PSQL_MODE; + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\"")).toBeNull(); + + process.env.HIVEMIND_PSQL_MODE = "1"; + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\" | head -1")).toEqual({ + kind: "psql", + query: "SELECT path, summary FROM memory LIMIT 2", + lineLimit: 1, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT path, summary FROM hivemind.memory LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT node_id, canonical_name, relation FROM graph_nodes JOIN graph_edges ON graph_edges.source_node_id = graph_nodes.node_id LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT node_id, canonical_name, relation FROM graph_nodes JOIN graph_edges ON graph_edges.source_node_id = graph_nodes.node_id LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT 
fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + restorePsqlMode(); + }); + + it("parses only facts-and-sessions psql segments when the mode is enabled", () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\"")).toBeNull(); + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT node_id, canonical_name FROM graph_nodes LIMIT 2\"")).toBeNull(); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + restorePsqlMode(); }); it("rejects unsupported segments and command shapes", () => { + process.env.HIVEMIND_PSQL_MODE = "1"; expect(parseCompiledSegment("cat")).toBeNull(); expect(parseCompiledSegment("echo ok > /x")).toBeNull(); expect(parseCompiledSegment("cat /a | jq '.x'")).toBeNull(); @@ -257,8 +404,10 @@ describe("bash-command-compiler parsing", () => { expect(parseCompiledSegment("find /summaries -name '*.md' | xargs grep -l foo | tail -2")).toBeNull(); expect(parseCompiledSegment("grep foo /a | tail -2")).toBeNull(); expect(parseCompiledSegment("grep foo /a | head nope")).toBeNull(); + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT * FROM memory\" | tail -2")).toBeNull(); expect(parseCompiledBashCommand("cat /a || cat /b")).toBeNull(); expect(parseCompiledBashCommand("cat /a && echo ok > /x")).toBeNull(); + restorePsqlMode(); }); }); @@ -424,6 +573,148 @@ describe("bash-command-compiler execution", () => { expect(output).toBeNull(); }); + it("executes psql queries against normalized memory and sessions table names", async () => { + const query = vi.fn(async (sql: string) => { + if ( + sql.includes('FROM "graph_nodes') || + sql.includes('FROM "graph_edges') || + sql.includes('FROM "memory_entities') || + sql.includes('FROM "memory_facts') || + (sql.includes('FROM "memory_actual"') && !sql.includes('JOIN "sessions_actual"')) + ) { + return []; + } + expect(sql).toContain('FROM "memory_actual"'); + expect(sql).toContain('JOIN "sessions_actual"'); + return [ + { path: "/summaries/locomo/conv_0_session_6_summary.md", summary: "Caroline keeps classic kids books" }, + ]; + }); + + process.env.HIVEMIND_PSQL_MODE = "1"; + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + "sessions_actual", + "psql -At -F '|' -c \"SELECT m.path, m.summary FROM memory m JOIN sessions s ON s.path = m.path WHERE m.summary ILIKE '%Caroline%' LIMIT 1\"", + ); + expect(output).toBe("/summaries/locomo/conv_0_session_6_summary.md|Caroline keeps classic kids books"); + expect(query.mock.calls.some(([sql]) => String(sql).includes('FROM "memory_actual"'))).toBe(true); + restorePsqlMode(); + }); + + 
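Taken together these cases pin down the gating contract: with HIVEMIND_PSQL_MODE unset, psql segments never compile; with HIVEMIND_PSQL_FACTS_SESSIONS_ONLY also set, only the sessions and facts tables remain reachable. A minimal sketch of the guard this implies (function name and the exact "1" truthiness check are assumptions, not the real implementation):

function psqlModeAllowsTable(table: string): boolean {
  if (process.env.HIVEMIND_PSQL_MODE !== "1") return false; // psql compilation disabled entirely
  if (process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY === "1") {
    return table === "sessions" || table === "memory_facts"; // restricted allowlist
  }
  return true; // full mode: memory, sessions, graph and fact tables all reachable
}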
it("executes direct sessions queries against physical per-message rows", async () => { + const query = vi.fn(async (sql: string) => { + if ( + sql.includes('FROM "graph_nodes') || + sql.includes('FROM "graph_edges') || + sql.includes('FROM "memory_entities') || + sql.includes('FROM "memory_facts') || + sql.includes('FROM "memory_actual"') + ) { + return []; + } + expect(sql).toContain('FROM "sessions_actual"'); + expect(sql).toContain("WHERE path = '/sessions/conv_0_session_8.json'"); + return [ + { + path: "/sessions/conv_0_session_8.json", + creation_date: "2023-08-10", + turn_index: 1, + speaker: "Melanie", + text: "We planned a camping trip", + }, + ]; + }); + + process.env.HIVEMIND_PSQL_MODE = "1"; + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + "sessions_actual", + "psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE path = '/sessions/conv_0_session_8.json' AND text ILIKE '%camp%' ORDER BY turn_index ASC LIMIT 1\"", + ); + expect(output).toBe("/sessions/conv_0_session_8.json|2023-08-10|1|Melanie|We planned a camping trip"); + expect(query.mock.calls.some(([sql]) => String(sql).includes('FROM "sessions_actual"'))).toBe(true); + restorePsqlMode(); + }); + + it("matches psql tuples-only empty output semantics", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + const tuplesOnly = await executeCompiledBashCommand( + { query: vi.fn(async () => []) } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"SELECT path FROM memory WHERE summary ILIKE '%missing%'\"", + ); + expect(tuplesOnly).toBe(""); + + const withHeader = await executeCompiledBashCommand( + { query: vi.fn(async () => []) } as any, + "memory", + "sessions", + "psql -F '|' -c \"SELECT path FROM memory WHERE summary ILIKE '%missing%'\"", + ); + expect(withHeader).toBe("(0 rows)"); + restorePsqlMode(); + }); + + it("does not compile unrelated psql commands and rejects invalid hivemind writes", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + await expect(executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"DELETE FROM memory\"", + )).rejects.toThrow("psql mode only supports SELECT queries"); + + const unrelated = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"SELECT * FROM users\"", + ); + expect(unrelated).toBeNull(); + + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + const summaryQuery = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"SELECT * FROM memory\"", + ); + expect(summaryQuery).toBeNull(); + + restorePsqlMode(); + }); + + it("executes facts-and-sessions-only psql queries without summary or graph helper queries", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + + const query = vi.fn(async (sql: string) => { + expect(sql).not.toContain('FROM "memory_actual"'); + expect(sql).not.toContain('FROM "graph_nodes'); + expect(sql).not.toContain('FROM "graph_edges'); + expect(sql).not.toContain("__hm_graph_candidates"); + expect(sql).toContain('FROM "memory_facts_actual"'); + return [ + { fact_id: "f1", subject_name: "Caroline", predicate: "home_country", object_name: "Sweden" }, + ]; + }); + + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + "sessions_actual", + "psql -At -F '|' -c \"SELECT fact_id, subject_name, predicate, object_name 
FROM memory_facts WHERE subject_name ILIKE '%Caroline%' LIMIT 1\"", + ); + + expect(output).toBe("f1|Caroline|home_country|Sweden"); + expect(query).toHaveBeenCalledTimes(1); + restorePsqlMode(); + }); + it("compiles find | xargs grep -l | head into batched path reads", async () => { const findVirtualPathsFn = vi.fn() .mockResolvedValueOnce(["/summaries/a.md", "/summaries/shared.json"]) @@ -454,4 +745,47 @@ describe("bash-command-compiler execution", () => { ); expect(output).toBe("/summaries/a.md"); }); + + it("compiles benchmark trace find -exec grep -l pipelines into the same find_grep plan", async () => { + const findVirtualPathsFn = vi.fn(async () => [ + "/sessions/conv_0_session_2.json", + "/sessions/conv_0_session_5.json", + "/sessions/conv_0_session_8.json", + ]); + const readVirtualPathContentsFn = vi.fn(async () => new Map([ + ["/sessions/conv_0_session_2.json", "{\"dialogue\":[{\"speaker\":\"Melanie\",\"text\":\"camping next month\"}]}"], + ["/sessions/conv_0_session_5.json", "{\"dialogue\":[{\"speaker\":\"Caroline\",\"text\":\"book club\"}]}"], + ["/sessions/conv_0_session_8.json", "{\"dialogue\":[{\"speaker\":\"Melanie\",\"text\":\"museum trip\"}]}"], + ])); + + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10", + { + findVirtualPathsFn: findVirtualPathsFn as any, + readVirtualPathContentsFn: readVirtualPathContentsFn as any, + }, + ); + + expect(findVirtualPathsFn).toHaveBeenCalledWith( + expect.anything(), + "memory", + "sessions", + "/sessions", + "%.json", + ); + expect(readVirtualPathContentsFn).toHaveBeenCalledWith( + expect.anything(), + "memory", + "sessions", + [ + "/sessions/conv_0_session_2.json", + "/sessions/conv_0_session_5.json", + "/sessions/conv_0_session_8.json", + ], + ); + expect(output).toBe("/sessions/conv_0_session_2.json\n/sessions/conv_0_session_8.json"); + }); }); diff --git a/claude-code/tests/benchmark-replay-parity.test.ts b/claude-code/tests/benchmark-replay-parity.test.ts new file mode 100644 index 0000000..d88b350 --- /dev/null +++ b/claude-code/tests/benchmark-replay-parity.test.ts @@ -0,0 +1,446 @@ +import { execFileSync } from "node:child_process"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { executeCompiledBashCommand } from "../../src/hooks/bash-command-compiler.js"; +import { handleGrepDirect, parseBashGrep } from "../../src/hooks/grep-direct.js"; +import { processPreToolUse } from "../../src/hooks/pre-tool-use.js"; + +type FixtureFile = { path: string; content: string }; + +type SessionTurn = { + speaker: string; + dia_id: string; + text: string; +}; + +const baseConfig = { + token: "token", + orgId: "org-1", + orgName: "Acme", + userName: "alice", + workspaceId: "default", + apiUrl: "https://api.example.com", + tableName: "memory", + sessionsTableName: "sessions", + memoryPath: "/tmp/.deeplake/memory", +}; + +function buildSessionFile( + sessionNumber: number, + turns: SessionTurn[], + dateTime = "8:56 pm on 20 July, 2023", +): FixtureFile { + const session = { + conversation_id: 0, + session_number: sessionNumber, + date_time: dateTime, + speakers: { + speaker_a: "Caroline", + speaker_b: "Melanie", + }, + turns, + }; + return { + path: `/sessions/conv_0_session_${sessionNumber}.json`, + content: `${JSON.stringify(session, null, 
2)}\n`,
+  };
+}
+
+function rewriteForLocalRoot(command: string, root: string): string {
+  return command
+    .replaceAll("/sessions", `${root}/sessions`)
+    .replaceAll("/summaries", `${root}/summaries`)
+    .replaceAll("/index.md", `${root}/index.md`);
+}
+
+function runLocalBash(root: string, command: string): string {
+  const localCommand = rewriteForLocalRoot(command, root);
+  try {
+    return execFileSync("/bin/bash", ["-lc", localCommand], {
+      encoding: "utf8",
+    }).trim();
+  } catch (error: any) {
+    return String(error?.stdout ?? "").trim();
+  }
+}
+
+function writeFixture(files: FixtureFile[]): string {
+  const root = mkdtempSync(join(tmpdir(), "hivemind-benchmark-replay-"));
+  for (const file of files) {
+    const fullPath = join(root, file.path.slice(1));
+    mkdirSync(join(fullPath, ".."), { recursive: true });
+    writeFileSync(fullPath, file.content);
+  }
+  return root;
+}
+
+function makeQueryRows(files: FixtureFile[]) {
+  return files.map((file) => ({
+    path: file.path,
+    content: file.content,
+  }));
+}
+
+function likePatternToRegExp(pattern: string): RegExp {
+  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
+  return new RegExp(`^${escaped.replaceAll("%", ".*").replaceAll("_", ".")}$`);
+}
+
+async function runVirtualCommand(files: FixtureFile[], command: string): Promise<string> {
+  const queryRows = makeQueryRows(files);
+  const grepHandler = async (_api: any, _memory: string, _sessions: string, params: any) => {
+    const api = { query: vi.fn().mockResolvedValue(queryRows) } as any;
+    return (await handleGrepDirect(api, "memory", "sessions", params)) ?? "";
+  };
+
+  const compiled = await executeCompiledBashCommand(
+    { query: vi.fn() } as any,
+    "memory",
+    "sessions",
+    command,
+    {
+      readVirtualPathContentsFn: vi.fn(async (_api, _memory, _sessions, paths: string[]) => new Map(
+        paths.map((path) => [path, files.find((file) => file.path === path)?.content ?? null]),
+      )) as any,
+      listVirtualPathRowsForDirsFn: vi.fn(async (_api, _memory, _sessions, dirs: string[]) => new Map(
+        dirs.map((dir) => [
+          dir,
+          files
+            .filter((file) => file.path === dir || file.path.startsWith(`${dir.replace(/\/+$/, "")}/`))
+            .map((file) => ({ path: file.path, size_bytes: Buffer.byteLength(file.content) })),
+        ]),
+      )) as any,
+      findVirtualPathsFn: vi.fn(async (_api, _memory, _sessions, dir: string, filenamePattern: string) => {
+        const dirPrefix = dir.replace(/\/+$/, "") || "/";
+        const matcher = likePatternToRegExp(filenamePattern);
+        return files
+          .filter((file) => file.path.startsWith(`${dirPrefix}/`))
+          .map((file) => file.path)
+          .filter((path) => matcher.test(path.slice(path.lastIndexOf("/") + 1)));
+      }) as any,
+      handleGrepDirectFn: grepHandler as any,
+    },
+  );
+  if (compiled !== null) return compiled.trim();
+
+  const grepParams = parseBashGrep(command);
+  if (!grepParams) {
+    throw new Error(`Command is neither compiled nor grep-direct: ${command}`);
+  }
+  return (await grepHandler(null, "memory", "sessions", grepParams)).trim();
+}
+
+describe("benchmark replay parity", () => {
+  const roots: string[] = [];
+
+  afterEach(() => {
+    while (roots.length > 0) {
+      rmSync(roots.pop()!, { recursive: true, force: true });
+    }
+  });
+
+  it("matches raw output for relationship-status grep", async () => {
+    const files = [
+      buildSessionFile(13, [
+        { dia_id: "D13:1", speaker: "Caroline", text: "I'm single and planning to adopt as a single parent." },
+        { dia_id: "D13:2", speaker: "Caroline", text: "As a transgender woman, the support group changed my life."
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'relationship|dating|partner|married|single|girlfriend|boyfriend' /sessions/conv_0_session_13.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I\'m single and planning to adopt as a single parent."'); + }); + + it("matches raw output for camping-location grep", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:12", speaker: "Melanie", text: "We camped near a mountain lake in a state park last summer." }, + { dia_id: "D10:13", speaker: "Caroline", text: "That sounds beautiful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'mountain|lake|forest|state|park|location|where|place' /sessions/conv_0_session_10.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We camped near a mountain lake in a state park last summer."'); + }); + + it("matches raw output for Dr. Seuss bookshelf grep", async () => { + const files = [ + buildSessionFile(6, [ + { dia_id: "D6:1", speaker: "Melanie", text: "We keep classic kids' books like Dr. Seuss on the bookshelf." }, + { dia_id: "D6:2", speaker: "Caroline", text: "That sounds perfect for the kids." }, + ]), + buildSessionFile(7, [ + { dia_id: "D7:1", speaker: "Caroline", text: "I just started a new counseling course." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'Dr. Seuss|bookshelf|books' /sessions/*.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We keep classic kids\' books like Dr. Seuss on the bookshelf."'); + }); + + it("matches file-list output for find -exec grep with regex alternation", async () => { + const files = [ + buildSessionFile(6, [ + { dia_id: "D6:1", speaker: "Melanie", text: "We keep classic kids' books like Dr. Seuss on the bookshelf." }, + { dia_id: "D6:2", speaker: "Caroline", text: "That sounds perfect for the kids." }, + ]), + buildSessionFile(7, [ + { dia_id: "D7:1", speaker: "Caroline", text: "I just started a new counseling course." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "find /sessions -name '*.json' -exec grep -El 'Dr. Seuss|bookshelf' {} \\; | head -10"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toBe("/sessions/conv_0_session_6.json"); + }); + + it("matches file-list output for case-insensitive find -exec grep regex", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:1", speaker: "Caroline", text: "I joined the LGBTQ support group last Tuesday, July 18, 2023." }, + { dia_id: "D10:2", speaker: "Melanie", text: "That sounds like such a good step." }, + ]), + buildSessionFile(11, [ + { dia_id: "D11:1", speaker: "Caroline", text: "I moved here from Sweden four years ago." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "find /sessions -name '*.json' -exec grep -Eli 'support group|lgbtq support' {} \\; | head -10"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toBe("/sessions/conv_0_session_10.json"); + }); + + it("keeps the 18th-birthday shell-loop case explicitly divergent by returning retry guidance", async () => { + const files = [ + buildSessionFile(12, [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const localCommand = "for file in /sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|year.*old\\|born\\|birthday\\|turn.*18\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\""; + const local = runLocalBash(root, localCommand); + expect(local).toContain("18th birthday"); + + const decision = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "for file in ~/.deeplake/memory/sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|year.*old\\|born\\|birthday\\|turn.*18\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\"", + }, + tool_use_id: "tu-bm-q12", + }, { + config: baseConfig as any, + }); + + expect(decision?.command).toContain("RETRY REQUIRED"); + expect(decision?.description).toContain("unsupported command"); + }); + + it("matches raw output for the direct-grep fallback after the blocked 18th-birthday loop", async () => { + const files = [ + buildSessionFile(12, [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'age|birthday|born.*19|born.*20' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "A friend made it for my 18th birthday ten years ago."'); + }); + + it("matches raw output for support-group date searches", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:1", speaker: "Caroline", text: "I joined the LGBTQ support group last Tuesday, July 18, 2023." }, + { dia_id: "D10:2", speaker: "Melanie", text: "That sounds like such a good step." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'support group|lgbtq support' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I joined the LGBTQ support group last Tuesday, July 18, 2023."'); + }); + + it("matches raw output for move-from-four-years-ago searches", async () => { + const files = [ + buildSessionFile(11, [ + { dia_id: "D11:1", speaker: "Caroline", text: "I moved here from Sweden four years ago." }, + { dia_id: "D11:2", speaker: "Melanie", text: "That must have been a big change." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'moved from|four year|4 year|sweden' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I moved here from Sweden four years ago."'); + }); + + it("matches raw output for Melanie activity aggregation searches", async () => { + const files = [ + buildSessionFile(8, [ + { dia_id: "D8:1", speaker: "Melanie", text: "We tried a pottery workshop, went swimming, and planned our annual camping trip." }, + { dia_id: "D8:2", speaker: "Caroline", text: "That sounds like a full summer." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'pottery|swimming|camping' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We tried a pottery workshop, went swimming, and planned our annual camping trip."'); + }); + + it("matches raw output for Melanie destress searches", async () => { + const files = [ + buildSessionFile(9, [ + { dia_id: "D9:1", speaker: "Melanie", text: "Running helps me destress after busy weeks." }, + { dia_id: "D9:2", speaker: "Caroline", text: "That makes sense." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'destress|stress|running|painting' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "Running helps me destress after busy weeks."'); + }); + + it("matches raw output for q23-style summary markdown searches", async () => { + const files: FixtureFile[] = [ + { + path: "/summaries/locomo/conv_0_session_6_summary.md", + content: [ + "# Session 6", + "## Searchable Facts", + "- Melanie said Charlotte's Web was her favorite book as a child.", + "- The family keeps classic kids' books on the bookshelf.", + "", + ].join("\n"), + }, + { + path: "/summaries/locomo/conv_0_session_7_summary.md", + content: [ + "# Session 7", + "## Searchable Facts", + "- Caroline started a new counseling course.", + "", + ].join("\n"), + }, + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'book|read' /summaries/locomo/conv_0_session_*.md"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(local).toContain("Charlotte's Web"); + }); + + it("matches raw output for grep --regexp= alternation over summaries", async () => { + const files: FixtureFile[] = [ + { + path: "/summaries/locomo/conv_0_session_6_summary.md", + content: [ + "# Session 6", + "## Searchable Facts", + "- Melanie said Charlotte's Web was her favorite book as a child.", + "- The family keeps classic kids' books on the bookshelf.", + "", + ].join("\n"), + }, + { + path: "/summaries/locomo/conv_0_session_7_summary.md", + content: [ + "# Session 7", + "## Searchable Facts", + "- Caroline started a new counseling course.", + "", + ].join("\n"), + }, + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i --regexp='book\\|read' /summaries/locomo/conv_0_session_*.md"; + 
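+    // NOTE (assumption, mirroring the normalizeGrepRegexPattern tests later in
+    // this diff): --regexp='book\|read' is BRE-style alternation, so the virtual
+    // path is expected to normalize it to ERE before matching, e.g.
+    //   normalizeGrepRegexPattern("book\\|read") === "book|read"
+    // which is what keeps the local and virtual grep outputs identical here.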
const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain("Charlotte's Web"); + }); + + it("matches raw output for quoted regex over pretty-printed session json", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:12", speaker: "Melanie", text: "We camped near a mountain lake in a state park last summer." }, + { dia_id: "D10:13", speaker: "Caroline", text: "That sounds beautiful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E '\"dia_id\": \"D10:12\"|\"text\": \"We camped near a mountain lake' /sessions/conv_0_session_10.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"dia_id": "D10:12"'); + expect(virtual).toContain('"text": "We camped near a mountain lake in a state park last summer."'); + }); +}); diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index f427bf7..b1ca795 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -314,6 +314,29 @@ describe("DeeplakeApi.createIndex", () => { }); }); +describe("DeeplakeApi.createSummaryBm25Index", () => { + it("generates correct CREATE INDEX SQL for summary BM25", async () => { + mockFetch.mockResolvedValueOnce(jsonResponse({})); + const api = makeApi("memory"); + await api.createSummaryBm25Index(); + const sql = JSON.parse(mockFetch.mock.calls[0][1].body).query; + expect(sql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(sql).toContain("idx_memory_summary_bm25"); + expect(sql).toContain('ON "memory" USING deeplake_index ("summary")'); + }); +}); + +describe("DeeplakeApi.ensureSummaryBm25Index", () => { + it("creates the summary BM25 index when no fresh marker exists", async () => { + mockFetch.mockResolvedValueOnce(jsonResponse({})); + const api = makeApi("memory"); + await api.ensureSummaryBm25Index(); + const sql = JSON.parse(mockFetch.mock.calls[0][1].body).query; + expect(sql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(sql).toContain("idx_memory_summary_bm25"); + }); +}); + // ── listTables ────────────────────────────────────────────────────────────── describe("DeeplakeApi.listTables", () => { @@ -434,21 +457,25 @@ describe("DeeplakeApi.ensureTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "memory" }] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi("memory"); await api.ensureTable(); await api.ensureSessionsTable("sessions"); - expect(mockFetch).toHaveBeenCalledTimes(3); - const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const tableListCalls = mockFetch.mock.calls.filter(([url]) => String(url).endsWith("/tables")); + expect(tableListCalls).toHaveLength(1); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; expect(createSql).toContain("CREATE TABLE IF NOT EXISTS"); expect(createSql).toContain("sessions"); - const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? ""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); expect(indexSql).toContain("\"path\""); expect(indexSql).toContain("\"creation_date\""); + expect(indexSql).toContain("\"turn_index\""); }); }); @@ -460,19 +487,26 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); - const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; expect(createSql).toContain("CREATE TABLE IF NOT EXISTS"); expect(createSql).toContain("sessions"); expect(createSql).toContain("JSONB"); expect(createSql).toContain("USING deeplake"); - const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + expect(createSql).toContain("session_id TEXT"); + expect(createSql).toContain("turn_index BIGINT"); + expect(createSql).toContain("text TEXT"); + const alterSqls = querySqls.filter((sql) => sql.startsWith("ALTER TABLE")); + expect(alterSqls).toHaveLength(8); + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? ""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); expect(indexSql).toContain("\"sessions\""); - expect(indexSql).toContain("(\"path\", \"creation_date\")"); + expect(indexSql).toContain("(\"path\", \"creation_date\", \"turn_index\")"); }); it("ensures the lookup index when sessions table already exists", async () => { @@ -480,11 +514,14 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); - expect(mockFetch).toHaveBeenCalledTimes(2); - const indexSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? ""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); }); @@ -493,11 +530,16 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); + for (let i = 0; i < 8; i++) mockFetch.mockResolvedValueOnce(jsonResponse({})); mockFetch.mockResolvedValueOnce(jsonResponse("forbidden", 403)); const api = makeApi(); await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); - expect(mockFetch).toHaveBeenCalledTimes(2); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? 
JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + expect(querySqls.some((sql) => sql.includes("CREATE INDEX IF NOT EXISTS"))).toBe(true); }); it("treats duplicate concurrent index creation errors as success and records a local marker", async () => { @@ -505,14 +547,124 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); + for (let i = 0; i < 8; i++) mockFetch.mockResolvedValueOnce(jsonResponse({})); mockFetch.mockResolvedValueOnce(jsonResponse("duplicate key value violates unique constraint \"pg_class_relname_nsp_index\"", 400)); const api = makeApi(); await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); mockFetch.mockReset(); + mockFetch.mockResolvedValue(jsonResponse({})); await api.ensureSessionsTable("sessions"); - expect(mockFetch).not.toHaveBeenCalled(); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + expect(querySqls.some((sql) => sql.includes("CREATE INDEX IF NOT EXISTS"))).toBe(false); + }); +}); + +describe("DeeplakeApi graph tables", () => { + it("creates graph_nodes with searchable graph columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureGraphNodesTable("graph_nodes"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("graph_nodes"); + expect(createSql).toContain("node_id TEXT"); + expect(createSql).toContain("canonical_name TEXT"); + expect(createSql).toContain("search_text TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"node_id"`))).toBe(true); + }); + + it("creates graph_edges with relation columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureGraphEdgesTable("graph_edges"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; + expect(createSql).toContain("graph_edges"); + expect(createSql).toContain("source_node_id TEXT"); + expect(createSql).toContain("target_node_id TEXT"); + expect(createSql).toContain("relation TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"source_node_id", "target_node_id", "relation"`))).toBe(true); + }); +}); + +describe("DeeplakeApi fact tables", () => { + it("creates memory_facts with fact and temporal columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureFactsTable("memory_facts"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("memory_facts"); + expect(createSql).toContain("fact_id TEXT"); + expect(createSql).toContain("subject_entity_id TEXT"); + expect(createSql).toContain("predicate TEXT"); + expect(createSql).toContain("valid_from TEXT"); + expect(querySqls.some((sql) => sql.includes(`"fact_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"source_session_id", "predicate"`))).toBe(true); + }); + + it("creates memory_entities with canonical entity columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureEntitiesTable("memory_entities"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("memory_entities"); + expect(createSql).toContain("entity_id TEXT"); + expect(createSql).toContain("canonical_name TEXT"); + expect(createSql).toContain("aliases TEXT"); + expect(querySqls.some((sql) => sql.includes(`"entity_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"canonical_name"`))).toBe(true); + }); + + it("creates fact_entity_links with linking columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureFactEntityLinksTable("fact_entity_links"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; + expect(createSql).toContain("fact_entity_links"); + expect(createSql).toContain("link_id TEXT"); + expect(createSql).toContain("fact_id TEXT"); + expect(createSql).toContain("entity_id TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id", "entity_id", "entity_role"`))).toBe(true); }); }); diff --git a/claude-code/tests/deeplake-fs.test.ts b/claude-code/tests/deeplake-fs.test.ts index 455b86a..1c564ea 100644 --- a/claude-code/tests/deeplake-fs.test.ts +++ b/claude-code/tests/deeplake-fs.test.ts @@ -47,12 +47,12 @@ function makeClient(seed: Record = {}) { } return []; } - // Virtual index: SELECT path, project, description, creation_date, last_update_date FROM ... WHERE path LIKE '/summaries/%' - if (sql.includes("SELECT path, project, description, creation_date, last_update_date")) { + // Virtual index: SELECT path, project, description, summary, creation_date FROM ... WHERE path LIKE '/summaries/%' + if (sql.includes("SELECT path, project, description, summary, creation_date")) { return rows .filter(r => r.path.startsWith("/summaries/")) .map(r => ({ - path: r.path, project: r.project, description: r.description, + path: r.path, project: r.project, description: r.description, summary: r.summary, creation_date: r.creation_date, last_update_date: r.last_update_date, })); } @@ -804,17 +804,50 @@ describe("virtual index.md", () => { it("generates virtual index when no /index.md row exists", async () => { const { fs } = await makeFsWithSummaries([ - { id: "aaa-111", userName: "alice", project: "my-project", description: "Fixed auth bug", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T11:00:00.000Z", content: "# Session aaa-111" }, - { id: "bbb-222", userName: "alice", project: "other-proj", description: "in progress", creationDate: "2026-04-07T12:00:00.000Z", lastUpdateDate: "2026-04-07T12:00:00.000Z", content: "# Session bbb-222" }, + { + id: "aaa-111", + userName: "alice", + project: "my-project", + description: "Fixed auth bug", + creationDate: "2026-04-07T10:00:00.000Z", + lastUpdateDate: "2026-04-07T11:00:00.000Z", + content: `# Session aaa-111 +- **Source**: /sessions/alice/aaa-111.jsonl +- **Date**: 2026-04-07 +- **Participants**: Alice, Bob +- **Topics**: auth, retries + +## Searchable Facts +- Auth tokens now refresh automatically. 
+`, + }, + { + id: "bbb-222", + userName: "alice", + project: "other-proj", + description: "in progress", + creationDate: "2026-04-07T12:00:00.000Z", + lastUpdateDate: "2026-04-07T12:00:00.000Z", + content: `# Session bbb-222 +- **Source**: /sessions/alice/bbb-222.jsonl +- **Date**: 2026-04-07 +- **Participants**: Alice, Carol +- **Topics**: rollout +`, + }, ]); const content = await fs.readFile("/index.md"); - expect(content).toContain("# Session Index"); - expect(content).toContain("| Session | Conversation | Created | Last Updated | Project | Description |"); + expect(content).toContain("# Memory Index"); + expect(content).toContain("## People"); + expect(content).toContain("## Projects"); + expect(content).toContain("## Summary To Session Catalog"); expect(content).toContain("aaa-111"); expect(content).toContain("bbb-222"); expect(content).toContain("my-project"); - expect(content).toContain("Fixed auth bug"); + expect(content).toContain("Alice, Bob"); + expect(content).toContain("[session](/sessions/alice/aaa-111.jsonl)"); expect(content).toContain("2026-04-07"); + expect(content).toContain("updated: 2026-04-07 11:00 UTC"); }); it("serves real /index.md row when it exists", async () => { @@ -847,6 +880,39 @@ describe("virtual index.md", () => { expect(s.isDirectory).toBe(false); }); + it("hides the virtual index in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const { fs } = await makeFsWithSummaries([ + { id: "aaa-111", userName: "alice", project: "proj", description: "desc", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T10:00:00.000Z", content: "# Session" }, + ]); + expect(await fs.exists("/index.md")).toBe(false); + const entries = await fs.readdir("/"); + expect(entries).not.toContain("index.md"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } + }); + + it("hides the virtual index when index is disabled but still keeps summaries", async () => { + const prev = process.env.HIVEMIND_DISABLE_INDEX; + process.env.HIVEMIND_DISABLE_INDEX = "1"; + try { + const { fs } = await makeFsWithSummaries([ + { id: "aaa-111", userName: "alice", project: "proj", description: "desc", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T10:00:00.000Z", content: "# Session" }, + ]); + expect(await fs.exists("/index.md")).toBe(false); + const entries = await fs.readdir("/"); + expect(entries).not.toContain("index.md"); + expect(entries).toContain("summaries"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_DISABLE_INDEX; + else process.env.HIVEMIND_DISABLE_INDEX = prev; + } + }); + it("virtual index shows all summary rows ordered", async () => { const { fs } = await makeFsWithSummaries([ { id: "old-session", userName: "alice", project: "proj-a", description: "Old work", creationDate: "2026-04-01T10:00:00.000Z", lastUpdateDate: "2026-04-01T11:00:00.000Z", content: "# Old" }, @@ -863,10 +929,8 @@ describe("virtual index.md", () => { it("virtual index handles empty summaries table", async () => { const { fs } = await makeFs({}, "/"); const content = await fs.readFile("/index.md"); - expect(content).toContain("# Session Index"); - expect(content).toContain("| Session | Conversation | Created | Last Updated | Project | Description |"); - // No data rows - const lines = content.split("\n").filter(l => l.startsWith("| [")); + expect(content).toContain("# Memory 
Index"); + const lines = content.split("\n").filter(l => l.startsWith("- [")); expect(lines.length).toBe(0); }); diff --git a/claude-code/tests/embedding-text.test.ts b/claude-code/tests/embedding-text.test.ts new file mode 100644 index 0000000..72332a4 --- /dev/null +++ b/claude-code/tests/embedding-text.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from "vitest"; +import { + buildMemoryEmbeddingText, + buildSessionEmbeddingText, + stableEmbeddingSourceHash, +} from "../../src/embeddings/text.js"; + +describe("embedding text builders", () => { + it("builds a compact memory embedding payload", () => { + const text = buildMemoryEmbeddingText({ + path: "/summaries/alice/session.md", + filename: "session.md", + project: "hivemind", + description: "session summary", + summary: "Discussed local embeddings and retrieval quality.", + }); + + expect(text).toContain("Path: /summaries/alice/session.md"); + expect(text).toContain("Project: hivemind"); + expect(text).toContain("Summary: Discussed local embeddings and retrieval quality."); + }); + + it("builds a session embedding payload from structured turn columns", () => { + const text = buildSessionEmbeddingText({ + path: "/sessions/alice/demo.jsonl", + event_type: "dialogue_turn", + speaker: "user", + text: "Can we run Harrier locally in TypeScript?", + turn_summary: "Asked about local Harrier embeddings.", + source_date_time: "2026-04-20T10:00:00Z", + turn_index: 4, + }); + + expect(text).toContain("Event: dialogue_turn"); + expect(text).toContain("Speaker: user"); + expect(text).toContain("Text: Can we run Harrier locally in TypeScript?"); + expect(text).toContain("Turn summary: Asked about local Harrier embeddings."); + }); + + it("falls back to transcript extraction for session blobs", () => { + const text = buildSessionEmbeddingText({ + path: "/sessions/alice/transcript.json", + message: { + date_time: "2026-04-20", + turns: [ + { speaker: "user", text: "first turn" }, + { speaker: "assistant", text: "second turn" }, + ], + }, + }); + + expect(text).toContain("[user] first turn"); + expect(text).toContain("[assistant] second turn"); + expect(text).toContain("Date: 2026-04-20"); + }); + + it("hashes identical embedding sources deterministically", () => { + expect(stableEmbeddingSourceHash("same text")).toBe(stableEmbeddingSourceHash("same text")); + expect(stableEmbeddingSourceHash("same text")).not.toBe(stableEmbeddingSourceHash("different text")); + }); +}); diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 51339ff..ac9e2c3 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -1,15 +1,19 @@ import { describe, it, expect, vi } from "vitest"; +import { HarrierEmbedder } from "../../src/embeddings/harrier.js"; import { buildGrepSearchOptions, + buildSummaryBm25QueryText, normalizeContent, buildPathFilter, buildPathFilterForTargets, compileGrepRegex, extractRegexAlternationPrefilters, extractRegexLiteralPrefilter, + normalizeGrepRegexPattern, refineGrepMatches, searchDeeplakeTables, grepBothTables, + toSqlRegexPattern, } from "../../src/shell/grep-core.js"; // ── normalizeContent ──────────────────────────────────────────────────────── @@ -36,7 +40,7 @@ describe("normalizeContent: passthrough for non-session paths", () => { }); }); -describe("normalizeContent: turn-array session shape", () => { +describe("normalizeContent: transcript session shape", () => { const raw = JSON.stringify({ date_time: "1:56 pm on 8 May, 2023", speakers: { speaker_a: 
"Avery", speaker_b: "Jordan" }, @@ -46,77 +50,82 @@ describe("normalizeContent: turn-array session shape", () => { ], }); - it("emits date and speakers header", () => { + it("pretty-prints transcript JSON", () => { const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("date: 1:56 pm on 8 May, 2023"); - expect(out).toContain("speakers: Avery, Jordan"); + expect(out).toBe(`${JSON.stringify(JSON.parse(raw), null, 2)}\n`); }); - it("emits one line per turn with dia_id tag", () => { + it("preserves dia_id and turn text in the raw JSON view", () => { const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("[D1:1] Avery: Hey Jordan!"); - expect(out).toContain("[D1:2] Jordan: Hi Avery."); + expect(out).toContain('"dia_id": "D1:1"'); + expect(out).toContain('"text": "Hey Jordan!"'); + expect(out).toContain('"speaker": "Jordan"'); }); - it("falls back gracefully on turns without speaker/text", () => { + it("keeps sparse turns as canonical JSON", () => { const weird = JSON.stringify({ turns: [{}, { speaker: "X" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", weird); - // Must not crash; includes placeholder `?` for missing speaker - expect(out).toContain("?: "); - expect(out).toContain("X: "); + expect(out).toBe(`${JSON.stringify(JSON.parse(weird), null, 2)}\n`); }); - it("omits speakers header when both speaker fields are empty", () => { + it("preserves empty speaker metadata instead of synthesizing headers", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: "", speaker_b: "" }, }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).not.toContain("speakers:"); - expect(out).toContain("A: hi"); + expect(out).toContain('"speaker_a": ""'); + expect(out).toContain('"text": "hi"'); }); - it("emits only speaker_a when speaker_b is missing", () => { + it("preserves single-speaker metadata", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: "Alice" }, }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("speakers: Alice"); + expect(out).toContain('"speaker_a": "Alice"'); }); - it("falls back speaker->name when speaker field is absent on a turn", () => { + it("keeps alternate turn keys in the raw JSON view", () => { const raw = JSON.stringify({ turns: [{ name: "Avery", text: "hi" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("Avery: hi"); + expect(out).toContain('"name": "Avery"'); }); - it("falls back text->content when text field is absent on a turn", () => { + it("keeps content fallback fields in the raw JSON view", () => { const raw = JSON.stringify({ turns: [{ speaker: "X", content: "fallback" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("X: fallback"); + expect(out).toContain('"content": "fallback"'); }); - it("omits dia_id prefix when the turn has no dia_id", () => { + it("leaves missing dia_id fields absent", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("A: hi"); - expect(out).not.toMatch(/\[\]/); + expect(out).toContain('"speaker": "A"'); + expect(out).not.toContain('"dia_id"'); }); - it("emits turns without date/speakers when both are missing", () => { + it("keeps transcript rows without date or speakers", () => { const raw 
= JSON.stringify({ turns: [{ speaker: "A", text: "hi" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).not.toContain("date:"); expect(out).not.toContain("speakers:"); - expect(out).toContain("A: hi"); + expect(out).toContain('"speaker": "A"'); }); - it("returns raw when turns produce an empty serialization", () => { + it("pretty-prints empty transcript arrays", () => { const empty = JSON.stringify({ turns: [] }); - // No header, no turns → trimmed output is empty → fallback to raw const out = normalizeContent("/sessions/alice/chat_1.json", empty); - expect(out).toBe(empty); + expect(out).toBe(`${JSON.stringify(JSON.parse(empty), null, 2)}\n`); + }); + + it("pretty-prints dialogue-array transcripts too", () => { + const dialogue = JSON.stringify({ + dialogue: [{ speaker: "Melanie", text: "camping next month" }], + }); + const out = normalizeContent("/sessions/conv_0_session_2.json", dialogue); + expect(out).toBe(`${JSON.stringify(JSON.parse(dialogue), null, 2)}\n`); }); }); @@ -632,6 +641,106 @@ describe("searchDeeplakeTables", () => { expect(sql).toContain("UNION ALL"); }); + it("uses text BM25 operator for summary searches before ILIKE fallback", async () => { + const api = { + query: vi.fn().mockImplementationOnce(async () => []), + ensureSummaryBm25Index: vi.fn().mockResolvedValue(undefined), + } as any; + await searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: " AND (path = '/x' OR path LIKE '/x/%')", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + bm25QueryText: "book novel literature", + limit: 50, + }); + expect(api.ensureSummaryBm25Index).toHaveBeenCalledWith("memory"); + expect(api.query).toHaveBeenCalledTimes(1); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'book novel literature') DESC"); + expect(sql).toContain('FROM "sessions"'); + }); + + it("uses vector similarity on embedding columns when retrieval mode is embedding", async () => { + const prev = process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + process.env.HIVEMIND_GREP_RETRIEVAL_MODE = "embedding"; + const embedSpy = vi.spyOn(HarrierEmbedder.prototype, "embedQueries").mockResolvedValue([[0.25, -0.5]]); + try { + const api = mockApi([]); + await searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + queryText: "book novel literature", + bm25QueryText: "book novel literature", + limit: 50, + }); + const sql = api.query.mock.calls[0][0] as string; + expect(embedSpy).toHaveBeenCalledWith(["book novel literature"]); + expect(sql).toContain("embedding <#> ARRAY[0.25, -0.5]::float4[]"); + expect(sql).not.toContain("summary::text ILIKE"); + expect(sql).not.toContain("message::text ILIKE"); + } finally { + embedSpy.mockRestore(); + if (prev === undefined) delete process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + else process.env.HIVEMIND_GREP_RETRIEVAL_MODE = prev; + } + }); + + it("runs separate lexical and vector queries then fuses them when retrieval mode is hybrid", async () => { + const prevMode = process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + const prevVector = process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT; + const prevText = process.env.HIVEMIND_HYBRID_TEXT_WEIGHT; + process.env.HIVEMIND_GREP_RETRIEVAL_MODE = "hybrid"; + process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT = "0.6"; + process.env.HIVEMIND_HYBRID_TEXT_WEIGHT = "0.4"; + const embedSpy = vi.spyOn(HarrierEmbedder.prototype, "embedQueries").mockResolvedValue([[0.1, 0.2, 0.3]]); + 
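+    // Assumed fusion contract for the assertions below: the vector and text
+    // queries run separately, and their rows are then merged by weighted score
+    // (HIVEMIND_HYBRID_VECTOR_WEIGHT=0.6 vs HIVEMIND_HYBRID_TEXT_WEIGHT=0.4),
+    // so /summaries/shared.md, which appears in both result sets, fuses to the
+    // top of the combined ranking.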
try { + const api = { + query: vi.fn() + .mockResolvedValueOnce([ + { path: "/summaries/shared.md", content: "shared", source_order: 0, creation_date: "", score: 5 }, + { path: "/sessions/vector.json", content: "vector", source_order: 1, creation_date: "2024-01-01", score: 1 }, + ]) + .mockResolvedValueOnce([ + { path: "/summaries/shared.md", content: "shared", source_order: 0, creation_date: "", score: 4 }, + { path: "/sessions/text.json", content: "text", source_order: 1, creation_date: "2024-01-02", score: 3 }, + ]), + } as any; + const rows = await searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + queryText: "book novel literature", + bm25QueryText: "book novel literature", + limit: 50, + }); + expect(embedSpy).toHaveBeenCalledWith(["book novel literature"]); + expect(api.query).toHaveBeenCalledTimes(2); + const [vectorSql, textSql] = api.query.mock.calls.map((call: unknown[]) => call[0] as string); + expect(vectorSql).toContain("embedding <#> ARRAY[0.10000000149011612"); + expect(textSql).toContain("summary::text <#> 'book novel literature'"); + expect(textSql).toContain("message::text <#> 'book novel literature'"); + expect(vectorSql).not.toContain("deeplake_hybrid_record"); + expect(textSql).not.toContain("deeplake_hybrid_record"); + expect(rows.map((row) => row.path)).toEqual([ + "/summaries/shared.md", + "/sessions/text.json", + "/sessions/vector.json", + ]); + } finally { + embedSpy.mockRestore(); + if (prevMode === undefined) delete process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + else process.env.HIVEMIND_GREP_RETRIEVAL_MODE = prevMode; + if (prevVector === undefined) delete process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT; + else process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT = prevVector; + if (prevText === undefined) delete process.env.HIVEMIND_HYBRID_TEXT_WEIGHT; + else process.env.HIVEMIND_HYBRID_TEXT_WEIGHT = prevText; + } + }); + it("skips LIKE filter when contentScanOnly is true (regex-in-memory mode)", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { @@ -643,6 +752,7 @@ describe("searchDeeplakeTables", () => { const sql = api.query.mock.calls[0][0] as string; expect(sql).not.toContain("summary::text LIKE"); expect(sql).not.toContain("message::text LIKE"); + expect(sql).not.toContain("message::text ~"); }); it("uses a safe literal prefilter for regex scans when available", async () => { @@ -652,27 +762,101 @@ describe("searchDeeplakeTables", () => { contentScanOnly: true, likeOp: "LIKE", escapedPattern: "foo.*bar", + regexPattern: "foo.*bar", prefilterPattern: "foo", }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("summary::text LIKE '%foo%'"); - expect(sql).toContain("message::text LIKE '%foo%'"); + expect(sql).toContain("summary::text ~ 'foo.*bar'"); + expect(sql).toContain("message::text ~ 'foo.*bar'"); + expect(sql).toContain("LIKE '%foo%'"); }); - it("expands alternation prefilters into OR clauses instead of literal pipes", async () => { + it("uses regex predicates for alternation patterns instead of literal pipes", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: true, likeOp: "LIKE", escapedPattern: "relationship|partner|married", + regexPattern: "relationship|partner|married", + prefilterPatterns: ["relationship", "partner", "married"], + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("summary::text ~ 
'relationship|partner|married'"); + expect(sql).toContain("message::text ~ 'relationship|partner|married'"); + }); + + it("uses case-insensitive regex pushdown for ignore-case regex scans", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: true, + likeOp: "ILIKE", + escapedPattern: "relationship|partner|married", + regexPattern: "relationship|partner|married", prefilterPatterns: ["relationship", "partner", "married"], }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("summary::text LIKE '%relationship%'"); - expect(sql).toContain("summary::text LIKE '%partner%'"); - expect(sql).toContain("summary::text LIKE '%married%'"); - expect(sql).not.toContain("relationship|partner|married"); + expect(sql).toContain("summary::text ILIKE '%relationship%'"); + expect(sql).toContain("summary::text ~* 'relationship|partner|married'"); + expect(sql).toContain("message::text ~* 'relationship|partner|married'"); + }); + + it("uses OR ILIKE prefilters for grep BRE alternation patterns", async () => { + const api = mockApi([]); + const opts = buildGrepSearchOptions({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + await searchDeeplakeTables(api, "m", "s", opts); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'book novel literature') DESC"); + expect(sql).toContain("message::text"); + expect(sql).toContain("ILIKE '%book%'"); + expect(sql).toContain("ILIKE '%novel%'"); + expect(sql).toContain("ILIKE '%literature%'"); + expect(sql).not.toContain("ILIKE '%book|novel|literature%'"); + }); + + it("pushes down escaped regex literals for invalid bracketed patterns", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: " AND path = '/index.md'", + contentScanOnly: true, + likeOp: "LIKE", + escapedPattern: "^- [conv_0_session_.*\\]", + regexPattern: "^- [conv_0_session_.*\\]", + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("path = '/index.md'"); + expect(sql).toContain("summary::text ~ '\\\\^- \\\\[conv_0_session_\\\\.\\\\*\\\\\\\\\\\\]'"); + expect(sql).toContain("message::text ~ '\\\\^- \\\\[conv_0_session_\\\\.\\\\*\\\\\\\\\\\\]'"); + }); + + it("falls back to summary ILIKE when BM25 query is rejected", async () => { + const api = { + query: vi.fn() + .mockRejectedValueOnce(new Error("bm25 operator not supported")) + .mockResolvedValueOnce([]), + ensureSummaryBm25Index: vi.fn().mockResolvedValue(undefined), + } as any; + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + bm25QueryText: "book novel literature", + }); + expect(api.query).toHaveBeenCalledTimes(2); + const fallbackSql = api.query.mock.calls[1][0] as string; + expect(fallbackSql).toContain("summary::text ILIKE '%book%'"); + expect(fallbackSql).not.toContain("<#>"); }); it("concatenates rows from both tables into {path, content}", async () => { @@ -709,6 +893,7 @@ describe("searchDeeplakeTables", () => { const api = { query: vi.fn() .mockRejectedValueOnce(new Error("bad union")) + .mockRejectedValueOnce(new Error("bad union")), } as any; await expect(searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", @@ -722,7 +907,25 @@ 
describe("searchDeeplakeTables", () => { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("LIMIT 100"); + expect(sql).toContain("LIMIT 500"); + }); + + it("queries only the sessions table in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const api = { query: vi.fn().mockResolvedValue([]) } as any; + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", contentScanOnly: false, likeOp: "ILIKE", escapedPattern: "foo", + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).not.toContain('FROM "m"'); + expect(sql).toContain('FROM "s"'); + expect(sql).not.toContain("UNION ALL"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } }); }); @@ -759,7 +962,7 @@ describe("grepBothTables", () => { expect(out.length).toBe(1); }); - it("normalizes session JSON before refinement (turn-array sessions)", async () => { + it("greps against canonical raw JSON for transcript sessions", async () => { const sessionContent = JSON.stringify({ turns: [ { dia_id: "D1:1", speaker: "Alice", text: "project foo update" }, @@ -771,8 +974,7 @@ describe("grepBothTables", () => { .mockResolvedValueOnce([{ path: "/sessions/alice/chat_1.json", content: sessionContent }]), } as any; const out = await grepBothTables(api, "m", "s", baseParams, "/"); - // Only the matching turn is returned, not the whole JSON blob - expect(out.some(l => l.includes("[D1:1] Alice: project foo update"))).toBe(true); + expect(out.some(l => l.includes('"text": "project foo update"'))).toBe(true); expect(out.some(l => l.includes("unrelated"))).toBe(false); }); @@ -788,7 +990,8 @@ describe("grepBothTables", () => { const api = mockApi([{ path: "/a", content: "foo middle bar" }]); await grepBothTables(api, "m", "s", { ...baseParams, pattern: "foo.*bar" }, "/"); const [sql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); - expect(sql).toContain("summary::text LIKE '%foo%'"); + expect(sql).toContain("ORDER BY (summary <#> 'foo') DESC"); + expect(sql).toContain("message::text LIKE '%foo%'"); }); it("routes to ILIKE when ignoreCase is set", async () => { @@ -847,7 +1050,9 @@ describe("regex literal prefilter", () => { expect(opts.contentScanOnly).toBe(true); expect(opts.likeOp).toBe("ILIKE"); + expect(opts.regexPattern).toBe("foo.*bar"); expect(opts.prefilterPattern).toBe("foo"); + expect(opts.bm25QueryText).toBe("foo"); expect(opts.pathFilter).toContain("/summaries"); }); @@ -870,14 +1075,44 @@ describe("regex literal prefilter", () => { }, "/summaries"); expect(opts.contentScanOnly).toBe(true); + expect(opts.regexPattern).toBe("relationship|partner|married"); expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]); + expect(opts.bm25QueryText).toBe("relationship partner married"); + }); + + it("unwraps simple grouping around alternations", () => { + expect(extractRegexAlternationPrefilters("(foo|bar)")).toEqual(["foo", "bar"]); + expect(extractRegexAlternationPrefilters("(?:foo|bar)")).toEqual(["foo", "bar"]); + }); + + it("normalizes grep BRE alternation before building search options", () => { + expect(normalizeGrepRegexPattern("book\\|novel\\|literature")).toBe("book|novel|literature"); + + const opts = buildGrepSearchOptions({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + 
filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/summaries"); + + expect(opts.contentScanOnly).toBe(true); + expect(opts.regexPattern).toBe("book|novel|literature"); + expect(opts.prefilterPatterns).toEqual(["book", "novel", "literature"]); + expect(opts.bm25QueryText).toBe("book novel literature"); }); it("rejects alternation prefilters when grouping makes them unsafe", () => { - expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull(); expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]); }); + it("extracts literals through word-boundary escapes", () => { + expect(extractRegexLiteralPrefilter("\\bcountry\\b")).toBe("country"); + }); + it("preserves escaped alternation characters inside a literal branch", () => { expect(extractRegexAlternationPrefilters("foo\\|bar|baz")).toEqual(["foo|bar", "baz"]); expect(extractRegexAlternationPrefilters("foo|bar\\.md")).toEqual(["foo", "bar.md"]); @@ -896,7 +1131,215 @@ describe("regex literal prefilter", () => { }, "/summaries/alice/s1.md"); expect(opts.contentScanOnly).toBe(false); + expect(opts.regexPattern).toBeUndefined(); expect(opts.prefilterPattern).toBeUndefined(); + expect(opts.bm25QueryText).toBe("foo bar"); expect(opts.pathFilter).toBe(" AND path = '/summaries/alice/s1.md'"); }); + + it("builds BM25 query text from regex literals conservatively", () => { + expect(buildSummaryBm25QueryText("home country", false, null, null)).toBe("home country"); + expect(buildSummaryBm25QueryText("book|novel|literature", false, null, ["book", "novel", "literature"])).toBe("book novel literature"); + expect(buildSummaryBm25QueryText(".*", false, null, null)).toBeNull(); + }); + + it("builds SQL-safe regex patterns conservatively", () => { + expect(toSqlRegexPattern("foo.*bar", false)).toBe("foo.*bar"); + expect(toSqlRegexPattern("foo.*bar", true)).toBe("foo.*bar"); + expect(toSqlRegexPattern("^- [conv_0_session_.*\\]", false)).toBe("\\^- \\[conv_0_session_\\.\\*\\\\\\]"); + expect(toSqlRegexPattern("\\bitem\\d+", false)).toBe("\\yitem[[:digit:]]+"); + expect(toSqlRegexPattern("foo(?=bar)", false)).toBeNull(); + }); + + it("compiles grep BRE alternation as real alternation", () => { + const re = compileGrepRegex({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }); + + expect(re.test("She loves literature")).toBe(true); + expect(re.test("A novel inspired her")).toBe(true); + expect(re.test("No match here")).toBe(false); + }); + + describe("benchmark parity", () => { + function mockQueryRows(rows: { path: string; content: string }[]) { + return { query: vi.fn().mockResolvedValueOnce(rows) } as any; + } + + async function runParityCase( + rows: { path: string; content: string }[], + params: { + pattern: string; + ignoreCase: boolean; + wordMatch: boolean; + filesOnly: boolean; + countOnly: boolean; + lineNumber: boolean; + invertMatch: boolean; + fixedString: boolean; + }, + targetPath = "/", + ) { + const api = mockQueryRows(rows); + const remote = await grepBothTables(api, "memory", "sessions", params, targetPath); + const local = refineGrepMatches( + rows.map((row) => ({ path: row.path, content: normalizeContent(row.path, row.content) })), + params, + ); + return { api, remote, local }; + } + + it("matches local grep for LoCoMo-style relationship status lookups", async () => { + const rows = [ + { + path: 
"/summaries/locomo/conv_0_session_3_summary.md", + content: "## Searchable Facts\n- Relationship status: Single.\n- Caroline is researching adoption agencies.\n", + }, + { + path: "/sessions/conv_0_session_3.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D1:1", speaker: "Caroline", text: "I'm single and still planning to adopt on my own." }, + { dia_id: "D1:2", speaker: "Melanie", text: "That sounds like a good plan." }, + ], + }), + }, + ]; + + const params = { + pattern: "single", + ignoreCase: true, + wordMatch: true, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/summaries"); + expect(remote).toEqual(local); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'single') DESC"); + expect(sql).toContain("message::text ILIKE '%single%'"); + }); + + it("matches local grep for LoCoMo-style title and reading mentions", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_6.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D6:1", speaker: "Melanie", text: "Charlotte's Web was my favorite book as a kid." }, + { dia_id: "D6:2", speaker: "Caroline", text: "I can recommend another book if you want." }, + ], + }), + }, + ]; + + const params = { + pattern: "Charlotte\\|book", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + expect(remote.some((line) => line.includes("Charlotte's Web"))).toBe(true); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ILIKE '%Charlotte%'"); + expect(sql).toContain("ILIKE '%book%'"); + }); + + it("matches local grep for LoCoMo-style relative-time phrases", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_12.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." }, + ], + }), + }, + ]; + + const params = { + pattern: "ten years ago", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("message::text ILIKE '%ten years ago%'"); + }); + + it("uses SQL regex pushdown for bracket-anchored patterns and matches the raw-json local output", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_2.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D2:1", speaker: "Melanie", text: "We're thinking about going camping next month." }, + { dia_id: "D2:2", speaker: "Caroline", text: "That should be fun." 
}, + ], + }), + }, + ]; + + const params = { + pattern: "^\\[D2:1\\]", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + expect(remote).toEqual([]); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'D2:1') DESC"); + expect(sql).toContain("message::text ~ '^\\\\[D2:1\\\\]'"); + }); + }); + + it("compiles word-boundary alternation with grouping", () => { + const re = compileGrepRegex({ + pattern: "book\\|novel", + ignoreCase: true, + wordMatch: true, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }); + + expect(re.test("book club")).toBe(true); + expect(re.test("graphic novel")).toBe(true); + expect(re.test("storybook")).toBe(false); + }); }); diff --git a/claude-code/tests/grep-direct.test.ts b/claude-code/tests/grep-direct.test.ts index 0f56c9a..9e5e3fd 100644 --- a/claude-code/tests/grep-direct.test.ts +++ b/claude-code/tests/grep-direct.test.ts @@ -3,7 +3,7 @@ import { parseBashGrep, handleGrepDirect, type GrepParams } from "../../src/hook describe("handleGrepDirect", () => { const baseParams: GrepParams = { - pattern: "foo", targetPath: "/", + pattern: "foo", targetPath: "/", recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, countOnly: false, lineNumber: false, invertMatch: false, fixedString: false, }; @@ -188,7 +188,7 @@ describe("parseBashGrep", () => { it("parses combined flags -ri", () => { const r = parseBashGrep("grep -ri 'pattern' /dir"); expect(r!.ignoreCase).toBe(true); - // -r is no-op (recursive implied) + expect(r!.recursive).toBe(true); }); it("parses combined flags -wni", () => { @@ -201,6 +201,7 @@ describe("parseBashGrep", () => { it("parses -rl flags", () => { const r = parseBashGrep("grep -rl 'pattern' /dir"); expect(r!.filesOnly).toBe(true); + expect(r!.recursive).toBe(true); }); // ── Variants ── diff --git a/claude-code/tests/grep-interceptor.test.ts b/claude-code/tests/grep-interceptor.test.ts index ba7e67b..83c44bd 100644 --- a/claude-code/tests/grep-interceptor.test.ts +++ b/claude-code/tests/grep-interceptor.test.ts @@ -23,12 +23,12 @@ function makeCtx(fs: DeeplakeFs, cwd = "/memory") { // ── Tests ───────────────────────────────────────────────────────────────────── // -// The interceptor now queries both `memory` and `sessions` in parallel with -// LIKE/ILIKE (no more BM25 — the `<#>` query returned 400 on every call), -// and each SQL row returns { path, content } so we no longer need a -// prefetch round-trip to read file content for the regex pass. Prefetch is -// only used as a fallback when SQL returns zero rows and we scan the FS -// cache. Tests below assert that new contract. +// The interceptor now queries both `memory` and `sessions` through grep-core. +// Summary retrieval may use BM25 (`<#>`) while sessions keep LIKE/ILIKE +// filtering, and each SQL row returns { path, content } so we no longer need +// a prefetch round-trip to read file content for the regex pass. Prefetch is +// only used as a fallback when SQL returns zero rows and we scan the FS cache. +// Tests below assert that contract. 
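+//
+// A minimal sketch of that flow, under the contract above (searchDeeplakeTables
+// and refineGrepMatches are the real grep-core helpers; `prefetchFsCache` is a
+// hypothetical stand-in for the fallback scan, not the actual function name):
+//
+//   const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, opts);
+//   const hits = refineGrepMatches(rows, params);
+//   if (hits.length > 0) return hits;
+//   return refineGrepMatches(await prefetchFsCache(params), params);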
describe("grep interceptor", () => { it("returns exitCode=1 when the pattern is missing", async () => { @@ -78,8 +78,7 @@ describe("grep interceptor", () => { const sqls = client.query.mock.calls.map((c: unknown[]) => c[0] as string); expect(sqls.some(s => /FROM "test"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); expect(sqls.some(s => /FROM "sessions"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); - // No BM25 in the new path - expect(sqls.some(s => s.includes("<#>"))).toBe(false); + expect(sqls.some(s => s.includes("<#>"))).toBe(true); expect(result.stdout).toContain("hello world"); expect(result.exitCode).toBe(0); }); diff --git a/claude-code/tests/knowledge-graph.test.ts b/claude-code/tests/knowledge-graph.test.ts new file mode 100644 index 0000000..f341bc0 --- /dev/null +++ b/claude-code/tests/knowledge-graph.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, it } from "vitest"; +import { + buildGraphNodeId, + buildKnowledgeGraphPrompt, + parseGraphExtraction, + replaceSessionGraph, +} from "../../src/hooks/knowledge-graph.js"; + +describe("knowledge-graph", () => { + it("parses fenced JSON graph output", () => { + const graph = parseGraphExtraction(`\`\`\`json +{"nodes":[{"name":"Caroline","type":"person","summary":"Artist","aliases":["Caro"]}],"edges":[{"source":"Caroline","target":"Sweden","relation":"home_country","summary":"Caroline is from Sweden","evidence":"home country"}]} +\`\`\``); + expect(graph.nodes).toHaveLength(1); + expect(graph.edges).toHaveLength(1); + expect(graph.nodes[0].aliases).toEqual(["Caro"]); + expect(graph.edges[0].relation).toBe("home_country"); + }); + + it("uses stable canonical-name node ids", () => { + expect(buildGraphNodeId("Caroline")).toBe("entity:caroline"); + expect(buildGraphNodeId("Dr. Seuss")).toBe("entity:dr_seuss"); + }); + + it("builds a graph prompt with summary and source metadata", () => { + const prompt = buildKnowledgeGraphPrompt({ + summaryText: "# Session\n- **Source**: /sessions/x.json", + sessionId: "sess-1", + sourcePath: "/sessions/x.json", + project: "proj", + }); + expect(prompt).toContain("SESSION ID: sess-1"); + expect(prompt).toContain("SOURCE PATH: /sessions/x.json"); + expect(prompt).toContain("SUMMARY MARKDOWN:"); + }); + + it("replaces per-session node and edge rows using stable ids", async () => { + const calls: string[] = []; + const query = async (sql: string) => { + calls.push(sql); + return []; + }; + const result = await replaceSessionGraph({ + query, + nodesTable: "graph_nodes", + edgesTable: "graph_edges", + sessionId: "sess-1", + userName: "alice", + project: "proj", + agent: "claude_code", + sourcePath: "/sessions/alice/sess-1.jsonl", + graph: { + nodes: [{ name: "Caroline", type: "person", summary: "Artist", aliases: ["Caro"] }], + edges: [{ source: "Caroline", target: "Sweden", relation: "home_country", summary: "Caroline is from Sweden", evidence: "home country" }], + }, + ts: "2026-01-01T00:00:00.000Z", + }); + expect(result).toEqual({ nodes: 2, edges: 1 }); + expect(calls[0]).toContain('DELETE FROM "graph_nodes"'); + expect(calls[1]).toContain('DELETE FROM "graph_edges"'); + expect(calls[2]).toContain('INSERT INTO "graph_nodes"'); + expect(calls[2]).toContain("entity:caroline"); + expect(calls[2]).toContain("entity:sweden"); + expect(calls[3]).toContain('INSERT INTO "graph_edges"'); + expect(calls[3]).toContain("home_country"); + }); +}); diff --git a/claude-code/tests/memory-facts.test.ts b/claude-code/tests/memory-facts.test.ts new file mode 100644 index 0000000..a3da0b8 --- /dev/null +++ 
b/claude-code/tests/memory-facts.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from "vitest"; +import { + buildMemoryFactTranscript, + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "../../src/hooks/memory-facts.js"; + +describe("memory-facts", () => { + it("parses fenced JSON fact output and normalizes predicates", () => { + const extraction = parseMemoryFactExtraction(`\`\`\`json +{"facts":[{"subject":"Caroline","subject_type":"person","predicate":"Home Country","object":"Sweden","object_type":"place","summary":"Caroline's home country is Sweden","evidence":"home country","confidence":0.92}]} +\`\`\``); + expect(extraction.facts).toHaveLength(1); + expect(extraction.facts[0].predicate).toBe("home_country"); + expect(extraction.facts[0].confidence).toBe(0.92); + }); + + it("builds a transcript-backed fact prompt with source metadata", () => { + const prompt = buildMemoryFactPrompt({ + transcriptText: "[turn=1 | time=2023-05-07 | speaker=Caroline] I moved from Sweden.", + sessionId: "sess-1", + sourcePath: "/sessions/x.json", + project: "proj", + }); + expect(prompt).toContain("SESSION ID: sess-1"); + expect(prompt).toContain("SOURCE PATH: /sessions/x.json"); + expect(prompt).toContain("TRANSCRIPT ROWS:"); + expect(prompt).toContain("speaker=Caroline"); + }); + + it("formats transcript rows for fact extraction", () => { + const transcript = buildMemoryFactTranscript([ + { + turnIndex: 1, + speaker: "Caroline", + text: "I moved from Sweden four years ago.", + sourceDateTime: "2023-05-07", + }, + { + turnIndex: 2, + speaker: "Caroline", + text: "I'm a transgender woman.", + turnSummary: "Caroline shares her identity.", + }, + ]); + expect(transcript).toContain("[turn=1 | time=2023-05-07 | speaker=Caroline] I moved from Sweden four years ago."); + expect(transcript).toContain("summary: Caroline shares her identity."); + }); + + it("replaces per-session fact rows and upserts canonical entities", async () => { + const calls: string[] = []; + const query = async (sql: string) => { + calls.push(sql); + if (sql.includes('FROM "memory_entities"')) return []; + return []; + }; + const result = await replaceSessionFacts({ + query, + factsTable: "memory_facts", + entitiesTable: "memory_entities", + linksTable: "fact_entity_links", + sessionId: "sess-1", + userName: "alice", + project: "proj", + agent: "claude_code", + sourcePath: "/sessions/alice/sess-1.jsonl", + extraction: { + facts: [ + { + subject: "Caroline", + subjectType: "person", + predicate: "home_country", + object: "Sweden", + objectType: "place", + summary: "Caroline's home country is Sweden", + evidence: "home country", + confidence: 0.92, + }, + ], + }, + ts: "2026-01-01T00:00:00.000Z", + }); + expect(result).toEqual({ facts: 1, entities: 2, links: 2 }); + expect(calls[0]).toContain('DELETE FROM "memory_facts"'); + expect(calls[1]).toContain('DELETE FROM "fact_entity_links"'); + expect(calls.some((sql) => sql.includes('INSERT INTO "memory_entities"'))).toBe(true); + expect(calls.some((sql) => sql.includes('INSERT INTO "memory_facts"'))).toBe(true); + expect(calls.some((sql) => sql.includes('INSERT INTO "fact_entity_links"'))).toBe(true); + expect(calls.join("\n")).toContain("entity:caroline"); + expect(calls.join("\n")).toContain("entity:sweden"); + const linkInsert = calls.find((sql) => sql.includes('INSERT INTO "fact_entity_links"')); + expect(linkInsert).toContain("fact:sess_1:1:caroline:home_country:sweden"); + expect(linkInsert).toContain("'fact:sess_1:1:caroline:home_country:sweden', 
'entity:caroline', 'subject'"); + expect(linkInsert).toContain("'fact:sess_1:1:caroline:home_country:sweden', 'entity:sweden', 'object'"); + }); +}); diff --git a/claude-code/tests/query-cache.test.ts b/claude-code/tests/query-cache.test.ts index 84f62a9..b14c08b 100644 --- a/claude-code/tests/query-cache.test.ts +++ b/claude-code/tests/query-cache.test.ts @@ -13,6 +13,7 @@ describe("query-cache", () => { const tempRoots: string[] = []; afterEach(() => { + vi.useRealTimers(); for (const root of tempRoots.splice(0)) { rmSync(root, { recursive: true, force: true }); } @@ -65,4 +66,16 @@ describe("query-cache", () => { }); expect(logFn).toHaveBeenCalledWith(expect.stringContaining("clear failed")); }); + + it("drops stale cached content instead of reusing it across long gaps", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "hivemind-query-cache-")); + tempRoots.push(cacheRoot); + + writeCachedIndexContent("session-3", "cached once", { cacheRoot }); + vi.useFakeTimers(); + vi.setSystemTime(Date.now() + (16 * 60 * 1000)); + + expect(readCachedIndexContent("session-3", { cacheRoot })).toBeNull(); + expect(readCachedIndexContent("session-3", { cacheRoot })).toBeNull(); + }); }); diff --git a/claude-code/tests/session-start.test.ts b/claude-code/tests/session-start.test.ts index 0d311cf..0b9ce43 100644 --- a/claude-code/tests/session-start.test.ts +++ b/claude-code/tests/session-start.test.ts @@ -137,6 +137,26 @@ describe("claude-code integration: session-start.js (sync hook)", () => { expect(ctx).toMatch(/Logged in to Deeplake|Not logged in to Deeplake/); }); + it("steers recall tasks toward index-first exact file reads", () => { + const raw = runHook("session-start.js", baseInput); + const parsed = JSON.parse(raw); + const ctx = parsed.hookSpecificOutput.additionalContext; + expect(ctx).toContain("Always read index.md first"); + expect(ctx).toContain("read that exact summary or session file directly"); + expect(ctx).toContain("Do NOT probe unrelated local paths"); + expect(ctx).toContain("answer with the smallest exact phrase supported by memory"); + expect(ctx).toContain("convert the final answer into an absolute month/date/year"); + }); + + it("switches to summary-first guidance when index is disabled", () => { + const raw = runHook("session-start.js", baseInput, { HIVEMIND_DISABLE_INDEX: "1" }); + const parsed = JSON.parse(raw); + const ctx = parsed.hookSpecificOutput.additionalContext; + expect(ctx).toContain("/index.md is intentionally unavailable"); + expect(ctx).toContain("Start by grepping summaries"); + expect(ctx).not.toContain("Always read index.md first"); + }); + it("completes within 3s with no credentials (no server calls)", () => { const start = Date.now(); runHook("session-start.js", baseInput); diff --git a/claude-code/tests/session-summary.test.ts b/claude-code/tests/session-summary.test.ts index 09f123a..cc776f5 100644 --- a/claude-code/tests/session-summary.test.ts +++ b/claude-code/tests/session-summary.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { DeeplakeFs, guessMime } from "../../src/shell/deeplake-fs.js"; +import { extractDescription } from "../../src/hooks/upload-summary.js"; // ── Mock client (same pattern as deeplake-fs.test.ts) ──────────────────────── type Row = { @@ -33,11 +34,11 @@ function makeClient(seed: Record = {}) { const row = match ? rows.find(r => r.path === match[1]) : undefined; return row ? 
[{ summary: row.summary }] : [];
     }
-    if (sql.includes("SELECT path, project, description, creation_date, last_update_date")) {
+    if (sql.includes("SELECT path, project, description, summary, creation_date")) {
       return rows
         .filter(r => r.path.startsWith("/summaries/"))
         .map(r => ({
-          path: r.path, project: r.project, description: r.description,
+          path: r.path, project: r.project, description: r.description, summary: r.summary,
           creation_date: r.creation_date, last_update_date: r.last_update_date,
         }));
     }
@@ -186,7 +187,7 @@ async function uploadSummary(
   const summaryPath = `/summaries/${userName}/${sessionId}.md`;
   await fs.writeFileWithMeta(summaryPath, summaryContent, {
     project: projectName,
-    description: summaryContent.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/)?.[1]?.trim().slice(0, 80) ?? "completed",
+    description: extractDescription(summaryContent),
     lastUpdateDate: new Date().toISOString(),
   });
   await fs.flush();
@@ -302,7 +303,7 @@ describe("session summary — resumed sessions update last_update_date", () => {
     // last_update_date must have changed
     expect(rowAfterEnd.last_update_date).not.toBe(initialDate);
-    // description must be extracted from What Happened section
-    expect(rowAfterEnd.description).toBe("Fixed authentication bug in the login flow. Added retry logic for token refresh.");
+    // description must be re-derived from the summary content
+    expect(rowAfterEnd.description).toBe("Auth tokens now refresh automatically");
     // content must be the full summary
     expect(rowAfterEnd.summary).toContain("## What Happened");
     expect(rowAfterEnd.summary).toContain("## Key Facts");
diff --git a/claude-code/tests/sessions-table.test.ts b/claude-code/tests/sessions-table.test.ts
index 40a254f..c1d0553 100644
--- a/claude-code/tests/sessions-table.test.ts
+++ b/claude-code/tests/sessions-table.test.ts
@@ -77,6 +77,32 @@ function makeClient(memoryRows: Row[] = [], sessionRows: Row[] = []) {
 // ── Tests ───────────────────────────────────────────────────────────────────
 
 describe("DeeplakeFs — sessions table multi-row read", () => {
+  it("reads transcript-shaped session files as canonical pretty JSON", async () => {
+    const transcript = {
+      conversation_id: 0,
+      session_number: 6,
+      turns: [
+        { dia_id: "D6:1", speaker: "Melanie", text: "Charlotte's Web was my favorite book as a kid." },
+        { dia_id: "D6:2", speaker: "Caroline", text: "I can recommend another book if you want."
}, + ], + }; + const sessionRows: Row[] = [ + { + path: "/sessions/conv_0_session_6.json", + text_content: JSON.stringify(transcript), + size_bytes: 128, + mime_type: "application/json", + creation_date: "2026-01-01T00:00:01Z", + }, + ]; + + const client = makeClient([], sessionRows); + const fs = await DeeplakeFs.create(client as never, "memory", "/", "sessions"); + + const content = await fs.readFile("/sessions/conv_0_session_6.json"); + expect(content).toBe(`${JSON.stringify(transcript, null, 2)}\n`); + }); + it("reads session file by normalizing rows ordered by creation_date", async () => { const sessionRows: Row[] = [ { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"user_message","content":"hello"}', size_bytes: 40, mime_type: "application/json", creation_date: "2026-01-01T00:00:01Z" }, diff --git a/claude-code/tests/summary-format.test.ts b/claude-code/tests/summary-format.test.ts new file mode 100644 index 0000000..afa7f9c --- /dev/null +++ b/claude-code/tests/summary-format.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from "vitest"; +import { + buildSummaryBlurb, + buildSummaryIndexLine, + extractHeaderField, + extractSection, + extractSummaryDate, + extractSummaryParticipants, + extractSummarySource, + extractSummaryTopics, +} from "../../src/utils/summary-format.js"; + +const SUMMARY = `# Session conv_0_session_10 +- **Source**: /sessions/conv_0_session_10.json +- **Date**: 8:56 pm on 20 July, 2023 +- **Participants**: Caroline, Melanie +- **Project**: locomo +- **Topics**: LGBTQ activism, family summer traditions + +## What Happened +Caroline and Melanie talked about activism, family trips, and a recent child milestone. + +## Searchable Facts +- Caroline joined Connected LGBTQ Activists last Tuesday. +- Melanie's family takes an annual summer camping trip. +- Melanie's youngest child recently took her first steps. 
+`; + +describe("summary-format", () => { + it("extracts header fields and sections", () => { + expect(extractHeaderField(SUMMARY, "Source")).toBe("/sessions/conv_0_session_10.json"); + expect(extractSection(SUMMARY, "What Happened")).toContain("activism"); + }); + + it("extracts common summary metadata", () => { + expect(extractSummaryDate(SUMMARY)).toBe("8:56 pm on 20 July, 2023"); + expect(extractSummaryParticipants(SUMMARY)).toBe("Caroline, Melanie"); + expect(extractSummaryTopics(SUMMARY)).toBe("LGBTQ activism, family summer traditions"); + expect(extractSummarySource(SUMMARY)).toBe("/sessions/conv_0_session_10.json"); + }); + + it("builds a searchable blurb from participants, topics, and facts", () => { + const blurb = buildSummaryBlurb(SUMMARY); + expect(blurb).toContain("Caroline, Melanie"); + expect(blurb).toContain("Connected LGBTQ Activists"); + expect(blurb).not.toContain("## Searchable Facts"); + }); + + it("builds an index line with source and metadata", () => { + const line = buildSummaryIndexLine({ + path: "/summaries/locomo/conv_0_session_10_summary.md", + project: "locomo", + description: "fallback description", + summary: SUMMARY, + creation_date: "2026-04-18T00:00:00.000Z", + last_update_date: "2026-04-18T13:45:00.000Z", + }); + + expect(line).toContain("conv_0_session_10_summary.md"); + expect(line).toContain("8:56 pm on 20 July, 2023"); + expect(line).toContain("Caroline, Melanie"); + expect(line).toContain("[session](/sessions/conv_0_session_10.json)"); + expect(line).toContain("updated: 2026-04-18 13:45 UTC"); + }); + + it("returns null for rows without a path", () => { + expect(buildSummaryIndexLine({})).toBeNull(); + }); +}); diff --git a/claude-code/tests/upload-summary.test.ts b/claude-code/tests/upload-summary.test.ts index 56eb0e9..7d836b3 100644 --- a/claude-code/tests/upload-summary.test.ts +++ b/claude-code/tests/upload-summary.test.ts @@ -24,6 +24,22 @@ All ten commands executed successfully. **test-project** (directory) — working directory `; +const TEXT_WITH_STRUCTURED_FACTS = `# Session conv_0_session_10 +- **Source**: /sessions/conv_0_session_10.json +- **Date**: 8:56 pm on 20 July, 2023 +- **Participants**: Caroline, Melanie +- **Project**: locomo +- **Topics**: LGBTQ activism, family summer traditions + +## What Happened +Caroline and Melanie talked about activism, family trips, and recent milestones. + +## Searchable Facts +- Caroline joined Connected LGBTQ Activists last Tuesday. +- Melanie's family takes an annual summer camping trip. +- Melanie's youngest child recently took her first steps. 
+`;
+
 function makeSpyQuery(responses: Array<Array<Record<string, unknown>>> = [[]]): { fn: QueryFn; calls: string[] } {
   const calls: string[] = [];
   let i = 0;
@@ -134,12 +150,20 @@ describe("uploadSummary — Deeplake single-UPDATE invariant", () => {
 });
 
 describe("extractDescription", () => {
-  it("extracts the What Happened section trimmed to 300 chars", () => {
+  it("falls back to the What Happened section when no richer structure exists", () => {
     const d = extractDescription(TEXT_WITH_WHAT_HAPPENED);
     expect(d.startsWith("User ran diagnostic commands")).toBe(true);
     expect(d.length).toBeLessThanOrEqual(300);
   });
 
+  it("prefers participants, topics, and searchable facts when present", () => {
+    const d = extractDescription(TEXT_WITH_STRUCTURED_FACTS);
+    expect(d).toContain("Caroline, Melanie");
+    expect(d).toContain("LGBTQ activism, family summer traditions");
+    expect(d).toContain("Connected LGBTQ Activists");
+    expect(d).not.toContain("## Searchable Facts");
+  });
+
   it("returns 'completed' when the section is absent", () => {
     expect(extractDescription("# Only header, nothing else.")).toBe("completed");
   });
diff --git a/claude-code/tests/virtual-table-query.test.ts b/claude-code/tests/virtual-table-query.test.ts
index 013c6c0..adeb685 100644
--- a/claude-code/tests/virtual-table-query.test.ts
+++ b/claude-code/tests/virtual-table-query.test.ts
@@ -16,10 +16,23 @@ describe("virtual-table-query", () => {
         project: "repo",
         description: "session summary",
         creation_date: "2026-01-01T00:00:00.000Z",
+        summary: `# Session s1
+- **Source**: /sessions/a/s1.jsonl
+- **Date**: 2026-01-01
+- **Participants**: Alice, Bob
+- **Topics**: auth, retries
+
+## Searchable Facts
+- Auth tokens refresh automatically.
+`,
       },
     ]);
     expect(content).toContain("# Memory Index");
-    expect(content).toContain("/summaries/alice/s1.md");
+    expect(content).toContain("## People");
+    expect(content).toContain("## Summary To Session Catalog");
+    expect(content).toContain("s1.md");
+    expect(content).toContain("Alice, Bob");
+    expect(content).toContain("[session](/sessions/a/s1.jsonl)");
   });
 
   it("builds index rows when project metadata is missing", () => {
@@ -28,7 +41,7 @@
         path: "/summaries/alice/s2.md",
       },
     ]);
-    expect(content).toContain("/summaries/alice/s2.md");
+    expect(content).toContain("s2.md");
     expect(content).toContain("# Memory Index");
   });
 
@@ -54,17 +67,36 @@ describe("virtual-table-query", () => {
     expect(api.query).not.toHaveBeenCalled();
   });
 
-  it("normalizes session rows for exact path reads", async () => {
+  it("pretty-prints transcript session rows for exact path reads", async () => {
     const api = {
       query: vi.fn().mockResolvedValueOnce([
-        { path: "/sessions/a.jsonl", content: "{\"type\":\"user_message\",\"content\":\"hello\"}", source_order: 1 },
-        { path: "/sessions/a.jsonl", content: "{\"type\":\"assistant_message\",\"content\":\"hi\"}", source_order: 1 },
+        {
+          path: "/sessions/a.json",
+          content: "{\"conversation_id\":0,\"session_number\":1,\"turns\":[{\"speaker\":\"Caroline\",\"text\":\"hello\"},{\"speaker\":\"Melanie\",\"text\":\"hi\"}]}",
+          source_order: 1,
+        },
       ]),
     } as any;
 
-    const content = await readVirtualPathContent(api, "memory", "sessions", "/sessions/a.jsonl");
-
-    expect(content).toBe("[user] hello\n[assistant] hi");
+    const content = await readVirtualPathContent(api, "memory", "sessions", "/sessions/a.json");
+
+    expect(content).toBe([
+      "{",
+      "  \"conversation_id\": 0,",
+      "  \"session_number\": 1,",
+      "  \"turns\": [",
+      "    {",
+      "      \"speaker\": \"Caroline\",",
+      "      \"text\": \"hello\"",
" },", + " {", + " \"speaker\": \"Melanie\",", + " \"text\": \"hi\"", + " }", + " ]", + "}", + "", + ].join("\n")); }); it("reads multiple exact paths in a single query and synthesizes /index.md when needed", async () => { @@ -79,6 +111,11 @@ describe("virtual-table-query", () => { project: "repo", description: "session summary", creation_date: "2026-01-01T00:00:00.000Z", + summary: `# Session s1 +- **Source**: /sessions/a/s1.jsonl +- **Date**: 2026-01-01 +- **Participants**: Alice, Bob +`, }, ]) .mockResolvedValueOnce([]), @@ -92,6 +129,66 @@ describe("virtual-table-query", () => { expect(api.query).toHaveBeenCalledTimes(3); }); + it("skips memory and does not synthesize /index.md in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { + path: "/sessions/a.json", + content: "{\"conversation_id\":0,\"turns\":[{\"speaker\":\"Caroline\",\"text\":\"hello\"}]}", + source_order: 1, + creation_date: "", + }, + ]), + } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", ["/sessions/a.json", "/index.md"]); + + expect(content.get("/sessions/a.json")).toBe([ + "{", + " \"conversation_id\": 0,", + " \"turns\": [", + " {", + " \"speaker\": \"Caroline\",", + " \"text\": \"hello\"", + " }", + " ]", + "}", + "", + ].join("\n")); + expect(content.get("/index.md")).toBeNull(); + expect(api.query).toHaveBeenCalledTimes(1); + expect(String(api.query.mock.calls[0]?.[0])).not.toContain('FROM "memory"'); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } + }); + + it("does not synthesize /index.md when index is disabled but still reads summaries", async () => { + const prev = process.env.HIVEMIND_DISABLE_INDEX; + process.env.HIVEMIND_DISABLE_INDEX = "1"; + try { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", content: "summary body", source_order: 0 }, + ]), + } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", ["/summaries/a.md", "/index.md"]); + + expect(content.get("/summaries/a.md")).toBe("summary body"); + expect(content.get("/index.md")).toBeNull(); + expect(api.query).toHaveBeenCalledTimes(1); + expect(String(api.query.mock.calls[0]?.[0])).not.toContain("'/index.md'"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_DISABLE_INDEX; + else process.env.HIVEMIND_DISABLE_INDEX = prev; + } + }); + it("ignores invalid exact-read rows before merging content", async () => { const api = { query: vi.fn().mockResolvedValueOnce([ diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 67b7919..df45765 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -2,13 +2,13 @@ // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -49,25 +49,21 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? 
env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; -import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; -import { join as join3 } from "node:path"; -import { tmpdir } from "node:os"; - // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); -function utcTimestamp(d = /* @__PURE__ */ new Date()) { - return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; -} function log(tag, msg) { if (!DEBUG) return; @@ -75,364 +71,52 @@ function log(tag, msg) { `); } -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -function summarizeSql(sql, maxLen = 220) { - const compact = sql.replace(/\s+/g, " ").trim(); - return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; -} -function traceSql(msg) { - const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; - if (!traceEnabled) - return; - process.stderr.write(`[deeplake-sql] ${msg} -`); - const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; - if (debugFileLog) - log2(msg); -} -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); -var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -function isTimeoutError(error) { - const name = error instanceof Error ? error.name.toLowerCase() : ""; - const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); - return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); -} -function isDuplicateIndexError(error) { - const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); - return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); -} -function isSessionInsertQuery(sql) { - return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); -} -function isTransientHtml403(text) { - const body = text.toLowerCase(); - return body.includes(" this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - _tablesCache = null; - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - const startedAt = Date.now(); - const summary = summarizeSql(sql); - traceSql(`query start: ${summary}`); - await this._sem.acquire(); - try { - const rows = await this._queryWithRetry(sql); - traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); - return rows; - } catch (e) { - const message = e instanceof Error ? e.message : String(e); - traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); - throw e; - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - const signal = AbortSignal.timeout(QUERY_TIMEOUT_MS); - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - signal, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); - throw lastError; - } - lastError = e instanceof Error ? e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); - if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. 
*/ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - buildLookupIndexName(table, suffix) { - return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); - } - getLookupIndexMarkerPath(table, suffix) { - const markerKey = [ - this.workspaceId, - this.orgId, - table, - suffix - ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); - return join3(getIndexMarkerDir(), `${markerKey}.json`); - } - hasFreshLookupIndexMarker(table, suffix) { - const markerPath = this.getLookupIndexMarkerPath(table, suffix); - if (!existsSync2(markerPath)) - return false; - try { - const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); - const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; - if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) - return false; - return true; - } catch { - return false; - } - } - markLookupIndexReady(table, suffix) { - mkdirSync(getIndexMarkerDir(), { recursive: true }); - writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); - } - async ensureLookupIndex(table, suffix, columnsSql) { - if (this.hasFreshLookupIndexMarker(table, suffix)) - return; - const indexName = this.buildLookupIndexName(table, suffix); - try { - await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); - this.markLookupIndexReady(table, suffix); - } catch (e) { - if (isDuplicateIndexError(e)) { - this.markLookupIndexReady(table, suffix); - return; - } - log2(`index "${indexName}" skipped: ${e.message}`); - } - } - /** List all tables in the workspace (with retry). */ - async listTables(forceRefresh = false) { - if (!forceRefresh && this._tablesCache) - return [...this._tablesCache]; - const { tables, cacheable } = await this._fetchTables(); - if (cacheable) - this._tablesCache = [...tables]; - return tables; - } - async _fetchTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return { - tables: (data.tables ?? []).map((t) => t.table_name), - cacheable: true - }; - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return { tables: [], cacheable: false }; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return { tables: [], cacheable: false }; - } - } - return { tables: [], cacheable: false }; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - if (!tables.includes(tbl)) - this._tablesCache = [...tables, tbl]; - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - if (!tables.includes(name)) - this._tablesCache = [...tables, name]; - } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; } -}; - -// dist/src/utils/session-path.js -function buildSessionPath(config, sessionId) { - const workspace = config.workspaceId ?? "default"; - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${workspace}_${sessionId}.jsonl`; } // dist/src/hooks/summary-state.js -import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync2, renameSync, existsSync as existsSync3, unlinkSync, openSync, closeSync } from "node:fs"; +import { readFileSync as readFileSync2, writeFileSync, writeSync, mkdirSync, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; import { homedir as homedir3 } from "node:os"; -import { join as join4 } from "node:path"; +import { join as join3 } from "node:path"; var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join4(homedir3(), ".claude", "hooks", "summary-state"); +var STATE_DIR = join3(homedir3(), ".claude", "hooks", "summary-state"); var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); function statePath(sessionId) { - return join4(STATE_DIR, `${sessionId}.json`); + return join3(STATE_DIR, `${sessionId}.json`); } function lockPath(sessionId) { - return join4(STATE_DIR, `${sessionId}.lock`); + return join3(STATE_DIR, `${sessionId}.lock`); } function readState(sessionId) { const p = statePath(sessionId); - if (!existsSync3(p)) + if (!existsSync2(p)) return null; try { - return JSON.parse(readFileSync3(p, "utf-8")); + return JSON.parse(readFileSync2(p, "utf-8")); } catch { return null; } } function writeState(sessionId, state) { - mkdirSync2(STATE_DIR, { recursive: true }); + mkdirSync(STATE_DIR, { recursive: true }); const p = statePath(sessionId); const tmp = `${p}.${process.pid}.${Date.now()}.tmp`; - writeFileSync2(tmp, JSON.stringify(state)); + writeFileSync(tmp, JSON.stringify(state)); renameSync(tmp, p); } function withRmwLock(sessionId, fn) { - mkdirSync2(STATE_DIR, { recursive: true }); + mkdirSync(STATE_DIR, { recursive: true }); const rmwLock = statePath(sessionId) + ".rmw"; const deadline = Date.now() + 2e3; let fd = null; @@ -494,11 +178,11 @@ function shouldTrigger(state, cfg, now = Date.now()) { return false; } function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync2(STATE_DIR, { recursive: true }); + mkdirSync(STATE_DIR, { recursive: true }); const p = lockPath(sessionId); - if 
(existsSync3(p)) { + if (existsSync2(p)) { try { - const ageMs = Date.now() - parseInt(readFileSync3(p, "utf-8"), 10); + const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10); if (Number.isFinite(ageMs) && ageMs < maxAgeMs) return false; } catch (readErr) { @@ -525,46 +209,75 @@ function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { throw e; } } -function releaseLock(sessionId) { - try { - unlinkSync(lockPath(sessionId)); - } catch (e) { - if (e?.code !== "ENOENT") { - dlog(`releaseLock unlink failed for ${sessionId}: ${e.message}`); - } - } -} // dist/src/hooks/codex/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; -import { fileURLToPath } from "node:url"; -import { dirname, join as join6 } from "node:path"; -import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync4 } from "node:fs"; -import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; - -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join5 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join5(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync3(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } - } - }; -} +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname, join as join4 } from "node:path"; +import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; +import { homedir as homedir4, tmpdir } from "node:os"; + +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; // dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir4(); -var wikiLogger = makeWikiLogger(join6(HOME, ".codex", "hooks")); -var WIKI_LOG = wikiLogger.path; -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. +var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -578,42 +291,59 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. 
Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. LENGTH LIMIT: Keep the total summary under 4000 characters.`; -var wikiLog = wikiLogger.log; +function wikiLog(msg) { + try { + mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} +`); + } catch { + } +} function findCodexBin() { try { return execSync("which codex 2>/dev/null", { encoding: "utf-8" }).trim(); @@ -624,27 +354,34 @@ function findCodexBin() { function spawnCodexWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join6(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync4(tmpDir, { recursive: true }); - const configFile = join6(tmpDir, "config.json"); - writeFileSync3(configFile, JSON.stringify({ + const tmpDir = join4(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync2(tmpDir, { recursive: true }); + const configFile = join4(tmpDir, "config.json"); + writeFileSync2(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, tmpDir, codexBin: findCodexBin(), wikiLog: WIKI_LOG, - hooksDir: join6(HOME, ".codex", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + hooksDir: join4(HOME, ".codex", "hooks"), + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join6(bundleDir, "wiki-worker.js"); + const workerPath = join4(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: 
true, stdio: ["ignore", "ignore", "ignore"] @@ -652,24 +389,132 @@ function spawnCodexWikiWorker(opts) { wikiLog(`${reason}: spawned summary worker for ${sessionId}`); } function bundleDirFromImportMeta(importMetaUrl) { - return dirname(fileURLToPath(importMetaUrl)); + return dirname(fileURLToPath2(importMetaUrl)); +} + +// dist/src/hooks/session-queue.js +import { appendFileSync as appendFileSync3, closeSync as closeSync2, existsSync as existsSync3, mkdirSync as mkdirSync3, openSync as openSync2, readFileSync as readFileSync3, readdirSync, renameSync as renameSync2, rmSync, statSync, writeFileSync as writeFileSync3 } from "node:fs"; +import { dirname as dirname2, join as join5 } from "node:path"; +import { homedir as homedir5 } from "node:os"; +var DEFAULT_QUEUE_DIR = join5(homedir5(), ".deeplake", "queue"); +var DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 6e4; +function buildSessionPath(config, sessionId) { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} +function buildQueuedSessionRow(args) { + const structured = extractStructuredSessionFields(args.line, args.sessionId); + return { + id: crypto.randomUUID(), + path: args.sessionPath, + filename: args.sessionPath.split("/").pop() ?? "", + message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, + author: args.userName, + sizeBytes: Buffer.byteLength(args.line, "utf-8"), + project: args.projectName, + description: args.description, + agent: args.agent, + creationDate: args.timestamp, + lastUpdateDate: args.timestamp + }; +} +function appendQueuedSessionRow(row, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync3(queueDir, { recursive: true }); + const sessionId = extractSessionId(row.path); + const queuePath = getQueuePath(queueDir, sessionId); + appendFileSync3(queuePath, `${JSON.stringify(row)} +`); + return queuePath; +} +function extractString(value) { + return typeof value === "string" ? value : value == null ? "" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? 
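// tool_call events carry no prose text/content, so fall back to the tool name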
toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} +function getQueuePath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.jsonl`); +} +function extractSessionId(sessionPath) { + const filename = sessionPath.split("/").pop() ?? ""; + return filename.replace(/\.jsonl$/, "").split("_").pop() ?? filename; +} + +// dist/src/hooks/query-cache.js +import { mkdirSync as mkdirSync4, readFileSync as readFileSync4, rmSync as rmSync2, statSync as statSync2, writeFileSync as writeFileSync4 } from "node:fs"; +import { join as join6 } from "node:path"; +import { homedir as homedir6 } from "node:os"; +var log2 = (msg) => log("query-cache", msg); +var DEFAULT_CACHE_ROOT = join6(homedir6(), ".deeplake", "query-cache"); +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; +function getSessionQueryCacheDir(sessionId, deps = {}) { + const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; + return join6(cacheRoot, sessionId); +} +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log2 } = deps; + try { + rmSync2(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } } // dist/src/hooks/codex/capture.js var log3 = (msg) => log("codex-capture", msg); -var CAPTURE = process.env.HIVEMIND_CAPTURE !== "false"; -async function main() { - if (!CAPTURE) - return; - const input = await readStdin(); - const config = loadConfig(); - if (!config) { - log3("no config"); - return; - } - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); - const ts = (/* @__PURE__ */ new Date()).toISOString(); +var CAPTURE = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false"; +function buildCodexCaptureEntry(input, timestamp) { const meta = { session_id: input.session_id, transcript_path: input.transcript_path, @@ -677,20 +522,18 @@ async function main() { hook_event_name: input.hook_event_name, model: input.model, turn_id: input.turn_id, - timestamp: ts + timestamp }; - let entry; if (input.hook_event_name === "UserPromptSubmit" && input.prompt !== void 0) { - log3(`user session=${input.session_id}`); - entry = { + return { id: crypto.randomUUID(), ...meta, type: "user_message", content: input.prompt }; - } else if (input.hook_event_name === "PostToolUse" && input.tool_name !== void 0) { - log3(`tool=${input.tool_name} session=${input.session_id}`); - entry = { + } + if (input.hook_event_name === "PostToolUse" && input.tool_name !== void 0) { + return { id: crypto.randomUUID(), ...meta, type: "tool_call", @@ -699,66 +542,84 @@ async function main() { tool_input: JSON.stringify(input.tool_input), tool_response: JSON.stringify(input.tool_response) }; - } else { - log3(`unknown event: ${input.hook_event_name}, skipping`); - return; - } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log3(`writing to ${sessionPath}`); - const projectName = (input.cwd ?? 
"").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); - const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'codex', '${ts}', '${ts}')`; - try { - await api.query(insertSql); - } catch (e) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log3("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } } - log3("capture ok"); - maybeTriggerPeriodicSummary(input.session_id, input.cwd ?? "", config); + return null; } -function maybeTriggerPeriodicSummary(sessionId, cwd, config) { - if (process.env.HIVEMIND_WIKI_WORKER === "1") +function maybeTriggerPeriodicSummary(sessionId, cwd, config, deps = {}) { + const { bundleDir = bundleDirFromImportMeta(import.meta.url), wikiWorker = process.env.HIVEMIND_WIKI_WORKER === "1", logFn = log3, bumpTotalCountFn = bumpTotalCount, loadTriggerConfigFn = loadTriggerConfig, shouldTriggerFn = shouldTrigger, tryAcquireLockFn = tryAcquireLock, wikiLogFn = wikiLog, spawnCodexWikiWorkerFn = spawnCodexWikiWorker } = deps; + if (wikiWorker) return; try { - const state = bumpTotalCount(sessionId); - const cfg = loadTriggerConfig(); - if (!shouldTrigger(state, cfg)) + const state = bumpTotalCountFn(sessionId); + const cfg = loadTriggerConfigFn(); + if (!shouldTriggerFn(state, cfg)) return; - if (!tryAcquireLock(sessionId)) { - log3(`periodic trigger suppressed (lock held) session=${sessionId}`); + if (!tryAcquireLockFn(sessionId)) { + logFn(`periodic trigger suppressed (lock held) session=${sessionId}`); return; } - wikiLog(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); - try { - spawnCodexWikiWorker({ - config, - sessionId, - cwd, - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Periodic" - }); - } catch (e) { - log3(`periodic spawn failed: ${e.message}`); - try { - releaseLock(sessionId); - } catch (releaseErr) { - log3(`releaseLock after periodic spawn failure also failed: ${releaseErr.message}`); - } - throw e; - } + wikiLogFn(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); + spawnCodexWikiWorkerFn({ + config, + sessionId, + cwd, + bundleDir, + reason: "Periodic" + }); } catch (e) { - log3(`periodic trigger error: ${e.message}`); + logFn(`periodic trigger error: ${e.message}`); } } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +async function runCodexCaptureHook(input, deps = {}) { + const { captureEnabled = CAPTURE, config = loadConfig(), now = () => (/* @__PURE__ */ new Date()).toISOString(), appendQueuedSessionRowFn = appendQueuedSessionRow, buildQueuedSessionRowFn = buildQueuedSessionRow, clearSessionQueryCacheFn = clearSessionQueryCache, maybeTriggerPeriodicSummaryFn = maybeTriggerPeriodicSummary, logFn = log3 } = deps; + if (!captureEnabled) + return { status: "disabled" }; + if (!config) { + logFn("no config"); + return { status: "no_config" }; + } + 
const ts = now(); + const entry = buildCodexCaptureEntry(input, ts); + if (!entry) { + logFn(`unknown event: ${input.hook_event_name}, skipping`); + return { status: "ignored" }; + } + if (input.hook_event_name === "UserPromptSubmit") + logFn(`user session=${input.session_id}`); + else + logFn(`tool=${input.tool_name} session=${input.session_id}`); + if (input.hook_event_name === "UserPromptSubmit") { + clearSessionQueryCacheFn(input.session_id); + } + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: input.hook_event_name ?? "", + agent: "codex", + timestamp: ts + })); + logFn(`queued ${input.hook_event_name} for ${sessionPath}`); + maybeTriggerPeriodicSummaryFn(input.session_id, input.cwd ?? "", config); + return { status: "queued", entry }; +} +async function main() { + const input = await readStdin(); + await runCodexCaptureHook(input); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + buildCodexCaptureEntry, + maybeTriggerPeriodicSummary, + runCodexCaptureHook +}; diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index 064f11e..8ecda06 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -233,6 +233,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -277,6 +282,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -380,10 +401,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? 
new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -406,9 +427,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -465,6 +490,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -562,17 +610,242 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
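// Every ensure*Table method here repeats the same idempotent migration idiom:
// create-if-missing, then additive ALTERs (failures swallowed so re-runs and
// older servers don't break), then lookup indexes. A condensed sketch of that
// idiom; ensureTable and its argument shape are hypothetical names, not part
// of this bundle:
//
//   async function ensureTable(api, name, columns, alters, indexes) {
//     const tables = await api.listTables();
//     if (!tables.includes(name)) {
//       await api.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`);
//     }
//     for (const [column, ddl] of alters) {
//       // re-runnable; errors ignored, mirroring the empty catch blocks in these methods
//       await api.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`).catch(() => {});
//     }
//     for (const [suffix, cols] of indexes) {
//       await api.ensureLookupIndex(name, suffix, cols);
//     }
//   }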
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 28cf31d..b7a0475 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -38,12 +38,12 @@ function loadConfig() { return null; } } - const env = process.env; - if (!env.HIVEMIND_TOKEN && env.DEEPLAKE_TOKEN) { + const env2 = process.env; + if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env.HIVEMIND_TOKEN ?? env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env.HIVEMIND_ORG_ID ?? env.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env2.HIVEMIND_TOKEN ?? 
env2.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -51,11 +51,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env.HIVEMIND_WORKSPACE_ID ?? env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? 
new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -290,6 +315,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -387,21 +435,548 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT 
''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER 
TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + 
`entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } +}; + +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? 
"Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? "cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; } }; +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i = 0; i < dedupedTextRows.length; i++) { + const row = dedupedTextRows[i]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i] ?? 0) + }); + } + for (let i = 0; i < dedupedVectorRows.length; i++) { + const row = dedupedVectorRows[i]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i] ?? 0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a, b) => b.fusedScore - a.fusedScore || b.vectorScore - a.vectorScore || b.textScore - a.textScore || a.sourceOrder - b.sourceOrder || a.creationDate.localeCompare(b.creationDate) || a.path.localeCompare(b.path)).slice(0, Math.max(0, limit)); +} + +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? 
""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -564,24 +1139,9 @@ function normalizeContent(path, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t) => { - const sp = String(t?.speaker ?? t?.name ?? "?").trim(); - const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t) => { const i = t.indexOf(""); @@ -625,14 +1185,70 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 
100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const sessionsOnly = isSessionsOnlyMode(); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? semanticQueryText).trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content + })); + } + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -653,6 +1269,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -679,13 +1299,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -713,33 +1334,201 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? 
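// buildHeuristicLexicalQuery is the fallback when the BM25 operator is
// unavailable: it approximates relevance by counting ILIKE hits, one point per
// matched term plus a capped bonus for the whole phrase. For a queryText of
// "alice roadmap" (with contentExpr abbreviated to content) the generated
// score expression is, illustratively:
//   CASE WHEN content ILIKE '%alice%' THEN 1 ELSE 0 END
//   + CASE WHEN content ILIKE '%roadmap%' THEN 1 ELSE 0 END
//   + CASE WHEN content ILIKE '%alice roadmap%' THEN 2 ELSE 0 END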
Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { + if (!pattern) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -773,12 +1562,12 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { } return output; } -async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { +async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, forceMultiFilePrefix) { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } // dist/src/utils/output-cap.js @@ -904,7 +1693,7 @@ function parseBashGrep(cmd) { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns = []; let ti = 1; while (ti < tokens.length) { @@ -996,6 +1785,8 @@ function parseBashGrep(cmd) { break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -1037,6 +1828,7 @@ function parseBashGrep(cmd) { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, filesOnly, @@ -1059,44 +1851,241 @@ async function handleGrepDirect(api, table, sessionsTable, params) { invertMatch: params.invertMatch, fixedString: params.fixedString }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, params.recursive ? true : void 0); const joined = output.join("\n") || "(no matches)"; return capOutputForClaude(joined, { kind: "grep" }); } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename(path) { + const trimmed = path.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? 
extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts = new Date(parsed); + const yyyy = ts.getUTCFullYear(); + const mm = String(ts.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts.getUTCDate()).padStart(2, "0"); + const hh = String(ts.getUTCHours()).padStart(2, "0"); + const min = String(ts.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path = typeof row.path === "string" ? row.path : ""; + if (!path) + return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? 
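// formatIndexTimestamp only rewrites full ISO timestamps; anything else passes
// through untouched. Expected behavior (illustrative):
//   formatIndexTimestamp("2024-05-01T09:30:00.000Z") -> "2024-05-01 09:30 UTC"
//   formatIndexTimestamp("2024-05-01") -> "2024-05-01"  (no T section, left as-is)
//   formatIndexTimestamp("") -> ""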
structuredBlurb : truncate(description, 220); + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + // dist/src/hooks/virtual-table-query.js function normalizeSessionPart(path, content) { return normalizeContent(path, content); } -function buildVirtualIndexContent(summaryRows, sessionRows = []) { - const total = summaryRows.length + sessionRows.length; +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); const lines = [ "# Memory Index", "", - `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`, + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", "" ]; - if (summaryRows.length > 0) { - lines.push("## Summaries", ""); - for (const row of summaryRows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? 
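// Worked example for buildSummaryIndexEntry + formatSummaryIndexEntry, with a
// hypothetical summary document (field names follow the extractors above):
//   path:    "/summaries/2024/conv_1_session_2.md"
//   summary: "- **Date**: 2024-05-01\n- **Participants**: Alice, Bob\n" +
//            "- **Topics**: roadmap; hiring\n- **Source**: /sessions/conv_1_session_2.json\n" +
//            "## Searchable Facts\n- Alice approved the Q3 roadmap"
// yields roughly (fields joined with the \u2014 separator):
//   - [summary: conv_1_session_2.md](/summaries/2024/conv_1_session_2.md) — [session](/sessions/conv_1_session_2.json) — 2024-05-01 — Alice, Bob — topics: roadmap; hiring — Alice, Bob | roadmap; hiring | Alice approved the Q3 roadmap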
`[${project}]` : ""} ${description}`); - } + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); lines.push(""); } - if (sessionRows.length > 0) { - lines.push("## Sessions", ""); - for (const row of sessionRows) { - const path = row["path"]; - const description = (row["description"] || "").slice(0, 120); - lines.push(`- [${path}](${path}) ${description}`); - } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry) { + const session = entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
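// Worked example for buildPeopleDirectory: two entries naming Alice collapse
// into one People row (hypothetical inputs, only the fields the directory reads):
//   { label: "a.md", path: "/summaries/x/a.md", source: "", participants: ["Alice"], topics: ["roadmap"] }
//   { label: "b.md", path: "/summaries/x/b.md", source: "", participants: ["Alice", "Bob"], topics: ["roadmap", "hiring"] }
// Alice's line becomes:
//   - Alice — 2 summaries — topics: roadmap; hiring — recent: [a.md](/summaries/x/a.md), [b.md](/summaries/x/b.md)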
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} function buildUnionQuery(memoryQuery, sessionsQuery) { return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; } @@ -1107,10 +2096,13 @@ function buildDirFilter(dirs) { const cleaned = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; if (cleaned.length === 0 || cleaned.includes("/")) return ""; - const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`); + const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%'`); return ` WHERE ${clauses.join(" OR ")}`; } async function queryUnionRows(api, memoryQuery, sessionsQuery) { + if (isSessionsOnlyMode()) { + return api.query(`SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date`); + } const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -1127,7 +2119,13 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP const result = new Map(uniquePaths.map((path) => [path, null])); if (uniquePaths.length === 0) return result; - const inList = buildInList(uniquePaths); + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } + const queryPaths = isIndexDisabled() ? uniquePaths.filter((path) => path !== "/index.md") : uniquePaths; + if (queryPaths.length === 0) + return result; + const inList = buildInList(queryPaths); const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); const memoryHits = /* @__PURE__ */ new Map(); const sessionHits = /* @__PURE__ */ new Map(); @@ -1145,7 +2143,7 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP sessionHits.set(path, current); } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? 
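// buildDirFilter collapses the directory list into LIKE prefixes and falls
// back to a full scan when the root is requested. Expected behavior
// (illustrative):
//   buildDirFilter(["/summaries/", "/sessions"])
//     -> " WHERE path LIKE '/summaries/%' OR path LIKE '/sessions/%'"
//   buildDirFilter(["/"]) -> ""   (root matches everything, so no WHERE clause)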
null); continue; @@ -1155,12 +2153,9 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP result.set(path, sessionParts.join("\n")); } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { - const [summaryRows, sessionRows] = await Promise.all([ - api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []), - api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => []) - ]); - result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const rows2 = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`).catch(() => []); + result.set("/index.md", buildVirtualIndexContent(rows2)); } return result; } @@ -1197,7 +2192,7 @@ async function listVirtualPathRows(api, memoryTable, sessionsTable, dir) { async function findVirtualPaths(api, memoryTable, sessionsTable, dir, filenamePattern) { const normalizedDir = dir.replace(/\/+$/, "") || "/"; const likePath = `${sqlLike(normalizedDir === "/" ? "" : normalizedDir)}/%`; - const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`); + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`); return [...new Set(rows.map((row) => row["path"]).filter((value) => typeof value === "string" && value.length > 0))]; } function dedupeRowsByPath(rows) { @@ -1214,6 +2209,56 @@ function dedupeRowsByPath(rows) { } // dist/src/hooks/bash-command-compiler.js +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID2 = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT2 = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT2 = 0.3; +var summaryRetrievalEmbedder = null; +function envString2(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag2(...names) { + const raw = envString2(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber2(fallback, ...names) { + const raw = envString2(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getSummaryRetrievalEmbedder() { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString2("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? 
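// The env helpers give every knob the same precedence: the first non-empty
// variable in the list wins, with HIVEMIND_* names checked before their
// DEEPLAKE_* aliases, and malformed values fall back silently. Illustrative:
//   HIVEMIND_HYBRID_TEXT_WEIGHT="0.4"
//     -> envNumber2(0.3, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT") === 0.4
//   HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY="on"
//     -> envFlag2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") === true
//        (accepts 1/true/yes/on, case-insensitive)
//   unset or non-numeric -> envNumber2 returns its fallback argument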
DEFAULT_EMBED_RETRIEVAL_MODEL_ID2, + device: envString2("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString2("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString2("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return summaryRetrievalEmbedder; +} +function sqlFloat4Array2(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function quoteShellToken(token) { + if (token === "") + return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) + return token; + return `'${token.replace(/'/g, `'"'"'`)}'`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -1221,11 +2266,19 @@ function splitTopLevel(input, operators) { const parts = []; let current = ""; let quote = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === '"') + escaped = true; current += ch; continue; } @@ -1234,6 +2287,11 @@ function splitTopLevel(input, operators) { current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { const trimmed2 = current.trim(); @@ -1245,7 +2303,7 @@ function splitTopLevel(input, operators) { } current += ch; } - if (quote) + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) @@ -1309,8 +2367,8 @@ function expandBraceToken(token) { return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); } function stripAllowedModifiers(segment) { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); - const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "").replace(/\s2>&1(?=\s*(?:\||$))/g, "").trim(); return { clean, ignoreMissing }; } function hasUnsupportedRedirection(segment) { @@ -1379,7 +2437,7 @@ function isValidPipelineHeadTailStage(stage) { return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); return false; } -function parseFindNamePatterns(tokens) { +function parseFindSpec(tokens) { const patterns = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -1397,9 +2455,600 @@ function parseFindNamePatterns(tokens) { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) + return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if (terminator !== "\\;" && terminator !== ";" || target !== "{}") + return null; + return { + patterns, + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" ") + }; + } + return null; + } + return patterns.length > 0 ? 
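// parseFindSpec compiles the classic `find DIR -name PAT -exec grep ... {} \;`
// shape into a find_grep segment: the -exec tail must end with `{} \;` (or
// `{} ;`), and the grep argv is re-quoted with quoteShellToken before being
// re-parsed as a grep command. Illustrative:
//   find /notes -name "*.md" -exec grep -i alice {} \;
//   -> { patterns: ["*.md"], execGrepCmd: "grep -i alice '{}'" }
// quoteShellToken leaves bare words untouched, single-quotes anything with
// shell metacharacters, and renders an embedded quote as '"'"'.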
{ patterns, execGrepCmd: null } : null; +} +function extractPsqlQuery(tokens) { + let query = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} +function extractPsqlQueryFromCommand(cmd) { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") + return null; + return extractPsqlQuery(tokens); +} +function normalizeSqlRef(ref) { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} +function deriveSiblingTableName(tableName, expectedBase, targetBase) { + if (tableName === expectedBase) + return null; + if (!tableName.startsWith(expectedBase)) + return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} +function resolveInterceptedTableNames(memoryTable, sessionsTable) { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links") + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links") + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? "graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? 
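// deriveSiblingTableName keeps suffixed deployments aligned: when the memory
// table carries a suffix, the graph/fact tables default to the same suffix.
// Expected behavior (illustrative):
//   deriveSiblingTableName("memory_v2", "memory", "graph_nodes") -> "graph_nodes_v2"
//   deriveSiblingTableName("memory", "memory", "graph_nodes")    -> null  (no suffix; use env or default)
//   deriveSiblingTableName("custom", "memory", "graph_nodes")    -> null  (unrelated name)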
"fact_entity_links" + }; +} +function getInterceptedSqlRefs() { + if (isFactsSessionsOnlyPsqlMode()) { + return /* @__PURE__ */ new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); + } + return /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); +} +function extractSqlTableRefs(query) { + const refs = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) + refs.push(normalizeSqlRef(match[1])); + } + return refs; +} +function queryReferencesInterceptedTables(query) { + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); +} +function queryUsesOnlyInterceptedTables(query) { + const refs = extractSqlTableRefs(query); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => interceptedRefs.has(ref)); +} +function parsePsqlSegment(pipeline, tokens) { + if (tokens[0] !== "psql" || !isPsqlMode()) + return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) + tuplesOnly = true; + continue; + } + } + if (!query || !queryUsesOnlyInterceptedTables(query)) return null; + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql.replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM 
"${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); + } + return sql; +} +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = /* @__PURE__ */ new Set([ + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable + ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new 
Error("psql query must reference an intercepted hivemind memory table"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} +function decodeSqlLiteral(value) { + return value.replace(/''/g, "'").trim(); +} +function cleanSearchTerm(value) { + return decodeSqlLiteral(value).replace(/^%+|%+$/g, "").replace(/^_+|_+$/g, "").trim(); +} +function extractSqlSearchTerms(query) { + const terms = []; + const push = (value) => { + const cleaned = cleanSearchTerm(value); + if (!cleaned) + return; + if (cleaned.startsWith("/")) + return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) + return; + if (!terms.includes(cleaned)) + terms.push(cleaned); + }; + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} +function chooseEntityTerms(terms) { + const entityLike = terms.filter((term) => /[A-Z]/.test(term) && !/^\d+$/.test(term) && term.split(/\s+/).length <= 4); + return (entityLike.length > 0 ? entityLike : terms).slice(0, 2); +} +function escapeRegex2(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`); + const nodeTextClauses = topicTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeEntityClauses = entityTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeTopicClauses = topicTerms.map((term) => `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${nodeTextClauses.join(" OR ")})` : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${edgeTopicClauses.join(" OR ")})` : entityTerms.length > 0 ? 
`(${edgeEntityClauses.join(" OR ")})` : "FALSE"; + const sql = `WITH node_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphNodesTable}" WHERE ${nodeWhere} ORDER BY score DESC LIMIT 8), edge_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphEdgesTable}" WHERE ${edgeWhere} ORDER BY score DESC LIMIT 8) SELECT source_session_id, source_path, search_text, score FROM ( SELECT source_session_id, source_path, search_text, score FROM node_candidates UNION ALL SELECT source_session_id, source_path, search_text, score FROM edge_candidates ) AS graph_candidates ORDER BY score ASC LIMIT 12`; + const rows = await api.query(sql); + const expanded = []; + const seen = /* @__PURE__ */ new Set(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...searchText.match(/conv_\d+_session_\d+/g) ?? [], + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "" + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? [], + typeof row["source_path"] === "string" ? row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`) + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) + continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) + continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) + return expanded; + } + } + return expanded; +} +function splitDelimitedField(value) { + if (typeof value !== "string") + return []; + return value.split(",").map((item) => item.trim()).filter(Boolean); +} +function extractSessionIdFromPath(value) { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} +function extractSummarySourcePath(summary) { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} +function addHybridCandidate(map, candidate) { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? 
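// Graph rows can carry their provenance either in dedicated columns or inline
// in search_text; the expansion above scans both. Expected behavior
// (illustrative):
//   extractSessionIdFromPath("/sessions/conv_3_session_7.json") -> "conv_3_session_7"
//   "see conv_3_session_7 for details".match(/conv_\d+_session_\d+/g) -> ["conv_3_session_7"]
// and a bare session id is mapped back to "/sessions/conv_3_session_7.json".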
""; + if (!sessionId && !sourcePath) + return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: /* @__PURE__ */ new Set([candidate.signal]) + }); +} +async function fetchEntityResolution(api, entitiesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`).join(" OR "); + const sql = `SELECT entity_id, source_session_ids, source_paths, search_text, search_text <#> '${phrase}' AS score FROM "${entitiesTable}" WHERE ${where} ORDER BY score ASC LIMIT 8`; + const rows = await api.query(sql); + const entityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) + entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? `/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity" + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} +async function fetchFactCandidates(api, factsTable, terms, entityIds) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms).map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 ? `(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? 
`(${topicClauses.join(" OR ")})` : "" + ].filter(Boolean); + if (whereParts.length === 0) + return { entityIds: [], candidates: [] }; + const sql = `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score FROM "${factsTable}" WHERE ${whereParts.join(" AND ")} ORDER BY score ASC LIMIT 16`; + const rows = await api.query(sql); + const relatedEntityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"]) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) + relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ? row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact" + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} +async function fetchSummaryCandidates(api, memoryTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + let rows = []; + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) + return []; + const queryVectorSql = sqlFloat4Array2(queryEmbedding); + const vectorSql = `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); + } else { + const textSql = `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score FROM "${memoryTable}" ORDER BY score DESC LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)) + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8 + }).map((row) => ({ + path: row.path, + summary: row.content + })); + } + } else { + const phraseSql = sqlStr(phrase); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? 
row["summary"] : "" + })); + } + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = row.path; + const summary = row.summary; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? `/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary" + }); } - return patterns.length > 0 ? patterns : null; + return [...candidateMap.values()]; +} +function buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase) { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END` + ]; + return `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; +} +function mapSummaryRows(rows) { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} +function prependCtes(sql, ctes) { + if (ctes.length === 0) + return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} +function rewriteQueryWithRestrictedTables(sql, aliases) { + let rewritten = sql; + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(aliases.memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(aliases.sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex2(aliases.factsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); + } + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex2(aliases.entitiesTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); + } + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex2(aliases.factEntityLinksTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"), `FROM "${aliases.restrictedLinksAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); + } + return rewritten; +} +async function 
applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory2 = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); + const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory2 && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) + return sql; + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) + return sql; + const candidateMap = /* @__PURE__ */ new Map(); + const graphCandidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + const candidateEntityIds = [.../* @__PURE__ */ new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()].sort((a, b) => b.score - a.score || b.signals.size - a.signals.size).slice(0, 12); + if (candidates.length === 0) + return sql; + if (candidates.length > 16) + return sql; + const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})` + ]; + let restrictedMemoryAlias = null; + let restrictedSessionsAlias = null; + let restrictedFactsAlias = null; + let restrictedEntitiesAlias = null; + let restrictedLinksAlias = null; + if (candidateEntityIds.length > 0) { + ctes.push(`__hm_entity_candidates(entity_id) AS (VALUES ${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`); + } + if (touchesMemory2) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || 
gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); + } + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push(`"${restrictedFactsAlias}" AS ( SELECT * FROM "${factsTable}" f WHERE ( f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? ` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates) OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + ` ))`); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push(`"${restrictedEntitiesAlias}" AS ( SELECT * FROM "${entitiesTable}" e WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates))`); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push(`"${restrictedLinksAlias}" AS ( SELECT * FROM "${factEntityLinksTable}" l WHERE ( l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + (touchesFacts ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` : "") + ` ))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias + }), ctes); +} +function formatPsqlValue(value) { + if (value === null || value === void 0) + return ""; + if (typeof value === "string") + return value; + if (typeof value === "number" || typeof value === "boolean") + return String(value); + return JSON.stringify(value); +} +function formatPsqlRows(rows, tuplesOnly, fieldSeparator) { + if (rows.length === 0) + return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? 
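// End-to-end shape of the psql restriction pass above, sketched as data flow
// rather than new behavior (example terms are hypothetical):
//   1. extractSqlSearchTerms(sql)                 e.g. ["Alice", "roadmap"]
//   2. graph / entity / fact / summary lookups    merged via addHybridCandidate
//   3. candidates become __hm_graph_candidates    (plus __hm_entity_candidates) VALUES CTEs
//   4. memory/sessions/facts/entities/links       re-pointed at filtered __hm_* CTEs
//   5. prependCtes(rewrittenSql, ctes)            final SQL handed to api.query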
{}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) + return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); } function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); @@ -1411,6 +3060,9 @@ function parseCompiledSegment(segment) { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) + return psqlSegment; if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -1486,15 +3138,32 @@ function parseCompiledSegment(segment) { const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams2 = parseBashGrep(execGrepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") @@ -1664,6 +3333,16 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, outputs.push(limited.join("\n") || "(no matches)"); continue; } + if (segment.kind === "psql") { + const { graphNodesTable, graphEdgesTable } = resolveInterceptedTableNames(memoryTable, sessionsTable); + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const rows = await api.query(prepared); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? 
formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) @@ -1680,20 +3359,35 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, statSync, writeFileSync as writeFileSync2 } from "node:fs"; import { join as join4 } from "node:path"; import { homedir as homedir3 } from "node:os"; var log3 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", "query-cache"); var INDEX_CACHE_FILE = "index.md"; +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join4(cacheRoot, sessionId); } +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + rmSync(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } +} function readCachedIndexContent(sessionId, deps = {}) { const { logFn = log3 } = deps; try { - return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if (Date.now() - stats.mtimeMs > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync3(cachePath, "utf-8"); } catch (e) { if (e?.code === "ENOENT") return null; @@ -1789,6 +3483,7 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", @@ -1825,11 +3520,66 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "case", "esac" ]); +function splitSafeStages(cmd) { + const stages = []; + let current = ""; + let quote = null; + let escaped = false; + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + current += ch; + continue; + } + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) + stages.push(current.trim()); + current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) + stages.push(current.trim()); + current = ""; + continue; + } + current += ch; + } + if (quote || escaped) + return null; + if (current.trim()) + stages.push(current.trim()); + return stages; +} function isSafe(cmd) { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) + return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? 
""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) @@ -1845,11 +3595,45 @@ function rewritePaths(cmd) { } // dist/src/hooks/codex/pre-tool-use.js +function touchesVirtualMemoryPath(value) { + const rewritten = rewritePaths(value).trim(); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/") || /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten); +} +function touchesAnyMemoryPath(value) { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} +function isAnyPsqlCommand(cmd) { + return /^\s*psql\b/.test(cmd.trim()); +} +function isHivemindPsqlCommand(cmd) { + if (!isPsqlMode()) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} +function needsHivemindPsqlRewrite(cmd) { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} var log4 = (msg) => log("codex-pre", msg); var __bundleDir = dirname(fileURLToPath2(import.meta.url)); var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); function buildUnsupportedGuidance() { - return "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry."; + return `This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.`; +} +function buildPsqlOnlyGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "Hivemind recall is SQL-only in this mode. Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } + return "Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} +function buildPsqlSchemaGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } + return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. 
Rewrite the query to reference only those tables with normal psql SELECT syntax."; } function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { try { @@ -1864,26 +3648,32 @@ function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { return ""; } } -function buildIndexContent(rows) { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); - } - return lines.join("\n"); -} async function processCodexPreToolUse(input, deps = {}) { const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input?.command ?? ""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd)) + if (!touchesAnyMemoryPath(cmd) && !isAnyPsqlCommand(cmd)) + return { action: "pass" }; + if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { + if (needsHivemindPsqlRewrite(cmd)) { + return { + action: "guide", + output: buildPsqlSchemaGuidance(), + rewrittenCommand: cmd.trim() + }; + } return { action: "pass" }; - const rewritten = rewritePaths(cmd); + } + if (isPsqlMode() && touchesAnyMemoryPath(cmd)) { + return { + action: "guide", + output: buildPsqlOnlyGuidance(), + rewrittenCommand: cmd.trim() + }; + } + const rewritten = isHivemindPsqlCommand(cmd) ? cmd.trim() : rewritePaths(cmd); if (!isSafe(rewritten)) { - const guidance = buildUnsupportedGuidance(); + const guidance = isPsqlMode() ? buildPsqlOnlyGuidance() : buildUnsupportedGuidance(); logFn(`unsupported command, returning guidance: ${rewritten}`); return { action: "guide", @@ -1891,6 +3681,13 @@ async function processCodexPreToolUse(input, deps = {}) { rewrittenCommand: rewritten }; } + if (isHivemindPsqlCommand(rewritten) && !config) { + return { + action: "guide", + output: "Hivemind SQL mode is unavailable because Deeplake credentials are missing.", + rewrittenCommand: rewritten + }; + } if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; @@ -1898,7 +3695,7 @@ async function processCodexPreToolUse(input, deps = {}) { const readVirtualPathContentsWithCache = async (cachePaths) => { const uniquePaths = [...new Set(cachePaths)]; const result2 = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; const remainingPaths = cachedIndex === null ? 
uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); if (cachedIndex !== null) { result2.set("/index.md", cachedIndex); @@ -1969,13 +3766,13 @@ async function processCodexPreToolUse(input, deps = {}) { } if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; + let content = !isIndexDisabled() && virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - content = buildIndexContent(idxRows); + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { + const idxRows = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { if (virtualPath === "/index.md") { @@ -2060,8 +3857,22 @@ async function processCodexPreToolUse(input, deps = {}) { } } catch (e) { logFn(`direct query failed, falling back to shell: ${e.message}`); + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten + }; + } } } + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten + }; + } logFn(`intercepted \u2192 running via virtual shell: ${rewritten}`); const result = runVirtualShellFn(rewritten, shellBundle, logFn); return { @@ -2091,6 +3902,8 @@ if (isDirectRun(import.meta.url)) { }); } export { + buildPsqlOnlyGuidance, + buildPsqlSchemaGuidance, buildUnsupportedGuidance, isSafe, processCodexPreToolUse, diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 21609fa..ff3153b 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -1,10 +1,11 @@ #!/usr/bin/env node // dist/src/hooks/codex/session-start-setup.js -import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join7 } from "node:path"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname as dirname3, join as join7 } from "node:path"; +import { mkdirSync as mkdirSync5, appendFileSync as appendFileSync3 } from "node:fs"; import { execSync as execSync2 } from "node:child_process"; -import { homedir as homedir4 } from "node:os"; +import { homedir as homedir6 } from "node:os"; // dist/src/commands/auth.js import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from "node:fs"; @@ -60,6 +61,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? 
"graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -76,9 +82,6 @@ import { join as join3 } from "node:path"; import { homedir as homedir3 } from "node:os"; var DEBUG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); -function utcTimestamp(d = /* @__PURE__ */ new Date()) { - return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; -} function log(tag, msg) { if (!DEBUG) return; @@ -90,6 +93,12 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlIdent(name) { + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`); + } + return name; +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); @@ -107,6 +116,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -114,7 +139,7 @@ var MAX_CONCURRENCY = 5; var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); } function isTimeoutError(error) { const name = error instanceof Error ? error.name.toLowerCase() : ""; @@ -147,7 +172,7 @@ var Semaphore = class { this.active++; return; } - await new Promise((resolve) => this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -210,10 +235,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? 
new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -236,9 +261,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -295,6 +324,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -392,29 +444,254 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
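Existing tables are upgraded in place: any missing search columns are added below with ALTER TABLE ADD COLUMN IF NOT EXISTS.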
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); 
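+ // Add the new table to the cached table list so later lookups treat it as existing.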
+ if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING 
deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -423,74 +700,413 @@ function readStdin() { }); } -// dist/src/utils/version-check.js -import { readFileSync as readFileSync4 } from "node:fs"; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function 
isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + +// dist/src/hooks/session-queue.js +import { appendFileSync as appendFileSync2, closeSync, existsSync as existsSync4, mkdirSync as mkdirSync3, openSync, readFileSync as readFileSync4, readdirSync, renameSync, rmSync, statSync, writeFileSync as writeFileSync3 } from "node:fs"; import { dirname, join as join5 } from "node:path"; -var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +import { homedir as homedir4 } from "node:os"; +var DEFAULT_QUEUE_DIR = join5(homedir4(), ".deeplake", "queue"); +var DEFAULT_MAX_BATCH_ROWS = 50; +var DEFAULT_STALE_INFLIGHT_MS = 6e4; +var DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 6e4; +var DEFAULT_DRAIN_LOCK_STALE_MS = 3e4; +var BUSY_WAIT_STEP_MS = 100; +var SessionWriteDisabledError = class extends Error { + constructor(message) { + super(message); + this.name = "SessionWriteDisabledError"; + } +}; +function buildSessionInsertSql(sessionsTable, rows) { + if (rows.length === 0) + throw new Error("buildSessionInsertSql: rows must not be empty"); + const table = sqlIdent(sessionsTable); + const values = rows.map((row) => { + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + }).join(", "); + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; +} +function coerceJsonbPayload(message) { + try { + return JSON.stringify(JSON.parse(message)); + } catch { + return JSON.stringify({ + type: "raw_message", + content: message + }); + } +} +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +async function flushSessionQueue(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; + const staleInflightMs = opts.staleInflightMs ?? DEFAULT_STALE_INFLIGHT_MS; + const waitIfBusyMs = opts.waitIfBusyMs ?? 0; + const drainAll = opts.drainAll ?? false; + mkdirSync3(queueDir, { recursive: true }); + const queuePath = getQueuePath(queueDir, opts.sessionId); + const inflightPath = getInflightPath(queueDir, opts.sessionId); + if (isSessionWriteDisabled(opts.sessionsTable, queueDir)) { + return existsSync4(queuePath) || existsSync4(inflightPath) ? 
{ status: "disabled", rows: 0, batches: 0 } : { status: "empty", rows: 0, batches: 0 }; + } + let totalRows = 0; + let totalBatches = 0; + let flushedAny = false; + while (true) { + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + if (existsSync4(inflightPath)) { + if (waitIfBusyMs > 0) { + await waitForInflightToClear(inflightPath, waitIfBusyMs); + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + } + if (existsSync4(inflightPath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "busy", rows: 0, batches: 0 }; + } + } + if (!existsSync4(queuePath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + try { + renameSync(queuePath, inflightPath); + } catch (e) { + if (e?.code === "ENOENT") { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + throw e; + } + try { + const { rows, batches } = await flushInflightFile(api, opts.sessionsTable, inflightPath, maxBatchRows); + totalRows += rows; + totalBatches += batches; + flushedAny = flushedAny || rows > 0; + } catch (e) { + requeueInflight(queuePath, inflightPath); + if (e instanceof SessionWriteDisabledError) { + return { status: "disabled", rows: totalRows, batches: totalBatches }; + } + throw e; + } + if (!drainAll) { + return { status: "flushed", rows: totalRows, batches: totalBatches }; + } + } +} +async function drainSessionQueues(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + mkdirSync3(queueDir, { recursive: true }); + const sessionIds = listQueuedSessionIds(queueDir, opts.staleInflightMs ?? 
DEFAULT_STALE_INFLIGHT_MS); + let flushedSessions = 0; + let rows = 0; + let batches = 0; + for (const sessionId of sessionIds) { + const result = await flushSessionQueue(api, { + sessionId, + sessionsTable: opts.sessionsTable, + queueDir, + maxBatchRows: opts.maxBatchRows, + allowStaleInflight: true, + staleInflightMs: opts.staleInflightMs, + drainAll: true + }); + if (result.status === "flushed") { + flushedSessions += 1; + rows += result.rows; + batches += result.batches; + } + } + return { + queuedSessions: sessionIds.length, + flushedSessions, + rows, + batches + }; +} +function tryAcquireSessionDrainLock(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, staleMs = DEFAULT_DRAIN_LOCK_STALE_MS) { + mkdirSync3(queueDir, { recursive: true }); + const lockPath = getSessionDrainLockPath(queueDir, sessionsTable); + for (let attempt = 0; attempt < 2; attempt++) { + try { + const fd = openSync(lockPath, "wx"); + closeSync(fd); + return () => rmSync(lockPath, { force: true }); + } catch (e) { + if (e?.code !== "EEXIST") + throw e; + if (existsSync4(lockPath) && isStale(lockPath, staleMs)) { + rmSync(lockPath, { force: true }); + continue; + } + return null; + } + } + return null; +} +function getQueuePath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.jsonl`); +} +function getInflightPath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.inflight`); +} +async function flushInflightFile(api, sessionsTable, inflightPath, maxBatchRows) { + const rows = readQueuedRows(inflightPath); + if (rows.length === 0) { + rmSync(inflightPath, { force: true }); + return { rows: 0, batches: 0 }; + } + let ensured = false; + let batches = 0; + const queueDir = dirname(inflightPath); + for (let i = 0; i < rows.length; i += maxBatchRows) { + const chunk = rows.slice(i, i + maxBatchRows); + const sql = buildSessionInsertSql(sessionsTable, chunk); + try { + await api.query(sql); + } catch (e) { + if (isSessionWriteAuthError(e)) { + markSessionWriteDisabled(sessionsTable, errorMessage(e), queueDir); + throw new SessionWriteDisabledError(errorMessage(e)); + } + if (!ensured && isEnsureSessionsTableRetryable(e)) { + try { + await api.ensureSessionsTable(sessionsTable); + } catch (ensureError) { + if (isSessionWriteAuthError(ensureError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(ensureError), queueDir); + throw new SessionWriteDisabledError(errorMessage(ensureError)); + } + throw ensureError; + } + ensured = true; + try { + await api.query(sql); + } catch (retryError) { + if (isSessionWriteAuthError(retryError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(retryError), queueDir); + throw new SessionWriteDisabledError(errorMessage(retryError)); + } + throw retryError; + } + } else { + throw e; + } + } + batches += 1; + } + clearSessionWriteDisabled(sessionsTable, queueDir); + rmSync(inflightPath, { force: true }); + return { rows: rows.length, batches }; +} +function readQueuedRows(path) { + const raw = readFileSync4(path, "utf-8"); + return raw.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => JSON.parse(line)); +} +function requeueInflight(queuePath, inflightPath) { + if (!existsSync4(inflightPath)) + return; + const inflight = readFileSync4(inflightPath, "utf-8"); + appendFileSync2(queuePath, inflight); + rmSync(inflightPath, { force: true }); +} +function recoverStaleInflight(queuePath, inflightPath, staleInflightMs) { + if (!existsSync4(inflightPath) || !isStale(inflightPath, staleInflightMs)) + return; + requeueInflight(queuePath, inflightPath); 
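+ // requeueInflight appends the stale batch back onto the queue file and removes the .inflight marker, so the next flush retries those rows.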
+} +function isStale(path, staleInflightMs) { + return Date.now() - statSync(path).mtimeMs >= staleInflightMs; +} +function listQueuedSessionIds(queueDir, staleInflightMs) { + const sessionIds = /* @__PURE__ */ new Set(); + for (const name of readdirSync(queueDir)) { + if (name.endsWith(".jsonl")) { + sessionIds.add(name.slice(0, -".jsonl".length)); + } else if (name.endsWith(".inflight")) { + const path = join5(queueDir, name); + if (isStale(path, staleInflightMs)) { + sessionIds.add(name.slice(0, -".inflight".length)); + } + } + } + return [...sessionIds].sort(); +} +function isEnsureSessionsTableRetryable(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("does not exist") || message.includes("doesn't exist") || message.includes("relation") || message.includes("not found"); +} +function isSessionWriteAuthError(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("403") || message.includes("401") || message.includes("forbidden") || message.includes("unauthorized"); +} +function markSessionWriteDisabled(sessionsTable, reason, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync3(queueDir, { recursive: true }); + writeFileSync3(getSessionWriteDisabledPath(queueDir, sessionsTable), JSON.stringify({ + disabledAt: (/* @__PURE__ */ new Date()).toISOString(), + reason, + sessionsTable + })); +} +function clearSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR) { + rmSync(getSessionWriteDisabledPath(queueDir, sessionsTable), { force: true }); +} +function isSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, ttlMs = DEFAULT_AUTH_FAILURE_TTL_MS) { + const path = getSessionWriteDisabledPath(queueDir, sessionsTable); + if (!existsSync4(path)) + return false; + try { + const raw = readFileSync4(path, "utf-8"); + const state = JSON.parse(raw); + const ageMs = Date.now() - new Date(state.disabledAt).getTime(); + if (Number.isNaN(ageMs) || ageMs >= ttlMs) { + rmSync(path, { force: true }); + return false; + } + return true; + } catch { + rmSync(path, { force: true }); + return false; + } +} +function getSessionWriteDisabledPath(queueDir, sessionsTable) { + return join5(queueDir, `.${sessionsTable}.disabled.json`); +} +function getSessionDrainLockPath(queueDir, sessionsTable) { + return join5(queueDir, `.${sessionsTable}.drain.lock`); +} +function errorMessage(error) { + return error instanceof Error ? 
error.message : String(error); +} +async function waitForInflightToClear(inflightPath, waitIfBusyMs) { + const startedAt = Date.now(); + while (existsSync4(inflightPath) && Date.now() - startedAt < waitIfBusyMs) { + await sleep2(BUSY_WAIT_STEP_MS); + } +} +function sleep2(ms) { + return new Promise((resolve2) => setTimeout(resolve2, ms)); +} + +// dist/src/hooks/version-check.js +import { existsSync as existsSync5, mkdirSync as mkdirSync4, readFileSync as readFileSync5, writeFileSync as writeFileSync4 } from "node:fs"; +import { dirname as dirname2, join as join6 } from "node:path"; +import { homedir as homedir5 } from "node:os"; +var DEFAULT_VERSION_CACHE_PATH = join6(homedir5(), ".deeplake", ".version-check.json"); +var DEFAULT_VERSION_CACHE_TTL_MS = 60 * 60 * 1e3; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); + const pluginJson = join6(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync5(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join5(dir, "package.json"); + const candidate = join6(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync5(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { } - const parent = dirname(dir); + const parent = dirname2(dir); if (parent === dir) break; dir = parent; } return null; } -async function getLatestVersion(timeoutMs = 3e3) { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(timeoutMs) }); - if (!res.ok) - return null; - const pkg = await res.json(); - return pkg.version ?? 
null; - } catch { - return null; - } -} function isNewer(latest, current) { - const parse = (v) => v.split(".").map(Number); + const parse = (v) => v.replace(/-.*$/, "").split(".").map(Number); const [la, lb, lc] = parse(latest); const [ca, cb, cc] = parse(current); return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; } - -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join6 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join6(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync3(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } +function readVersionCache(cachePath = DEFAULT_VERSION_CACHE_PATH) { + if (!existsSync5(cachePath)) + return null; + try { + const parsed = JSON.parse(readFileSync5(cachePath, "utf-8")); + if (parsed && typeof parsed.checkedAt === "number" && typeof parsed.url === "string" && (typeof parsed.latest === "string" || parsed.latest === null)) { + return parsed; } - }; + } catch { + } + return null; +} +function writeVersionCache(entry, cachePath = DEFAULT_VERSION_CACHE_PATH) { + mkdirSync4(dirname2(cachePath), { recursive: true }); + writeFileSync4(cachePath, JSON.stringify(entry)); +} +function readFreshCachedLatestVersion(url, ttlMs = DEFAULT_VERSION_CACHE_TTL_MS, cachePath = DEFAULT_VERSION_CACHE_PATH, nowMs = Date.now()) { + const cached = readVersionCache(cachePath); + if (!cached || cached.url !== url) + return void 0; + if (nowMs - cached.checkedAt > ttlMs) + return void 0; + return cached.latest; +} +async function getLatestVersionCached(opts) { + const ttlMs = opts.ttlMs ?? DEFAULT_VERSION_CACHE_TTL_MS; + const cachePath = opts.cachePath ?? DEFAULT_VERSION_CACHE_PATH; + const nowMs = opts.nowMs ?? Date.now(); + const fetchImpl = opts.fetchImpl ?? fetch; + const fresh = readFreshCachedLatestVersion(opts.url, ttlMs, cachePath, nowMs); + if (fresh !== void 0) + return fresh; + const stale = readVersionCache(cachePath); + try { + const res = await fetchImpl(opts.url, { signal: AbortSignal.timeout(opts.timeoutMs) }); + const latest = res.ok ? (await res.json()).version ?? null : stale?.latest ?? null; + writeVersionCache({ + checkedAt: nowMs, + latest, + url: opts.url + }, cachePath); + return latest; + } catch { + const latest = stale?.latest ?? 
null; + writeVersionCache({ + checkedAt: nowMs, + latest, + url: opts.url + }, cachePath); + return latest; + } } // dist/src/hooks/codex/session-start-setup.js var log3 = (msg) => log("codex-session-setup", msg); -var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var { log: wikiLog } = makeWikiLogger(join7(homedir4(), ".codex", "hooks")); +var __bundleDir = dirname3(fileURLToPath2(import.meta.url)); +var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +var VERSION_CHECK_TIMEOUT = 3e3; +var HOME = homedir6(); +var WIKI_LOG = join7(HOME, ".codex", "hooks", "deeplake-wiki.log"); +function wikiLog(msg) { + try { + mkdirSync5(join7(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync3(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} +`); + } catch { + } +} async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { const summaryPath = `/summaries/${userName}/${sessionId}.md`; const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); @@ -499,7 +1115,7 @@ async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, return; } const now = (/* @__PURE__ */ new Date()).toISOString(); - const projectName = cwd.split("/").pop() ?? "unknown"; + const projectName = cwd.split("/").pop() || "unknown"; const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; const content = [ `# Session ${sessionId}`, @@ -513,78 +1129,119 @@ async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'codex', '${now}', '${now}')`); wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); } -async function main() { - if (process.env.HIVEMIND_WIKI_WORKER === "1") - return; - const input = await readStdin(); - const creds = loadCredentials(); +async function runCodexSessionStartSetup(input, deps = {}) { + const { wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", creds = loadCredentials(), saveCredentialsFn = saveCredentials, config = loadConfig(), createApi = (activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, activeConfig.tableName), captureEnabled = (process.env.HIVEMIND_CAPTURE ?? 
process.env.DEEPLAKE_CAPTURE) !== "false", drainSessionQueuesFn = drainSessionQueues, isSessionWriteDisabledFn = isSessionWriteDisabled, isSessionWriteAuthErrorFn = isSessionWriteAuthError, markSessionWriteDisabledFn = markSessionWriteDisabled, tryAcquireSessionDrainLockFn = tryAcquireSessionDrainLock, createPlaceholderFn = createPlaceholder, getInstalledVersionFn = getInstalledVersion, getLatestVersionCachedFn = getLatestVersionCached, isNewerFn = isNewer, execSyncFn = execSync2, logFn = log3, wikiLogFn = wikiLog } = deps; + if (wikiWorker) + return { status: "skipped" }; if (!creds?.token) { - log3("no credentials"); - return; + logFn("no credentials"); + return { status: "no_credentials" }; } if (!creds.userName) { try { const { userInfo: userInfo2 } = await import("node:os"); creds.userName = userInfo2().username ?? "unknown"; - saveCredentials(creds); - log3(`backfilled userName: ${creds.userName}`); + saveCredentialsFn(creds); + logFn(`backfilled userName: ${creds.userName}`); } catch { } } - const captureEnabled = process.env.HIVEMIND_CAPTURE !== "false"; - if (input.session_id) { + if (input.session_id && config) { try { - const config = loadConfig(); - if (config) { - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); - await api.ensureTable(); - await api.ensureSessionsTable(config.sessionsTableName); - if (captureEnabled) { - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + const api = createApi(config); + await api.ensureTable(); + if (captureEnabled) { + if (isSessionWriteDisabledFn(config.sessionsTableName)) { + logFn(`sessions table disabled, skipping setup for "${config.sessionsTableName}"`); + } else { + const releaseDrainLock = tryAcquireSessionDrainLockFn(config.sessionsTableName); + if (!releaseDrainLock) { + logFn(`sessions drain already in progress, skipping duplicate setup for "${config.sessionsTableName}"`); + } else { + try { + await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); + const drain = await drainSessionQueuesFn(api, { + sessionsTable: config.sessionsTableName + }); + if (drain.flushedSessions > 0) { + logFn(`drained ${drain.flushedSessions} queued session(s), rows=${drain.rows}, batches=${drain.batches}`); + } + } catch (e) { + if (isSessionWriteAuthErrorFn(e)) { + markSessionWriteDisabledFn(config.sessionsTableName, e.message); + logFn(`sessions table unavailable, skipping setup: ${e.message}`); + } else { + throw e; + } + } finally { + releaseDrainLock(); + } + } } - log3("setup complete"); + await createPlaceholderFn(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); } + logFn("setup complete"); } catch (e) { - log3(`setup failed: ${e.message}`); - wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + logFn(`setup failed: ${e.message}`); + wikiLogFn(`SessionSetup: failed for ${input.session_id}: ${e.message}`); } } const autoupdate = creds.autoupdate !== false; try { - const current = getInstalledVersion(__bundleDir, ".codex-plugin"); + const current = getInstalledVersionFn(__bundleDir, ".codex-plugin"); if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { + const latest = await getLatestVersionCachedFn({ + url: GITHUB_RAW_PKG, + timeoutMs: VERSION_CHECK_TIMEOUT + }); + if (latest && isNewerFn(latest, current)) { if (autoupdate) { - log3(`autoupdate: updating ${current} \u2192 ${latest}`); + logFn(`autoupdate: updating ${current} \u2192 ${latest}`); try { const tag = `v${latest}`; if (!/^v\d+\.\d+\.\d+$/.test(tag)) throw new Error(`unsafe version tag: ${tag}`); const findCmd = `INSTALL_DIR=""; CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; if [ -n "$INSTALL_DIR" ]; then TMPDIR=$(mktemp -d); git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; rm -rf "$TMPDIR"; fi`; - execSync2(findCmd, { stdio: "ignore", timeout: 6e4 }); + execSyncFn(findCmd, { stdio: "ignore", timeout: 6e4 }); process.stderr.write(`Hivemind auto-updated: ${current} \u2192 ${latest}. Restart Codex to apply. `); - log3(`autoupdate succeeded: ${current} \u2192 ${latest} (tag: ${tag})`); + logFn(`autoupdate succeeded: ${current} \u2192 ${latest} (tag: ${tag})`); } catch (e) { process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed. `); - log3(`autoupdate failed: ${e.message}`); + logFn(`autoupdate failed: ${e.message}`); } } else { process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. 
`); - log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); + logFn(`update available (autoupdate off): ${current} \u2192 ${latest}`); } } else { - log3(`version up to date: ${current}`); + logFn(`version up to date: ${current}`); } } } catch (e) { - log3(`version check failed: ${e.message}`); + logFn(`version check failed: ${e.message}`); } + return { status: "complete" }; } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +async function main() { + const input = await readStdin(); + await runCodexSessionStartSetup(input); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + createPlaceholder, + runCodexSessionStartSetup, + wikiLog +}; diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index fe5cfe1..1e29e19 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -2,7 +2,7 @@ // dist/src/hooks/codex/session-start.js import { spawn } from "node:child_process"; -import { fileURLToPath } from "node:url"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; import { dirname as dirname2, join as join4 } from "node:path"; // dist/src/commands/auth.js @@ -24,13 +24,13 @@ function loadCredentials() { // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -52,9 +52,44 @@ function log(tag, msg) { `); } -// dist/src/utils/version-check.js -import { readFileSync as readFileSync2 } from "node:fs"; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? 
""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + +// dist/src/hooks/version-check.js +import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; import { dirname, join as join3 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var DEFAULT_VERSION_CACHE_PATH = join3(homedir3(), ".deeplake", ".version-check.json"); +var DEFAULT_VERSION_CACHE_TTL_MS = 60 * 60 * 1e3; function getInstalledVersion(bundleDir, pluginManifestDir) { try { const pluginJson = join3(bundleDir, "..", pluginManifestDir, "plugin.json"); @@ -82,27 +117,198 @@ function getInstalledVersion(bundleDir, pluginManifestDir) { // dist/src/hooks/codex/session-start.js var log2 = (msg) => log("codex-session-start", msg); -var __bundleDir = dirname2(fileURLToPath(import.meta.url)); +var __bundleDir = dirname2(fileURLToPath2(import.meta.url)); var AUTH_CMD = join4(__bundleDir, "commands", "auth-login.js"); -var context = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. +var CODEX_SESSION_START_CONTEXT = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. -Structure: index.md (start here) \u2192 summaries/*.md \u2192 sessions/*.jsonl (last resort). Do NOT jump straight to JSONL. +Structure: index.md (start here) \u2192 summaries/*.md \u2192 sessions/{author}/* (last resort). Do NOT jump straight to raw session files. +When index.md identifies a likely match, read that exact summary or session path directly before broader grep variants. +If index.md already points to likely candidate files, open those exact files before broader synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked index.md plus at least one likely summary or raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. 
+RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. Do NOT spawn subagents to read deeplake memory.`; -async function main() { - if (process.env.HIVEMIND_WIKI_WORKER === "1") - return; - const input = await readStdin(); - const creds = loadCredentials(); - if (!creds?.token) { - log2("no credentials found \u2014 run auth login to authenticate"); - } else { - log2(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - } +var CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +SESSIONS-ONLY mode is active for benchmark comparison. Available Deeplake recall paths are raw session files under sessions/{author}/*. +Do NOT start with index.md or summaries in this mode, and do NOT assume those paths exist. +Open the most likely session file directly before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. 
Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; +var CODEX_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +Structure in this mode: summaries/*.md \u2192 sessions/{author}/* (last resort). /index.md is intentionally unavailable, so do NOT read it or rely on it. +Start by grepping summaries for the named person, topic, or keyword. Then open the specific matching summaries. Only read raw session files if summaries do not contain the exact detail. +If a summary points to a likely source session, read that exact raw session before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; +var CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. 
+ +Available tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Query memory first to identify likely summaries and sessions. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. Re-query memory by exact path for the small candidate set you selected. +5. Query sessions by exact path for transcript evidence or unresolved dates. +6. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative, immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased summary labels. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". 
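+
+Worked example (illustrative only; "Melanie" and "pottery" are stand-in terms, and '/sessions/...' is a placeholder for a real path returned by step 1):
+1. psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%Melanie%' AND summary ILIKE '%pottery%' ORDER BY creation_date DESC LIMIT 5"
+2. psql -At -F '|' -c "SELECT path, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...') ORDER BY path ASC, turn_index ASC"
+3. Resolve any relative dates against source_date_time from step 2, then answer with the smallest supported phrase.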
+ +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" +- If graph entity lookup is sparse or semantically weak, retry with BM25 on graph nodes: + psql -At -F '|' -c "SELECT node_id, canonical_name, node_type, summary, source_session_id, source_path, search_text <#> ' ' AS score FROM graph_nodes ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against session metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions row. +- Preserve direct relative-duration answers when they already match the question. 
+- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. +- Aggregate across the small candidate set before answering profile or list questions. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; +var CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. + +Available tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Resolve the named person, project, place, or organization with memory_entities. +2. Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Use sessions for transcript grounding and final answer verification. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. Facts are for narrowing and aggregation; sessions are for the final exact answer. 
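+
+Worked example (illustrative only; "Caroline" is a stand-in name, the empty entity_id quotes must be filled with the id returned by step 1, and '/sessions/...' is a placeholder for a path returned by step 2):
+1. psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases FROM memory_entities WHERE canonical_name ILIKE '%Caroline%' OR aliases ILIKE '%Caroline%' LIMIT 5"
+2. psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10"
+3. psql -At -F '|' -c "SELECT path, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...') ORDER BY path ASC, turn_index ASC"
+4. Verify the final answer against the transcript rows; the fact layer only narrows the search.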
+ +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row. + +Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode.`; +function buildCodexSessionStartContext(args) { + const versionNotice = args.currentVersion ? ` +Hivemind v${args.currentVersion}` : ""; + const template = isPsqlMode() ? isFactsSessionsOnlyPsqlMode() ? CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY : CODEX_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CODEX_SESSION_START_CONTEXT_NO_INDEX : CODEX_SESSION_START_CONTEXT; + return args.creds?.token ? `${template} +Logged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` : `${template} +Not logged in to Deeplake. Run: node "${args.authCommand}" login${versionNotice}`; +} +async function runCodexSessionStartHook(input, deps = {}) { + const { wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? 
process.env.DEEPLAKE_WIKI_WORKER) === "1", creds = loadCredentials(), spawnFn = spawn, currentVersion = getInstalledVersion(__bundleDir, ".codex-plugin"), authCommand = AUTH_CMD, setupScript = join4(__bundleDir, "session-start-setup.js"), logFn = log2 } = deps; + if (wikiWorker) + return null; + if (!creds?.token) + logFn("no credentials found \u2014 run auth login to authenticate"); + else + logFn(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); if (creds?.token) { - const setupScript = join4(__bundleDir, "session-start-setup.js"); - const child = spawn("node", [setupScript], { + const child = spawnFn("node", [setupScript], { detached: true, stdio: ["pipe", "ignore", "ignore"], env: { ...process.env } @@ -110,20 +316,32 @@ async function main() { child.stdin?.write(JSON.stringify(input)); child.stdin?.end(); child.unref(); - log2("spawned async setup process"); - } - let versionNotice = ""; - const current = getInstalledVersion(__bundleDir, ".codex-plugin"); - if (current) { - versionNotice = ` -Hivemind v${current}`; + logFn("spawned async setup process"); } - const additionalContext = creds?.token ? `${context} -Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` : `${context} -Not logged in to Deeplake. Run: node "${AUTH_CMD}" login${versionNotice}`; - console.log(additionalContext); + return buildCodexSessionStartContext({ + creds, + currentVersion, + authCommand + }); +} +async function main() { + const input = await readStdin(); + const output = await runCodexSessionStartHook(input); + if (output) + console.log(output); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log2(`fatal: ${e.message}`); + process.exit(0); + }); } -main().catch((e) => { - log2(`fatal: ${e.message}`); - process.exit(0); -}); +export { + CODEX_SESSION_START_CONTEXT, + CODEX_SESSION_START_CONTEXT_NO_INDEX, + CODEX_SESSION_START_CONTEXT_PSQL, + CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY, + CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY, + buildCodexSessionStartContext, + runCodexSessionStartHook +}; diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 0793149..b627405 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66735,12 +66735,12 @@ function loadConfig() { return null; } } - const env2 = process.env; - if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { + const env3 = process.env; + if (!env3.HIVEMIND_TOKEN && env3.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env3.HIVEMIND_TOKEN ?? env3.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env3.HIVEMIND_ORG_ID ?? env3.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -66748,11 +66748,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? 
"sessions", - memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") + workspaceId: env3.HIVEMIND_WORKSPACE_ID ?? env3.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env3.HIVEMIND_API_URL ?? env3.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env3.HIVEMIND_TABLE ?? env3.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env3.HIVEMIND_SESSIONS_TABLE ?? env3.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env3.HIVEMIND_GRAPH_NODES_TABLE ?? env3.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env3.HIVEMIND_GRAPH_EDGES_TABLE ?? env3.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env3.HIVEMIND_FACTS_TABLE ?? env3.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env3.HIVEMIND_ENTITIES_TABLE ?? env3.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env3.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env3.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env3.HIVEMIND_MEMORY_PATH ?? env3.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") }; } @@ -66799,6 +66804,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66902,10 +66923,10 @@ var DeeplakeApi = class { }); } catch (e6) { if (isTimeoutError(e6)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e6 }); throw lastError; } - lastError = e6 instanceof Error ? e6 : new Error(String(e6)); + lastError = e6 instanceof Error ? new DeeplakeQueryError(e6.message, { sql, cause: e6 }) : new DeeplakeQueryError(String(e6), { sql, cause: e6 }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -66928,9 +66949,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -66987,6 +67012,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e6; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -67084,25 +67132,544 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", 
"turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + 
`subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if 
(!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; // dist/src/shell/deeplake-fs.js -import { basename as basename4, posix } from "node:path"; +import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? 
"Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? "cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i11 = 0; i11 < dedupedTextRows.length; i11++) { + const row = dedupedTextRows[i11]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i11] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i11] ?? 0) + }); + } + for (let i11 = 0; i11 < dedupedVectorRows.length; i11++) { + const row = dedupedVectorRows[i11]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i11] ?? 0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a15, b26) => b26.fusedScore - a15.fusedScore || b26.vectorScore - a15.vectorScore || b26.textScore - a15.textScore || a15.sourceOrder - b26.sourceOrder || a15.creationDate.localeCompare(b26.creationDate) || a15.path.localeCompare(b26.path)).slice(0, Math.max(0, limit)); +} + +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -67265,24 +67832,9 @@ function normalizeContent(path2, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t6) => { const i11 = t6.indexOf(""); @@ -67326,14 +67878,70 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; + const limit = opts.limit ?? 
DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const sessionsOnly = isSessionsOnlyMode(); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? semanticQueryText).trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content + })); + } + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67364,6 +67972,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i11 + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i11++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -67390,13 +68002,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i11 = 0; i11 < pattern.length; i11++) { - const ch = pattern[i11]; + for (let i11 = 0; i11 < unwrapped.length; i11++) { + const ch = unwrapped[i11]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -67424,33 +68037,201 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
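// Worked example for buildSummaryBm25QueryText above: pattern "(retry|backoff)"
// yields alternation prefilters ["retry", "backoff"]; regex metacharacters and
// \b markers are stripped, tokens shorter than two characters are dropped,
// duplicates are removed, and the BM25 query text becomes "retry backoff".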
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? 
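// Example of the SQL these builders emit: buildBm25SimilarityQuery("memory", "",
// "summary::text", 0, "''", "retry backoff", 100) produces (whitespace aside)
//   SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date,
//          (summary::text <#> 'retry backoff') AS score
//   FROM "memory" WHERE 1=1 ORDER BY score DESC LIMIT 100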
Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { + if (!pattern) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i11 !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i11++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i11)) { + out += "("; + i11 += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i11))) { + const named = pattern.slice(i11).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i11 += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i11 + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -67485,6 +68266,234 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename4(path2) { + const trimmed = path2.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re9 = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function extractHeaderField(text, field) { + const re9 = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts3 = new Date(parsed); + const yyyy = ts3.getUTCFullYear(); + const mm = String(ts3.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts3.getUTCDate()).padStart(2, "0"); + const hh = String(ts3.getUTCHours()).padStart(2, "0"); + const min = String(ts3.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path2 = typeof row.path === "string" ? 
row.path : ""; + if (!path2) + return null; + if (path2.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path2)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename4(path2) || path2; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path: path2, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + +// dist/src/hooks/virtual-table-query.js +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a15, b26) => (b26.sortDate || "").localeCompare(a15.sortDate || "") || a15.path.localeCompare(b26.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. 
Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); + } + return lines.join("\n"); +} +function formatEntryLink(entry) { + const session = entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a15, b26) => b26[1] - a15[1] || a15[0].localeCompare(b26[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
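// Rendering recap: formatSummaryIndexEntry joins its catalog fields with
// " \u2014 " in a fixed order (summary link, session link, date, updated
// timestamp, participants, topics, [project], blurb), and buildPeopleDirectory
// emits one ranked bullet per participant with top topics and up to two recent
// entry links.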
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} + // dist/src/shell/deeplake-fs.js var BATCH_SIZE = 10; var PREFETCH_BATCH_SIZE = 50; @@ -67542,6 +68551,8 @@ var DeeplakeFs = class _DeeplakeFs { // Paths that live in the sessions table (multi-row, read by concatenation) sessionPaths = /* @__PURE__ */ new Set(); sessionsTable = null; + sessionsOnly = false; + indexDisabled = false; constructor(client, table, mountPoint) { this.client = client; this.table = table; @@ -67553,9 +68564,11 @@ var DeeplakeFs = class _DeeplakeFs { static async create(client, table, mount = "/memory", sessionsTable) { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; + fs3.sessionsOnly = isSessionsOnlyMode(); + fs3.indexDisabled = isIndexDisabled(); await client.ensureTable(); let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs3.sessionsOnly ? Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -67611,7 +68624,7 @@ var DeeplakeFs = class _DeeplakeFs { this.pending.delete(filePath); this.flushed.delete(filePath); const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + this.dirs.get(parent)?.delete(basename5(filePath)); } // ── flush / write batching ──────────────────────────────────────────────── scheduleFlush() { @@ -67674,46 +68687,8 @@ var DeeplakeFs = class _DeeplakeFs { } // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } - } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) - continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? 
`[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + const rows = await this.client.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC`); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── /** @@ -67783,7 +68758,7 @@ var DeeplakeFs = class _DeeplakeFs { return buf2; } if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -67802,7 +68777,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); if (this.dirs.has(p22) && !this.files.has(p22)) throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !this.files.has(p22)) { const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); if (realRows.length > 0 && realRows[0]["summary"]) { const text2 = realRows[0]["summary"]; @@ -67821,7 +68796,7 @@ var DeeplakeFs = class _DeeplakeFs { if (pend) return pend.contentText; if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -67847,13 +68822,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length, @@ -67872,13 +68847,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? 
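// The session read queries above add turn_index as a tie-breaker, presumably
// because rows flushed in one batch can share a creation_date string; ordering
// by (creation_date, turn_index) keeps messages in transcript order when
// timestamps collide.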
content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length @@ -67910,7 +68885,7 @@ var DeeplakeFs = class _DeeplakeFs { // ── IFileSystem: metadata ───────────────────────────────────────────────── async exists(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return true; return this.files.has(p22) || this.dirs.has(p22); } @@ -67918,7 +68893,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); const isFile = this.files.has(p22); const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, @@ -67958,7 +68933,7 @@ var DeeplakeFs = class _DeeplakeFs { } async realpath(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return p22; if (!this.files.has(p22) && !this.dirs.has(p22)) throw fsErr("ENOENT", "no such file or directory", p22); @@ -67983,14 +68958,14 @@ var DeeplakeFs = class _DeeplakeFs { const parent = parentOf(p22); if (!this.dirs.has(parent)) this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); + this.dirs.get(parent).add(basename5(p22)); } async readdir(path2) { const p22 = normPath(path2); if (!this.dirs.has(p22)) throw fsErr("ENOTDIR", "not a directory", p22); const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -68002,7 +68977,7 @@ var DeeplakeFs = class _DeeplakeFs { const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || !this.sessionsOnly && !this.indexDisabled && child === "/index.md") && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false }; @@ -68038,7 +69013,7 @@ var DeeplakeFs = class _DeeplakeFs { for (const fp of safeToDelete) this.removeFromTree(fp); this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + this.dirs.get(parentOf(p22))?.delete(basename5(p22)); if (safeToDelete.length > 0) { const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); @@ -68713,8 +69688,8 @@ var YargsParser = class { if (typeof envPrefix === "undefined") return; const prefix = typeof envPrefix === "string" ? 
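// Note on the DeeplakeFs changes above: the virtual /index.md now materializes
// only when neither sessions-only mode nor index disabling is active, and
// exists(), stat(), realpath(), readdir(), the dirent listing, and the read
// paths all share the same !sessionsOnly && !indexDisabled guard, so the
// virtual file appears and disappears consistently across the IFileSystem
// surface.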
envPrefix : ""; - const env2 = mixin.env(); - Object.keys(env2).forEach(function(envVar) { + const env3 = mixin.env(); + Object.keys(env3).forEach(function(envVar) { if (prefix === "" || envVar.lastIndexOf(prefix, 0) === 0) { const keys = envVar.split("__").map(function(key, i11) { if (i11 === 0) { @@ -68723,7 +69698,7 @@ var YargsParser = class { return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { - setArg(keys.join("."), env2[envVar]); + setArg(keys.join("."), env3[envVar]); } } }); @@ -69034,12 +70009,12 @@ if (nodeVersion) { throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -var env = process ? process.env : {}; +var env2 = process ? process.env : {}; var require2 = createRequire ? createRequire(import.meta.url) : void 0; var parser = new YargsParser({ cwd: process.cwd, env: () => { - return env; + return env2; }, format, normalize, @@ -69112,8 +70087,7 @@ function createGrepCommand(client, fs3, table, sessionsTable) { try { const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), - pathFilter: buildPathFilterForTargets(targets), - limit: 100 + pathFilter: buildPathFilterForTargets(targets) }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions), @@ -69136,7 +70110,8 @@ function createGrepCommand(client, fs3, table, sessionsTable) { } } const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : void 0; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? output.join("\n") + "\n" : "", stderr: "", diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index e6081b5..3fd2608 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -5,13 +5,13 @@ import { readFileSync as readFileSync4, existsSync as existsSync4 } from "node:f // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -52,6 +52,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? 
join(home, ".deeplake", "memory") }; } @@ -68,9 +73,6 @@ import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); -function utcTimestamp(d = /* @__PURE__ */ new Date()) { - return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; -} function log(tag, msg) { if (!DEBUG) return; @@ -82,6 +84,12 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlIdent(name) { + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`); + } + return name; +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); @@ -99,6 +107,22 @@ function traceSql(msg) { if (debugFileLog) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -106,7 +130,7 @@ var MAX_CONCURRENCY = 5; var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); } function isTimeoutError(error) { const name = error instanceof Error ? error.name.toLowerCase() : ""; @@ -139,7 +163,7 @@ var Semaphore = class { this.active++; return; } - await new Promise((resolve) => this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -202,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -228,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -287,6 +315,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -384,50 +435,327 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT 
NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, 
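// Index names for these lookup indexes come from buildLookupIndexName, so this
// ensureLookupIndex call yields "idx_graph_edges_source_target_relation" when
// the edges table keeps its default name "graph_edges".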
"source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 
'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } }; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + // dist/src/hooks/codex/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; -import { fileURLToPath } from "node:url"; -import { dirname, join as join5 } from "node:path"; -import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; +import { dirname, join as join4 } from "node:path"; +import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; -// dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join4 } from "node:path"; -function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join4(hooksDir, filename); - return { - path, - log(msg) { - try { - mkdirSync2(hooksDir, { recursive: true }); - appendFileSync2(path, `[${utcTimestamp()}] ${msg} -`); - } catch { - } - } - }; -} +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID3 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. 
+- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID4 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; // dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir3(); -var wikiLogger = makeWikiLogger(join5(HOME, ".codex", "hooks")); -var WIKI_LOG = wikiLogger.path; -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. +var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. 
SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -441,42 +769,59 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. 
LENGTH LIMIT: Keep the total summary under 4000 characters.`; -var wikiLog = wikiLogger.log; +function wikiLog(msg) { + try { + mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} +`); + } catch { + } +} function findCodexBin() { try { return execSync("which codex 2>/dev/null", { encoding: "utf-8" }).trim(); @@ -487,9 +832,9 @@ function findCodexBin() { function spawnCodexWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join5(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync3(tmpDir, { recursive: true }); - const configFile = join5(tmpDir, "config.json"); + const tmpDir = join4(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync2(tmpDir, { recursive: true }); + const configFile = join4(tmpDir, "config.json"); writeFileSync2(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, @@ -497,17 +842,24 @@ function spawnCodexWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, tmpDir, codexBin: findCodexBin(), wikiLog: WIKI_LOG, - hooksDir: join5(HOME, ".codex", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + hooksDir: join4(HOME, ".codex", "hooks"), + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join5(bundleDir, "wiki-worker.js"); + const workerPath = join4(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -515,164 +867,426 @@ function spawnCodexWikiWorker(opts) { wikiLog(`${reason}: spawned summary worker for ${sessionId}`); } function bundleDirFromImportMeta(importMetaUrl) { - return dirname(fileURLToPath(importMetaUrl)); + return dirname(fileURLToPath2(importMetaUrl)); } -// dist/src/hooks/summary-state.js -import { readFileSync as readFileSync3, writeFileSync as writeFileSync3, writeSync, mkdirSync as mkdirSync4, renameSync, existsSync as existsSync3, unlinkSync, openSync, closeSync } from "node:fs"; +// dist/src/hooks/session-queue.js +import { appendFileSync as appendFileSync3, closeSync, existsSync as existsSync3, mkdirSync as mkdirSync3, openSync, readFileSync as readFileSync3, readdirSync, renameSync, rmSync, statSync, writeFileSync as writeFileSync3 } from "node:fs"; +import { dirname as dirname2, join as join5 } from "node:path"; import { homedir as homedir4 } from "node:os"; -import { join as join6 } from "node:path"; -var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join6(homedir4(), ".claude", "hooks", "summary-state"); -var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); -function lockPath(sessionId) { - return join6(STATE_DIR, `${sessionId}.lock`); -} -function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync4(STATE_DIR, { recursive: true }); - const p = lockPath(sessionId); - if (existsSync3(p)) { +var DEFAULT_QUEUE_DIR = join5(homedir4(), 
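// buildSessionPath (defined just below) yields one JSONL path per session, e.g.
// userName "ava", orgName "acme", workspaceId "ws1", sessionId "s42" map to
// "/sessions/ava/ava_acme_ws1_s42.jsonl" (example values only).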
".deeplake", "queue"); +var DEFAULT_MAX_BATCH_ROWS = 50; +var DEFAULT_STALE_INFLIGHT_MS = 6e4; +var DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 6e4; +var BUSY_WAIT_STEP_MS = 100; +var SessionWriteDisabledError = class extends Error { + constructor(message) { + super(message); + this.name = "SessionWriteDisabledError"; + } +}; +function buildSessionPath(config, sessionId) { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} +function buildQueuedSessionRow(args) { + const structured = extractStructuredSessionFields(args.line, args.sessionId); + return { + id: crypto.randomUUID(), + path: args.sessionPath, + filename: args.sessionPath.split("/").pop() ?? "", + message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, + author: args.userName, + sizeBytes: Buffer.byteLength(args.line, "utf-8"), + project: args.projectName, + description: args.description, + agent: args.agent, + creationDate: args.timestamp, + lastUpdateDate: args.timestamp + }; +} +function appendQueuedSessionRow(row, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync3(queueDir, { recursive: true }); + const sessionId = extractSessionId(row.path); + const queuePath = getQueuePath(queueDir, sessionId); + appendFileSync3(queuePath, `${JSON.stringify(row)} +`); + return queuePath; +} +function buildSessionInsertSql(sessionsTable, rows) { + if (rows.length === 0) + throw new Error("buildSessionInsertSql: rows must not be empty"); + const table = sqlIdent(sessionsTable); + const values = rows.map((row) => { + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + }).join(", "); + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; +} +function coerceJsonbPayload(message) { + try { + return JSON.stringify(JSON.parse(message)); + } catch { + return JSON.stringify({ + type: "raw_message", + content: message + }); + } +} +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +function extractString(value) { + return typeof value === "string" ? value : value == null ? 
"" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} +async function flushSessionQueue(api, opts) { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; + const staleInflightMs = opts.staleInflightMs ?? DEFAULT_STALE_INFLIGHT_MS; + const waitIfBusyMs = opts.waitIfBusyMs ?? 0; + const drainAll = opts.drainAll ?? false; + mkdirSync3(queueDir, { recursive: true }); + const queuePath = getQueuePath(queueDir, opts.sessionId); + const inflightPath = getInflightPath(queueDir, opts.sessionId); + if (isSessionWriteDisabled(opts.sessionsTable, queueDir)) { + return existsSync3(queuePath) || existsSync3(inflightPath) ? { status: "disabled", rows: 0, batches: 0 } : { status: "empty", rows: 0, batches: 0 }; + } + let totalRows = 0; + let totalBatches = 0; + let flushedAny = false; + while (true) { + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + if (existsSync3(inflightPath)) { + if (waitIfBusyMs > 0) { + await waitForInflightToClear(inflightPath, waitIfBusyMs); + if (opts.allowStaleInflight) + recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + } + if (existsSync3(inflightPath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "busy", rows: 0, batches: 0 }; + } + } + if (!existsSync3(queuePath)) { + return flushedAny ? { status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } try { - const ageMs = Date.now() - parseInt(readFileSync3(p, "utf-8"), 10); - if (Number.isFinite(ageMs) && ageMs < maxAgeMs) - return false; - } catch (readErr) { - dlog(`lock file unreadable for ${sessionId}, treating as stale: ${readErr.message}`); + renameSync(queuePath, inflightPath); + } catch (e) { + if (e?.code === "ENOENT") { + return flushedAny ? 
{ status: "flushed", rows: totalRows, batches: totalBatches } : { status: "empty", rows: 0, batches: 0 }; + } + throw e; } try { - unlinkSync(p); - } catch (unlinkErr) { - dlog(`could not unlink stale lock for ${sessionId}: ${unlinkErr.message}`); - return false; + const { rows, batches } = await flushInflightFile(api, opts.sessionsTable, inflightPath, maxBatchRows); + totalRows += rows; + totalBatches += batches; + flushedAny = flushedAny || rows > 0; + } catch (e) { + requeueInflight(queuePath, inflightPath); + if (e instanceof SessionWriteDisabledError) { + return { status: "disabled", rows: totalRows, batches: totalBatches }; + } + throw e; + } + if (!drainAll) { + return { status: "flushed", rows: totalRows, batches: totalBatches }; } } - try { - const fd = openSync(p, "wx"); +} +function getQueuePath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.jsonl`); +} +function getInflightPath(queueDir, sessionId) { + return join5(queueDir, `${sessionId}.inflight`); +} +function extractSessionId(sessionPath) { + const filename = sessionPath.split("/").pop() ?? ""; + return filename.replace(/\.jsonl$/, "").split("_").pop() ?? filename; +} +async function flushInflightFile(api, sessionsTable, inflightPath, maxBatchRows) { + const rows = readQueuedRows(inflightPath); + if (rows.length === 0) { + rmSync(inflightPath, { force: true }); + return { rows: 0, batches: 0 }; + } + let ensured = false; + let batches = 0; + const queueDir = dirname2(inflightPath); + for (let i = 0; i < rows.length; i += maxBatchRows) { + const chunk = rows.slice(i, i + maxBatchRows); + const sql = buildSessionInsertSql(sessionsTable, chunk); try { - writeSync(fd, String(Date.now())); - } finally { - closeSync(fd); + await api.query(sql); + } catch (e) { + if (isSessionWriteAuthError(e)) { + markSessionWriteDisabled(sessionsTable, errorMessage(e), queueDir); + throw new SessionWriteDisabledError(errorMessage(e)); + } + if (!ensured && isEnsureSessionsTableRetryable(e)) { + try { + await api.ensureSessionsTable(sessionsTable); + } catch (ensureError) { + if (isSessionWriteAuthError(ensureError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(ensureError), queueDir); + throw new SessionWriteDisabledError(errorMessage(ensureError)); + } + throw ensureError; + } + ensured = true; + try { + await api.query(sql); + } catch (retryError) { + if (isSessionWriteAuthError(retryError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(retryError), queueDir); + throw new SessionWriteDisabledError(errorMessage(retryError)); + } + throw retryError; + } + } else { + throw e; + } } - return true; - } catch (e) { - if (e.code === "EEXIST") - return false; - throw e; + batches += 1; } + clearSessionWriteDisabled(sessionsTable, queueDir); + rmSync(inflightPath, { force: true }); + return { rows: rows.length, batches }; +} +function readQueuedRows(path) { + const raw = readFileSync3(path, "utf-8"); + return raw.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => JSON.parse(line)); +} +function requeueInflight(queuePath, inflightPath) { + if (!existsSync3(inflightPath)) + return; + const inflight = readFileSync3(inflightPath, "utf-8"); + appendFileSync3(queuePath, inflight); + rmSync(inflightPath, { force: true }); +} +function recoverStaleInflight(queuePath, inflightPath, staleInflightMs) { + if (!existsSync3(inflightPath) || !isStale(inflightPath, staleInflightMs)) + return; + requeueInflight(queuePath, inflightPath); +} +function isStale(path, staleInflightMs) { + return Date.now() - 
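// Flush-protocol sketch (aside; assumes the helpers above are importable): rows
// queued by appendQueuedSessionRow live in ~/.deeplake/queue/<sessionId>.jsonl.
// flushSessionQueue claims them by renaming the file to <sessionId>.inflight
// (atomic within one filesystem), inserts them in batches of maxBatchRows, and
// deletes the inflight file on success; on failure requeueInflight appends the
// claimed rows back onto the queue. A caller-side sketch:
//
//   const result = await flushSessionQueue(api, {
//     sessionId: "abc-123",
//     sessionsTable: "sessions",
//     drainAll: true,           // keep claiming until the queue file is gone
//     waitIfBusyMs: 2000,       // poll another process's inflight for up to 2s
//     allowStaleInflight: true, // reclaim inflight files idle >= 60s
//   });
//   // result.status: "flushed" | "empty" | "busy" | "disabled"
//
// One caveat worth noting: a crash between a successful INSERT and inflight
// deletion replays the same rows (same ids) after stale recovery, so readers
// should treat session rows as at-least-once delivery.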
statSync(path).mtimeMs >= staleInflightMs; +} +function isEnsureSessionsTableRetryable(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("does not exist") || message.includes("doesn't exist") || message.includes("relation") || message.includes("not found"); +} +function isSessionWriteAuthError(error) { + const message = errorMessage(error).toLowerCase(); + return message.includes("403") || message.includes("401") || message.includes("forbidden") || message.includes("unauthorized"); +} +function markSessionWriteDisabled(sessionsTable, reason, queueDir = DEFAULT_QUEUE_DIR) { + mkdirSync3(queueDir, { recursive: true }); + writeFileSync3(getSessionWriteDisabledPath(queueDir, sessionsTable), JSON.stringify({ + disabledAt: (/* @__PURE__ */ new Date()).toISOString(), + reason, + sessionsTable + })); +} +function clearSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR) { + rmSync(getSessionWriteDisabledPath(queueDir, sessionsTable), { force: true }); } -function releaseLock(sessionId) { +function isSessionWriteDisabled(sessionsTable, queueDir = DEFAULT_QUEUE_DIR, ttlMs = DEFAULT_AUTH_FAILURE_TTL_MS) { + const path = getSessionWriteDisabledPath(queueDir, sessionsTable); + if (!existsSync3(path)) + return false; try { - unlinkSync(lockPath(sessionId)); - } catch (e) { - if (e?.code !== "ENOENT") { - dlog(`releaseLock unlink failed for ${sessionId}: ${e.message}`); + const raw = readFileSync3(path, "utf-8"); + const state = JSON.parse(raw); + const ageMs = Date.now() - new Date(state.disabledAt).getTime(); + if (Number.isNaN(ageMs) || ageMs >= ttlMs) { + rmSync(path, { force: true }); + return false; } + return true; + } catch { + rmSync(path, { force: true }); + return false; } } - -// dist/src/utils/session-path.js -function buildSessionPath(config, sessionId) { - const workspace = config.workspaceId ?? "default"; - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${workspace}_${sessionId}.jsonl`; +function getSessionWriteDisabledPath(queueDir, sessionsTable) { + return join5(queueDir, `.${sessionsTable}.disabled.json`); +} +function errorMessage(error) { + return error instanceof Error ? error.message : String(error); +} +async function waitForInflightToClear(inflightPath, waitIfBusyMs) { + const startedAt = Date.now(); + while (existsSync3(inflightPath) && Date.now() - startedAt < waitIfBusyMs) { + await sleep2(BUSY_WAIT_STEP_MS); + } +} +function sleep2(ms) { + return new Promise((resolve2) => setTimeout(resolve2, ms)); } // dist/src/hooks/codex/stop.js var log3 = (msg) => log("codex-stop", msg); -var CAPTURE = process.env.HIVEMIND_CAPTURE !== "false"; -async function main() { - if (process.env.HIVEMIND_WIKI_WORKER === "1") - return; - const input = await readStdin(); - const sessionId = input.session_id; - if (!sessionId) - return; - const config = loadConfig(); +var CAPTURE = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false"; +function extractLastAssistantMessage(transcript) { + const lines = transcript.trim().split("\n").reverse(); + for (const line of lines) { + try { + const entry = JSON.parse(line); + const msg = entry.payload ?? entry; + if (msg.role === "assistant" && msg.content) { + const content = typeof msg.content === "string" ? msg.content : Array.isArray(msg.content) ? 
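// Circuit-breaker sketch (aside; names as defined above): 401/403-style failures
// disable writes for the whole table instead of retrying on every hook invocation:
//
//   markSessionWriteDisabled("sessions", "Query failed: 403: forbidden");
//   isSessionWriteDisabled("sessions");    // true for the next 5 minutes
//   clearSessionWriteDisabled("sessions"); // also cleared by any successful flush
//
// The marker is a shared file (~/.deeplake/queue/.sessions.disabled.json), so it
// covers every process writing the same table; a corrupt or unreadable marker is
// deleted and treated as cleared rather than wedging the queue.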
msg.content.filter((b) => b.type === "output_text" || b.type === "text").map((b) => b.text).join("\n") : ""; + if (content) + return content.slice(0, 4e3); + } + } catch { + } + } + return ""; +} +function buildCodexStopEntry(input, timestamp, lastAssistantMessage) { + return { + id: crypto.randomUUID(), + session_id: input.session_id, + transcript_path: input.transcript_path, + cwd: input.cwd, + hook_event_name: input.hook_event_name, + model: input.model, + timestamp, + type: lastAssistantMessage ? "assistant_message" : "assistant_stop", + content: lastAssistantMessage + }; +} +async function runCodexStopHook(input, deps = {}) { + const { wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", captureEnabled = CAPTURE, config = loadConfig(), now = () => (/* @__PURE__ */ new Date()).toISOString(), transcriptExists = existsSync4, readTranscript = (path) => readFileSync4(path, "utf-8"), createApi = (activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, activeConfig.sessionsTableName), appendQueuedSessionRowFn = appendQueuedSessionRow, buildQueuedSessionRowFn = buildQueuedSessionRow, flushSessionQueueFn = flushSessionQueue, spawnCodexWikiWorkerFn = spawnCodexWikiWorker, wikiLogFn = wikiLog, bundleDir = bundleDirFromImportMeta(import.meta.url), logFn = log3 } = deps; + if (wikiWorker || !input.session_id) + return { status: "skipped" }; if (!config) { - log3("no config"); - return; + logFn("no config"); + return { status: "no_config" }; } - if (CAPTURE) { + let entry; + let flushStatus; + if (captureEnabled) { try { - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); - const ts = (/* @__PURE__ */ new Date()).toISOString(); + const ts = now(); let lastAssistantMessage = ""; if (input.transcript_path) { try { - const transcriptPath = input.transcript_path; - if (existsSync4(transcriptPath)) { - const transcript = readFileSync4(transcriptPath, "utf-8"); - const lines = transcript.trim().split("\n").reverse(); - for (const line2 of lines) { - try { - const entry2 = JSON.parse(line2); - const msg = entry2.payload ?? entry2; - if (msg.role === "assistant" && msg.content) { - const content = typeof msg.content === "string" ? msg.content : Array.isArray(msg.content) ? msg.content.filter((b) => b.type === "output_text" || b.type === "text").map((b) => b.text).join("\n") : ""; - if (content) { - lastAssistantMessage = content.slice(0, 4e3); - break; - } - } - } catch { - } + if (transcriptExists(input.transcript_path)) { + lastAssistantMessage = extractLastAssistantMessage(readTranscript(input.transcript_path)); + if (lastAssistantMessage) { + logFn(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); } - if (lastAssistantMessage) - log3(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); } } catch (e) { - log3(`transcript read failed: ${e.message}`); + logFn(`transcript read failed: ${e.message}`); } } - const entry = { - id: crypto.randomUUID(), - session_id: sessionId, - transcript_path: input.transcript_path, - cwd: input.cwd, - hook_event_name: input.hook_event_name, - model: input.model, - timestamp: ts, - type: lastAssistantMessage ? 
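// Test-seam sketch (aside): runCodexStopHook takes all of its side effects as
// injectable deps, and (further down) the bundle only calls main() when run
// directly, so a test can import and drive the whole hook without fs, network,
// or spawned processes. A vitest-style sketch under those assumptions, with
// fakeConfig and fakeApi as hypothetical fixtures:
//
//   const rows = [];
//   const result = await runCodexStopHook(
//     { session_id: "s1", cwd: "/tmp/proj", transcript_path: "/t.jsonl" },
//     {
//       config: fakeConfig,
//       transcriptExists: () => true,
//       readTranscript: () =>
//         JSON.stringify({ role: "assistant", content: "done" }),
//       createApi: () => fakeApi,
//       appendQueuedSessionRowFn: (row) => rows.push(row),
//       flushSessionQueueFn: async () => ({ status: "flushed", rows: 1, batches: 1 }),
//       spawnCodexWikiWorkerFn: () => {},
//       wikiLogFn: () => {},
//       logFn: () => {},
//     },
//   );
//   // result.status === "complete", result.flushStatus === "flushed"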
"assistant_message" : "assistant_stop", - content: lastAssistantMessage - }; + entry = buildCodexStopEntry(input, ts, lastAssistantMessage); const line = JSON.stringify(entry); - const sessionPath = buildSessionPath(config, sessionId); + const sessionPath = buildSessionPath(config, input.session_id); const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); - const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', 'Stop', 'codex', '${ts}', '${ts}')`; - await api.query(insertSql); - log3("stop event captured"); + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: "Stop", + agent: "codex", + timestamp: ts + })); + const flush = await flushSessionQueueFn(createApi(config), { + sessionId: input.session_id, + sessionsTable: config.sessionsTableName, + drainAll: true + }); + flushStatus = flush.status; + logFn(`stop flush ${flush.status}: rows=${flush.rows} batches=${flush.batches}`); } catch (e) { - log3(`capture failed: ${e.message}`); + logFn(`capture failed: ${e.message}`); } } - if (!CAPTURE) - return; - if (!tryAcquireLock(sessionId)) { - wikiLog(`Stop: periodic worker already running for ${sessionId}, skipping`); - return; - } - wikiLog(`Stop: triggering summary for ${sessionId}`); - try { - spawnCodexWikiWorker({ - config, - sessionId, - cwd: input.cwd ?? "", - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Stop" - }); - } catch (e) { - log3(`spawn failed: ${e.message}`); - try { - releaseLock(sessionId); - } catch (releaseErr) { - log3(`releaseLock after spawn failure also failed: ${releaseErr.message}`); - } - throw e; - } + if (!captureEnabled) + return { status: "complete", entry }; + wikiLogFn(`Stop: triggering summary for ${input.session_id}`); + spawnCodexWikiWorkerFn({ + config, + sessionId: input.session_id, + cwd: input.cwd ?? "", + bundleDir, + reason: "Stop" + }); + return { status: "complete", flushStatus, entry }; } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +async function main() { + const input = await readStdin(); + await runCodexStopHook(input); +} +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + buildCodexStopEntry, + extractLastAssistantMessage, + runCodexStopHook +}; diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index 913c279..468aaee 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -106,12 +106,60 @@ function releaseLock(sessionId) { // dist/src/hooks/upload-summary.js import { randomUUID } from "node:crypto"; + +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? 
match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} + +// dist/src/hooks/upload-summary.js function esc(s) { return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } function extractDescription(text) { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } async function uploadSummary(query2, params) { const { tableName, vpath, fname, userName, project, agent, text } = params; @@ -129,8 +177,555 @@ async function uploadSummary(query2, params) { return { path: "insert", sql, descLength: desc.length, summaryLength: text.length }; } +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. 
+- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString(value) { + return typeof value === "string" ? value.trim() : ""; +} +function normalizeAliasList(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function parseGraphExtraction(raw) { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned); + const nodes = Array.isArray(parsed["nodes"]) ? parsed["nodes"] : []; + const edges = Array.isArray(parsed["edges"]) ? parsed["edges"] : []; + return { + nodes: nodes.map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]) + })).filter((node) => node.name), + edges: edges.map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]) + })).filter((edge) => edge.source && edge.target && edge.relation) + }; +} +function slugify(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildGraphNodeId(name, _type = "other") { + return `entity:${slugify(name)}`; +} +function buildNodeSearchText(node) { + return [ + node.name, + node.type ?? "other", + ...node.aliases ?? [], + node.summary ?? "" + ].filter(Boolean).join(" | "); +} +function buildEdgeSearchText(edge, sourceNodeId, targetNodeId) { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId + ].filter(Boolean).join(" | "); +} +function buildKnowledgeGraphPrompt(args) { + return (args.template ?? GRAPH_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +function wrapGraphPhaseError(error, args) { + const wrapped = new Error(`graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +async function replaceSessionGraph(params) { + const ts = params.ts ?? 
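// Extraction-parsing sketch (aside): parseGraphExtraction tolerates fenced model
// replies and normalizes as it goes:
//
//   const raw = '```json\n{"nodes":[{"name":" Ava ","type":"person"}],' +
//     '"edges":[{"source":"Ava","target":"Brazil","relation":"Home Country"}]}\n```';
//   parseGraphExtraction(raw);
//   // -> nodes: [{ name: "Ava", type: "person", summary: "", aliases: [] }]
//   //    edges: [{ source: "Ava", target: "Brazil", relation: "home_country",
//   //              summary: "", evidence: "" }]
//
// buildGraphNodeId then keys both endpoints by slug ("entity:ava",
// "entity:brazil") regardless of node type, and replaceSessionGraph below
// backfills stub nodes for any edge endpoint the model forgot to list.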
(/* @__PURE__ */ new Date()).toISOString(); + const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`; + const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`; + const nodeFilename = `${params.sessionId}.jsonl`; + const edgeFilename = `${params.sessionId}.jsonl`; + const nodeMap = /* @__PURE__ */ new Map(); + for (const node of params.graph.nodes) { + const key = buildGraphNodeId(node.name, node.type); + nodeMap.set(key, { + name: node.name, + type: node.type || "other", + summary: node.summary || "", + aliases: node.aliases || [] + }); + } + for (const edge of params.graph.edges) { + const sourceKey = buildGraphNodeId(edge.source); + const targetKey = buildGraphNodeId(edge.target); + if (!nodeMap.has(sourceKey)) + nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] }); + if (!nodeMap.has(targetKey)) + nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] }); + } + const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: deleteNodesSql + }); + } + try { + await params.query(deleteEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: deleteEdgesSql + }); + } + const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => { + const summary = node.summary || buildSummaryBlurb(`# Graph Node + +${node.name}`); + const aliases = (node.aliases ?? 
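// Replace-semantics note (aside): the DELETEs above are scoped to
// source_session_id, so re-summarizing a session rewrites only that session's
// graph delta and converges on retry; row UUIDs change each run but node_id and
// edge_id (built below as "entity:<slug>" and "source:relation:target") stay
// deterministic. One quirk as written: a node without a model-provided summary
// falls back to buildSummaryBlurb of a stub heading, which contains none of the
// recognized sections and therefore resolves to the literal string "completed".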
[]).join(", "); + const searchText = buildNodeSearchText(node); + return `('${randomUUID2()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', '${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', '${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (nodeRows.length > 0) { + const insertNodesSql = `INSERT INTO "${params.nodesTable}" (id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${nodeRows.join(", ")}`; + try { + await params.query(insertNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: insertNodesSql + }); + } + } + const edgeRows = params.graph.edges.map((edge) => { + const sourceNodeId = buildGraphNodeId(edge.source); + const targetNodeId = buildGraphNodeId(edge.target); + const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId); + const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`; + const evidence = edge.evidence || ""; + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + return `('${randomUUID2()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', '${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (edgeRows.length > 0) { + const insertEdgesSql = `INSERT INTO "${params.edgesTable}" (id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${edgeRows.join(", ")}`; + try { + await params.query(insertEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: insertEdgesSql + }); + } + } + return { nodes: nodeRows.length, edges: edgeRows.length }; +} + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences2(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString2(value) { + return typeof value === "string" ? value.trim() : ""; +} +function normalizeAliases(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString2).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function normalizeFactType(value) { + return normalizeString2(value) || "other"; +} +function normalizeConfidence(value) { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return Math.max(0, Math.min(1, parsed)); + } + return void 0; +} +function slugify2(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildFactId(sessionId, fact, index) { + return [ + "fact", + slugify2(sessionId), + String(index + 1), + slugify2(fact.subject), + slugify2(fact.predicate), + slugify2(fact.object) + ].join(":"); +} +function buildFactSearchText(fact) { + return [ + fact.subject, + ...fact.subjectAliases ?? [], + fact.predicate, + fact.object, + ...fact.objectAliases ?? 
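// Fact-id sketch (aside): buildFactId is deterministic per session and position,
// so re-extracting the same session reproduces the same ids:
//
//   buildFactId("abc-123", { subject: "Leo", predicate: "home_country",
//     object: "Brazil" }, 0);
//   // -> "fact:abc_123:1:leo:home_country:brazil"
//
// slugify2 maps whitespace and hyphens alike to "_" and strips other
// punctuation, so session ids that differ only in those characters would
// collide; the ":" separator itself can never appear inside a slug.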
[], + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? "" + ].filter(Boolean).join(" | "); +} +function buildEntitySearchText(entity) { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries + ].filter(Boolean).join(" | "); +} +function mergeDelimited(existing, nextValues) { + const merged = new Set(existing.split(",").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} +function mergePipeDelimited(existing, nextValues, maxItems = 8) { + const merged = new Set(existing.split("|").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + if (merged.has(trimmed)) + continue; + if (merged.size >= maxItems) + break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} +function wrapFactsPhaseError(error, args) { + const wrapped = new Error(`facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +function buildEntityAggregate(entityMap, args) { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) + existing.aliases.add(alias); + if (args.summary) + existing.summaries.add(args.summary); + if (args.searchText) + existing.searchTerms.add(args.searchText); + return existing; + } + const created = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? 
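// Merge-semantics sketch (aside): entity upserts accumulate instead of
// overwriting. mergeDelimited dedupes comma lists; mergePipeDelimited dedupes
// pipe lists under a hard cap:
//
//   mergeDelimited("Ava, Leo", ["Leo", "Mia", ""]);   // -> "Ava, Leo, Mia"
//   mergePipeDelimited("a | b", ["b", "c", "d"], 3);  // -> "a | b | c"
//
// The cap means a long-lived entity keeps its earliest summaries and search
// terms; later sessions stop contributing once the pipe list is full.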
[args.searchText] : []) + }; + entityMap.set(entityId, created); + return created; +} +async function upsertEntities(params) { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query(`SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`); + if (existingRows.length === 0) { + const insertSql = `INSERT INTO "${params.entitiesTable}" (id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${randomUUID3()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', '${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? ""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? ""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? ""), [params.sourcePath]); + const existingType = normalizeString2(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = `UPDATE "${params.entitiesTable}" SET canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} +function parseMemoryFactExtraction(raw) { + const cleaned = stripCodeFences2(raw); + const parsed = JSON.parse(cleaned); + const facts = Array.isArray(parsed["facts"]) ? 
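// Upsert-shape note (aside): upsertEntities above is SELECT-then-INSERT/UPDATE,
// not an atomic upsert, so two workers touching the same entity_id at the same
// moment can both take the INSERT branch and leave duplicate entity rows;
// readers that assume entity_id is unique should prefer the newest
// last_update_date. Entity types only ever upgrade away from "other": an
// existing non-"other" type wins over whatever the new extraction proposes.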
parsed["facts"] : []; + const dedupe = /* @__PURE__ */ new Set(); + return { + facts: facts.map((fact) => ({ + subject: normalizeString2(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString2(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString2(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString2(fact["summary"]), + evidence: normalizeString2(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString2(fact["valid_at"]), + validFrom: normalizeString2(fact["valid_from"]), + validTo: normalizeString2(fact["valid_to"]) + })).filter((fact) => fact.subject && fact.predicate && fact.object).filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) + return false; + dedupe.add(key); + return true; + }) + }; +} +function buildMemoryFactTranscript(rows) { + const normalized = rows.map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? row.turnIndex : 0, + speaker: normalizeString2(row.speaker), + text: normalizeString2(row.text), + eventType: normalizeString2(row.eventType) || "message", + turnSummary: normalizeString2(row.turnSummary), + sourceDateTime: normalizeString2(row.sourceDateTime) || normalizeString2(row.creationDate) + })).filter((row) => row.text || row.turnSummary); + if (normalized.length === 0) + return "(no transcript rows)"; + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}` + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} +function buildMemoryFactPrompt(args) { + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +async function replaceSessionFacts(params) { + const ts = params.ts ?? 
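// Transcript-shape sketch (aside): buildMemoryFactTranscript flattens session
// rows into one bracketed line per event before prompting:
//
//   buildMemoryFactTranscript([{
//     turnIndex: 3, speaker: "user", text: "I moved to Lisbon last month",
//     eventType: "user_message", turnSummary: "", sourceDateTime: "2025-01-10",
//     creationDate: "",
//   }]);
//   // -> "[turn=3 | time=2025-01-10 | speaker=user] I moved to Lisbon last month"
//
// Rows with neither text nor a turn summary are dropped, and an empty input
// yields the literal "(no transcript rows)" so the prompt never interpolates an
// empty block.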
(/* @__PURE__ */ new Date()).toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql + }); + } + const entityMap = /* @__PURE__ */ new Map(); + const factRows = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? [], + summary, + searchText + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? 
"" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "" + }; + }); + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}` + }); + } + if (factRows.length > 0) { + const values = factRows.map((row) => `('${randomUUID3()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', '${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', '${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', '${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertFactsSql = `INSERT INTO "${params.factsTable}" (id, path, filename, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql + }); + } + } + const linkRows = factRows.flatMap((row) => [ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject" + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object" + } + ]); + if (linkRows.length > 0) { + const values = linkRows.map((row) => `('${randomUUID3()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', '${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, '${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertLinksSql = `INSERT INTO "${params.linksTable}" (id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql + }); + } + } + return { + facts: factRows.length, + entities: entityMap.size, + links: linkRows.length + }; +} + // dist/src/hooks/codex/wiki-worker.js -var dlog2 = (msg) => log("codex-wiki-worker", msg); var cfg = 
JSON.parse(readFileSync2(process.argv[2], "utf-8")); var tmpDir = cfg.tmpDir; var tmpJsonl = join3(tmpDir, "session.jsonl"); @@ -178,14 +773,13 @@ async function query(sql, retries = 4) { function cleanup() { try { rmSync(tmpDir, { recursive: true, force: true }); - } catch (cleanupErr) { - dlog2(`cleanup failed to remove ${tmpDir}: ${cleanupErr.message}`); + } catch { } } async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + const rows = await query(`SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; @@ -241,6 +835,82 @@ async function main() { text }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate + }); + const graphRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + graphPrompt + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + graph + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e) { + wlog(`graph update failed: ${e.message}`); + } + try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
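// Recursion-guard note (aside): each `codex exec` call in this worker runs with
// HIVEMIND_WIKI_WORKER=1 and HIVEMIND_CAPTURE=false in its environment, and the
// stop hook earlier in this diff returns early when HIVEMIND_WIKI_WORKER is "1",
// so extraction subprocesses cannot re-capture themselves or spawn further
// workers. The graph and fact phases are also individually try/catch'd: a bad
// model reply fails one phase with a wlog line while the other still runs.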
row["creation_date"] : "" + }))); + const factPrompt = buildMemoryFactPrompt({ + transcriptText, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate + }); + const factsRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + factPrompt + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + extraction + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e) { + wlog(`fact update failed: ${e.message}`); + } try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); @@ -258,8 +928,7 @@ async function main() { cleanup(); try { releaseLock(cfg.sessionId); - } catch (releaseErr) { - dlog2(`releaseLock failed in finally for ${cfg.sessionId}: ${releaseErr.message}`); + } catch { } } } diff --git a/codex/tests/codex-integration.test.ts b/codex/tests/codex-integration.test.ts index d399a9d..72a78ec 100644 --- a/codex/tests/codex-integration.test.ts +++ b/codex/tests/codex-integration.test.ts @@ -115,6 +115,20 @@ describe("codex integration: session-start", () => { }); expect(raw).toContain("Do NOT jump straight to JSONL"); }); + + it("switches to sessions-only recall guidance when the env flag is set", () => { + const raw = runHook("session-start.js", { + session_id: "test-session-004c", + cwd: "/tmp", + hook_event_name: "SessionStart", + model: "gpt-5.2", + }, { + HIVEMIND_SESSIONS_ONLY: "1", + }); + expect(raw).toContain("SESSIONS-ONLY mode"); + expect(raw).toContain("Do NOT start with index.md or summaries"); + expect(raw).not.toContain("index.md (start here)"); + }); }); // ── Capture (UserPromptSubmit) ─────────────────────────────────────────────── diff --git a/esbuild.config.mjs b/esbuild.config.mjs index 95b2490..e43ad30 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -29,7 +29,7 @@ await build({ platform: "node", format: "esm", outdir: "claude-code/bundle", - external: ["node:*", "node-liblzma", "@mongodb-js/zstd"], + external: ["node:*", "node-liblzma", "@mongodb-js/zstd", "@huggingface/transformers", "onnxruntime-node"], }); for (const h of ccAll) { @@ -63,7 +63,7 @@ await build({ platform: "node", format: "esm", outdir: "codex/bundle", - external: ["node:*", "node-liblzma", "@mongodb-js/zstd"], + external: ["node:*", "node-liblzma", "@mongodb-js/zstd", "@huggingface/transformers", "onnxruntime-node"], }); for (const h of codexAll) { diff --git a/package.json b/package.json index 71727a4..7e445d0 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,10 @@ "build": "tsc && node esbuild.config.mjs", "bundle": "node esbuild.config.mjs", "dev": "tsc --watch", + "embeddings:backfill": "node --import tsx src/tools/backfill-harrier-embeddings.ts", + "embeddings:embed:python": "UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py embed", + "embeddings:run:python": "UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py run", + "embeddings:upload:python": 
"UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py upload", "shell": "tsx src/shell/deeplake-shell.ts", "test": "vitest run", "typecheck": "tsc --noEmit", @@ -24,6 +28,7 @@ "*.md": [] }, "dependencies": { + "@huggingface/transformers": "^4.1.0", "deeplake": "^0.3.30", "just-bash": "^2.14.0", "yargs-parser": "^22.0.0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bc2bb47 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "hivemind-harrier-backfill" +version = "0.1.0" +description = "Local Harrier embedding backfill helpers for Deeplake tables" +requires-python = ">=3.11" +dependencies = [ + "numpy>=1.26", + "safetensors>=0.4", + "torch>=2.4", + "transformers>=4.57", +] + +[tool.uv] +package = false diff --git a/scripts/backfill_harrier_embeddings.py b/scripts/backfill_harrier_embeddings.py new file mode 100644 index 0000000..35ff111 --- /dev/null +++ b/scripts/backfill_harrier_embeddings.py @@ -0,0 +1,1256 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import re +import sys +import time +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any +from urllib import error as urllib_error +from urllib import request as urllib_request + + +DEFAULT_MODEL_ID = ( + os.environ.get("HIVEMIND_HARRIER_MODEL_ID") + or os.environ.get("DEEPLAKE_HARRIER_MODEL_ID") + or "microsoft/harrier-oss-v1-270m" +) +DEFAULT_API_URL = "https://api.deeplake.ai" +DEFAULT_BATCH_SIZE = 8 +DEFAULT_SCAN_BATCH_SIZE = 64 +DEFAULT_MAX_LENGTH = 32_768 +DEFAULT_TIMEOUT_SECONDS = float( + os.environ.get("HIVEMIND_QUERY_TIMEOUT_MS") + or os.environ.get("DEEPLAKE_QUERY_TIMEOUT_MS") + or "10000" +) / 1000.0 +MAX_RETRIES = 3 +BASE_DELAY_SECONDS = 0.5 +RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504} +TOOL_INPUT_FIELDS = [ + "command", + "file_path", + "path", + "pattern", + "prompt", + "subagent_type", + "query", + "url", + "notebook_path", + "old_string", + "new_string", + "content", + "skill", + "args", + "taskId", + "status", + "subject", + "description", + "to", + "message", + "summary", + "max_results", +] +TOOL_RESPONSE_DROP = { + "interrupted", + "isImage", + "noOutputExpected", + "type", + "structuredPatch", + "userModified", + "originalFile", + "replaceAll", + "totalDurationMs", + "totalTokens", + "totalToolUseCount", + "usage", + "toolStats", + "durationMs", + "durationSeconds", + "bytes", + "code", + "codeText", + "agentId", + "agentType", + "verificationNudgeNeeded", + "numLines", + "numFiles", + "truncated", + "statusChange", + "updatedFields", + "isAgent", + "success", +} +ARTIFACT_SCHEMA_VERSION = 1 + + +def eprint(message: str) -> None: + sys.stderr.write(f"{message}\n") + + +def now_iso() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def compact(value: Any) -> str: + return value.strip() if isinstance(value, str) else "" + + +def as_str(value: Any) -> str: + if isinstance(value, str): + return value + if value is None: + return "" + return str(value) + + +def parse_positive_int(value: str | None, fallback: int) -> int: + if value is None: + return fallback + try: + parsed = int(value) + except ValueError: + return fallback + return parsed if parsed > 0 else fallback + + +def load_json(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def write_json_atomic(path: Path, payload: dict[str, Any]) -> 
None: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_suffix(path.suffix + ".tmp") + tmp_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + tmp_path.replace(path) + + +def write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + for row in rows: + handle.write(json.dumps(row, ensure_ascii=False) + "\n") + + +def read_jsonl(path: Path) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + with path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + rows.append(json.loads(line)) + return rows + + +def sql_ident(name: str) -> str: + if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name): + raise ValueError(f"Invalid SQL identifier: {name!r}") + return name + + +def sql_str(value: str) -> str: + return ( + value.replace("\\", "\\\\") + .replace("'", "''") + .replace("\x00", "") + .translate({codepoint: None for codepoint in list(range(1, 9)) + [11, 12] + list(range(14, 32)) + [127]}) + ) + + +def sql_float4_array(values: list[float]) -> str: + parts: list[str] = [] + for value in values: + if value != value or value == float("inf") or value == float("-inf"): + parts.append("0") + continue + parts.append(repr(float(value))) + return f"ARRAY[{', '.join(parts)}]::float4[]" + + +@dataclass +class Config: + token: str + org_id: str + org_name: str + user_name: str + workspace_id: str + api_url: str + memory_table: str + sessions_table: str + + +def load_config() -> Config: + creds_path = Path.home() / ".deeplake" / "credentials.json" + creds: dict[str, Any] = {} + if creds_path.exists(): + try: + creds = json.loads(creds_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise SystemExit(f"Failed to parse {creds_path}: {exc}") from exc + + env = os.environ + token = env.get("HIVEMIND_TOKEN") or env.get("DEEPLAKE_TOKEN") or creds.get("token") + org_id = env.get("HIVEMIND_ORG_ID") or env.get("DEEPLAKE_ORG_ID") or creds.get("orgId") + if not token or not org_id: + raise SystemExit("Missing Deeplake credentials. 
Run `deeplake login` or set HIVEMIND_* env vars.") + + return Config( + token=token, + org_id=org_id, + org_name=creds.get("orgName") or org_id, + user_name=creds.get("userName") or os.environ.get("USER") or "unknown", + workspace_id=env.get("HIVEMIND_WORKSPACE_ID") or env.get("DEEPLAKE_WORKSPACE_ID") or creds.get("workspaceId") or "default", + api_url=env.get("HIVEMIND_API_URL") or env.get("DEEPLAKE_API_URL") or creds.get("apiUrl") or DEFAULT_API_URL, + memory_table=env.get("HIVEMIND_TABLE") or env.get("DEEPLAKE_TABLE") or "memory", + sessions_table=env.get("HIVEMIND_SESSIONS_TABLE") or env.get("DEEPLAKE_SESSIONS_TABLE") or "sessions", + ) + + +class DeeplakeQueryError(RuntimeError): + pass + + +class DeeplakeApi: + def __init__( + self, + token: str, + api_url: str, + org_id: str, + workspace_id: str, + timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS, + ) -> None: + self.token = token + self.api_url = api_url.rstrip("/") + self.org_id = org_id + self.workspace_id = workspace_id + self.timeout_seconds = timeout_seconds + + def query(self, sql: str) -> list[dict[str, Any]]: + body = json.dumps({"query": sql}).encode("utf-8") + url = f"{self.api_url}/workspaces/{self.workspace_id}/tables/query" + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + "X-Activeloop-Org-Id": self.org_id, + } + + last_error: Exception | None = None + for attempt in range(MAX_RETRIES + 1): + req = urllib_request.Request(url, data=body, headers=headers, method="POST") + try: + with urllib_request.urlopen(req, timeout=self.timeout_seconds) as resp: + payload = json.loads(resp.read().decode("utf-8")) + columns = payload.get("columns") or [] + rows = payload.get("rows") or [] + return [dict(zip(columns, row, strict=False)) for row in rows] + except urllib_error.HTTPError as exc: + response_body = exc.read().decode("utf-8", errors="replace") + last_error = DeeplakeQueryError( + f"Query failed with HTTP {exc.code}: {response_body[:300]}" + ) + if exc.code in RETRYABLE_STATUS_CODES and attempt < MAX_RETRIES: + time.sleep(BASE_DELAY_SECONDS * (2**attempt)) + continue + raise last_error from exc + except urllib_error.URLError as exc: + last_error = DeeplakeQueryError(f"Query failed: {exc.reason}") + if attempt < MAX_RETRIES: + time.sleep(BASE_DELAY_SECONDS * (2**attempt)) + continue + raise last_error from exc + except TimeoutError as exc: + last_error = DeeplakeQueryError( + f"Query timeout after {self.timeout_seconds:.1f}s" + ) + raise last_error from exc + + raise DeeplakeQueryError(str(last_error or "Query failed")) + + +def ensure_sql_columns(api: DeeplakeApi, table_name: str, specs: list[tuple[str, str]]) -> None: + table = sql_ident(table_name) + for column_name, ddl in specs: + column = sql_ident(column_name) + try: + api.query(f'ALTER TABLE "{table}" ADD COLUMN IF NOT EXISTS "{column}" {ddl}') + except DeeplakeQueryError: + pass + + +def ensure_embedding_index(api: DeeplakeApi, table_name: str, column_name: str) -> None: + table = sql_ident(table_name) + column = sql_ident(column_name) + index_name = sql_ident(f"idx_{table_name}_{column_name}".replace("-", "_")) + try: + api.query( + f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{table}" USING deeplake_index ("{column}")' + ) + except DeeplakeQueryError: + pass + + +def join_sections(sections: list[tuple[str, str]]) -> str: + return "\n".join( + f"{label}: {value}" + for label, value in sections + if value + ) + + +def truncate_text(text: str, max_chars: int) -> str: + normalized = text.strip() + if len(normalized) <= 
max_chars: + return normalized + omitted = len(normalized) - max_chars + return f"{normalized[:max_chars].rstrip()}\n[truncated {omitted} chars]" + + +def try_parse_object(value: Any) -> dict[str, Any] | None: + if value is None: + return None + if isinstance(value, str): + try: + parsed = json.loads(value) + except json.JSONDecodeError: + return None + return parsed if isinstance(parsed, dict) else None + return value if isinstance(value, dict) else None + + +def maybe_parse_json(value: Any) -> Any: + if not isinstance(value, str): + return value + stripped = value.strip() + if not stripped or stripped[0] not in "[{": + return value + try: + return json.loads(stripped) + except json.JSONDecodeError: + return value + + +def snake_case(name: str) -> str: + return re.sub(r"([A-Z])", r"_\1", name).lower() + + +def camel_case(name: str) -> str: + return re.sub(r"_([a-z])", lambda match: match.group(1).upper(), name) + + +def format_tool_input(raw: Any) -> str: + payload = maybe_parse_json(raw) + if not isinstance(payload, dict): + return str(payload or "") + parts: list[str] = [] + for key in TOOL_INPUT_FIELDS: + if key not in payload: + continue + value = payload[key] + parts.append(f"{key}: {value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)}") + for key in ["glob", "output_mode", "limit", "offset"]: + if key in payload: + parts.append(f"{key}: {payload[key]}") + return "\n".join(parts) if parts else json.dumps(payload, ensure_ascii=False) + + +def format_tool_response(raw: Any, original_input: Any, tool_name: str | None) -> str: + payload = maybe_parse_json(raw) + if not isinstance(payload, dict): + return str(payload or "") + + if tool_name in {"Edit", "Write", "MultiEdit"}: + file_path = payload.get("filePath") + return f"[wrote {file_path}]" if file_path else "[ok]" + + stdout = payload.get("stdout") + if isinstance(stdout, str): + stderr = payload.get("stderr") + return stdout + (f"\nstderr: {stderr}" if stderr else "") + + content = payload.get("content") + if isinstance(content, str): + return content + + file_payload = payload.get("file") + if isinstance(file_payload, dict): + file_content = file_payload.get("content") + if isinstance(file_content, str): + return f"[{file_payload.get('filePath', '')}]\n{file_content}" + base64_value = file_payload.get("base64") + if isinstance(base64_value, str): + return f"[binary {file_payload.get('filePath', '')}: {len(base64_value)} base64 chars]" + + for key in ("filenames", "matches", "results"): + value = payload.get(key) + if isinstance(value, list): + if key == "results": + rendered = [ + item if isinstance(item, str) + else item.get("title") or item.get("url") or json.dumps(item, ensure_ascii=False) + for item in value + ] + else: + rendered = [item if isinstance(item, str) else json.dumps(item, ensure_ascii=False) for item in value] + return "\n".join(rendered) + + input_payload = maybe_parse_json(original_input) + kept: dict[str, Any] = {} + for key, value in payload.items(): + if key in TOOL_RESPONSE_DROP: + continue + if value in ("", False, None): + continue + if isinstance(input_payload, dict): + if key in input_payload and json.dumps(input_payload[key], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + snake = snake_case(key) + if snake in input_payload and json.dumps(input_payload[snake], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + camel = camel_case(key) + if camel in input_payload and 
json.dumps(input_payload[camel], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + kept[key] = value + + return json.dumps(kept, ensure_ascii=False) if kept else "[ok]" + + +def format_tool_call(payload: dict[str, Any]) -> str: + return ( + f"[tool:{payload.get('tool_name', '?')}]\n" + f"input: {format_tool_input(payload.get('tool_input'))}\n" + f"response: {format_tool_response(payload.get('tool_response'), payload.get('tool_input'), as_str(payload.get('tool_name')) or None)}" + ) + + +def normalize_content(path: str, raw: str) -> str: + if "/sessions/" not in path: + return raw + if not raw or raw[0] != "{": + return raw + try: + obj = json.loads(raw) + except json.JSONDecodeError: + return raw + + if isinstance(obj, dict) and (isinstance(obj.get("turns"), list) or isinstance(obj.get("dialogue"), list)): + return json.dumps(obj, indent=2, ensure_ascii=False) + "\n" + + # NOTE: the recalled-memory marker strings were lost in transit (angle-bracket + # tags stripped); the names below are assumed placeholders - align them with + # the hook's real markers. + recalled_open = "<recalled-memories>" + recalled_close = "</recalled-memories>" + + def strip_recalled(text: str) -> str: + start = text.find(recalled_open) + if start == -1: + return text + end = text.rfind(recalled_close) + if end == -1 or end < start: + return text + head = text[:start] + tail = text[end + len(recalled_close):] + return re.sub(r"\n{3,}", "\n\n", (head + tail).lstrip()) + + out: str | None = None + if isinstance(obj, dict) and obj.get("type") == "user_message": + out = f"[user] {strip_recalled(as_str(obj.get('content')))}" + elif isinstance(obj, dict) and obj.get("type") == "assistant_message": + agent_type = as_str(obj.get("agent_type")) + agent_suffix = f" (agent={agent_type})" if agent_type else "" + out = f"[assistant{agent_suffix}] {strip_recalled(as_str(obj.get('content')))}" + elif isinstance(obj, dict) and obj.get("type") == "tool_call": + out = format_tool_call(obj) + + if out is None: + return raw + trimmed = out.strip() + if ( + not trimmed + or trimmed in {"[user]", "[assistant]"} + or re.fullmatch(r"\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}", trimmed) + ): + return raw + return out + + +def extract_transcript_text(message: Any) -> str: + payload = try_parse_object(message) + if not payload: + return "" + turns = payload.get("turns") + if not isinstance(turns, list): + turns = payload.get("dialogue") + if not isinstance(turns, list) or not turns: + return "" + + intro = join_sections( + [ + ("Session path", compact(payload.get("source_path"))), + ("Conversation", compact(payload.get("conversation_id"))), + ( + "Date", + compact(payload.get("date_time")) + or compact(payload.get("date")), + ), + ] + ) + rendered_turns: list[str] = [] + for turn in turns: + if not isinstance(turn, dict): + continue + speaker = ( + compact(turn.get("speaker")) + or compact(turn.get("role")) + or compact(turn.get("author")) + or "speaker" + ) + text = ( + compact(turn.get("text")) + or compact(turn.get("content")) + or compact(turn.get("utterance")) + ) + if text: + rendered_turns.append(f"[{speaker}] {text}") + transcript = "\n".join(rendered_turns) + return "\n".join(part for part in [intro, transcript] if part) + + +def fallback_session_text(path: str, message: Any) -> str: + transcript_text = extract_transcript_text(message) + if transcript_text: + return transcript_text + + if isinstance(message, str): + return normalize_content(path or "/sessions/unknown.jsonl", message) + if isinstance(message, dict): + return normalize_content(path or "/sessions/unknown.jsonl", json.dumps(message, ensure_ascii=False)) + return "" + + +def build_memory_embedding_text(row: dict[str, Any], max_chars: int) -> str: + return truncate_text( + join_sections( + [ + ("Path",
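+ # Illustrative sketch (field values invented): a memory row renders as lines + # like "Path: /notes/auth.md", "Project: hivemind", "Summary: ..." joined by + # join_sections, then clipped to max_chars by truncate_text above.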
compact(row.get("path"))), + ("Filename", compact(row.get("filename"))), + ("Project", compact(row.get("project"))), + ("Description", compact(row.get("description"))), + ("Summary", compact(row.get("summary"))), + ] + ), + max_chars, + ) + + +def build_session_embedding_text(row: dict[str, Any], max_chars: int) -> str: + text = compact(row.get("text")) + turn_summary = compact(row.get("turn_summary")) + fallback = "" + if not text and not turn_summary: + fallback = compact(fallback_session_text(as_str(row.get("path")), row.get("message"))) + turn_index_value = row.get("turn_index") + turn_index = "" + if isinstance(turn_index_value, (int, float)) and int(turn_index_value) == turn_index_value: + turn_index = str(int(turn_index_value)) + return truncate_text( + join_sections( + [ + ("Path", compact(row.get("path"))), + ("Event", compact(row.get("event_type"))), + ("Speaker", compact(row.get("speaker"))), + ("Source time", compact(row.get("source_date_time"))), + ("Turn index", turn_index), + ("Text", text), + ("Turn summary", turn_summary), + ("Content", fallback), + ] + ), + max_chars, + ) + + +def stable_embedding_source_hash(text: str) -> str: + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def import_numpy(): + try: + import numpy as np + except ImportError as exc: + raise SystemExit( + "Missing dependency `numpy`. Install `scripts/requirements-harrier-embeddings.txt` first." + ) from exc + return np + + +def import_torch_and_transformers(): + try: + import torch + import torch.nn.functional as torch_f + from transformers import AutoModel, AutoTokenizer + except ImportError as exc: + raise SystemExit( + "Missing Python ML dependencies. Install `scripts/requirements-harrier-embeddings.txt` first." + ) from exc + return torch, torch_f, AutoModel, AutoTokenizer + + +def resolve_device(torch_module: Any, requested: str) -> str: + if requested != "auto": + return requested + if getattr(torch_module.backends, "mps", None) and torch_module.backends.mps.is_available(): + return "mps" + if torch_module.cuda.is_available(): + return "cuda" + return "cpu" + + +def resolve_torch_dtype(torch_module: Any, requested: str, device: str) -> Any: + normalized = requested.lower() + if normalized == "auto": + if device == "mps": + return torch_module.float32 + return "auto" + aliases = { + "fp16": torch_module.float16, + "float16": torch_module.float16, + "half": torch_module.float16, + "fp32": torch_module.float32, + "float32": torch_module.float32, + "float": torch_module.float32, + "bf16": torch_module.bfloat16, + "bfloat16": torch_module.bfloat16, + } + if normalized not in aliases: + raise SystemExit(f"Unsupported --dtype value: {requested}") + return aliases[normalized] + + +class HarrierEmbedder: + def __init__( + self, + model_id: str, + device: str, + dtype: str, + max_length: int, + local_files_only: bool, + cache_dir: str | None, + ) -> None: + self.model_id = model_id + self.requested_device = device + self.requested_dtype = dtype + self.max_length = max_length + self.local_files_only = local_files_only + self.cache_dir = cache_dir + self._np = None + self._torch = None + self._torch_f = None + self._tokenizer = None + self._model = None + self.device = "cpu" + self.vector_dim = 0 + self.dtype_name = "auto" + + def load(self) -> None: + if self._model is not None and self._tokenizer is not None: + return + + if self.cache_dir: + os.environ.setdefault("HF_HOME", self.cache_dir) + os.environ.setdefault("TRANSFORMERS_CACHE", self.cache_dir) + + np = import_numpy() + torch, torch_f, 
AutoModel, AutoTokenizer = import_torch_and_transformers() + + device = resolve_device(torch, self.requested_device) + torch_dtype = resolve_torch_dtype(torch, self.requested_dtype, device) + tokenizer = AutoTokenizer.from_pretrained( + self.model_id, + local_files_only=self.local_files_only, + ) + model_kwargs: dict[str, Any] = { + "local_files_only": self.local_files_only, + } + if torch_dtype == "auto": + model_kwargs["torch_dtype"] = "auto" + self.dtype_name = "auto" + else: + model_kwargs["torch_dtype"] = torch_dtype + self.dtype_name = str(torch_dtype).split(".")[-1] + model = AutoModel.from_pretrained(self.model_id, **model_kwargs) + model.eval() + model.to(device) + + self._np = np + self._torch = torch + self._torch_f = torch_f + self._tokenizer = tokenizer + self._model = model + self.device = device + self.vector_dim = int(getattr(model.config, "hidden_size")) + eprint( + f"[harrier] loaded {self.model_id} on {self.device} " + f"(dtype={self.dtype_name}, dim={self.vector_dim})" + ) + + def _last_token_pool(self, last_hidden_states: Any, attention_mask: Any) -> Any: + torch = self._torch + assert torch is not None + left_padding = bool((attention_mask[:, -1].sum() == attention_mask.shape[0]).item()) + if left_padding: + return last_hidden_states[:, -1] + sequence_lengths = attention_mask.sum(dim=1) - 1 + batch_size = last_hidden_states.shape[0] + indices = torch.arange(batch_size, device=last_hidden_states.device) + return last_hidden_states[indices, sequence_lengths] + + def embed_documents(self, texts: list[str]) -> Any: + if not texts: + np = import_numpy() + return np.zeros((0, self.vector_dim or 0), dtype=np.float32) + self.load() + assert self._tokenizer is not None + assert self._model is not None + assert self._torch is not None + assert self._torch_f is not None + assert self._np is not None + + batch = self._tokenizer( + texts, + max_length=self.max_length, + padding=True, + truncation=True, + return_tensors="pt", + ) + batch = {key: value.to(self.device) for key, value in batch.items()} + with self._torch.no_grad(): + outputs = self._model(**batch) + embeddings = self._last_token_pool(outputs.last_hidden_state, batch["attention_mask"]) + embeddings = self._torch_f.normalize(embeddings, p=2, dim=1) + output = embeddings.detach().to("cpu", dtype=self._torch.float32).numpy() + if not self._np.isfinite(output).all(): + raise RuntimeError( + f"Non-finite embeddings generated by {self.model_id} on " + f"device={self.device} dtype={self.dtype_name}. " + "Retry with --dtype fp32 or --device cpu." + ) + return output + + +def slugify(text: str) -> str: + return re.sub(r"[^A-Za-z0-9._-]+", "_", text.strip("/")) or "artifact" + + +def default_artifact_root(table: str, model_id: str) -> Path: + stamp = datetime.now().strftime("%Y%m%d-%H%M%S") + return Path("tmp") / "harrier-backfill" / f"{table}-{slugify(model_id)}-{stamp}" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Two-phase Harrier embedding backfill: generate local vectors.npy first, then upload to Deeplake." 
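+ # Hedged usage examples (paths illustrative; flags defined just below): + # uv run python scripts/backfill_harrier_embeddings.py embed --table memory --max-rows 500 + # uv run python scripts/backfill_harrier_embeddings.py upload --table memory --artifact-dir tmp/harrier-backfill/memory-... --resume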
+ ) + subparsers = parser.add_subparsers(dest="command", required=True) + + def add_table_options(cmd: argparse.ArgumentParser) -> None: + cmd.add_argument("--table", choices=["memory", "sessions", "all"], default="all") + cmd.add_argument("--memory-table", default=None) + cmd.add_argument("--sessions-table", default=None) + cmd.add_argument("--artifact-dir", default=None) + cmd.add_argument("--embedding-column", default="embedding") + cmd.add_argument("--embedding-model-column", default="embedding_model") + cmd.add_argument("--embedding-source-hash-column", default="embedding_source_hash") + cmd.add_argument("--embedding-updated-at-column", default="embedding_updated_at") + + def add_embed_options(cmd: argparse.ArgumentParser) -> None: + add_table_options(cmd) + cmd.add_argument("--model-id", default=DEFAULT_MODEL_ID) + cmd.add_argument("--start-offset", type=int, default=0) + cmd.add_argument("--max-rows", type=int, default=None) + cmd.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE) + cmd.add_argument("--scan-batch-size", type=int, default=DEFAULT_SCAN_BATCH_SIZE) + cmd.add_argument("--memory-max-chars", type=int, default=8000) + cmd.add_argument("--sessions-max-chars", type=int, default=8000) + cmd.add_argument("--max-length", type=int, default=DEFAULT_MAX_LENGTH) + cmd.add_argument("--device", default="auto") + cmd.add_argument("--dtype", default="auto") + cmd.add_argument("--force", action="store_true") + cmd.add_argument("--local-files-only", action="store_true") + cmd.add_argument("--cache-dir", default=None) + cmd.add_argument("--resume", action="store_true") + cmd.add_argument("--rebuild-plan", action="store_true") + + def add_upload_options(cmd: argparse.ArgumentParser) -> None: + add_table_options(cmd) + cmd.add_argument("--upload-batch-size", type=int, default=32) + cmd.add_argument("--resume", action="store_true") + + embed_cmd = subparsers.add_parser("embed", help="Plan remaining rows and generate vectors.npy locally.") + add_embed_options(embed_cmd) + + upload_cmd = subparsers.add_parser("upload", help="Upload vectors from a completed artifact into Deeplake.") + add_upload_options(upload_cmd) + + run_cmd = subparsers.add_parser("run", help="Run embed, then upload after vectors.npy is complete.") + add_embed_options(run_cmd) + run_cmd.add_argument("--upload-batch-size", type=int, default=32) + + return parser.parse_args() + + +def table_name_for_kind(config: Config, args: argparse.Namespace, table_kind: str) -> str: + if table_kind == "memory": + return args.memory_table or config.memory_table + if table_kind == "sessions": + return args.sessions_table or config.sessions_table + raise ValueError(f"Unsupported table kind: {table_kind}") + + +def artifact_dir_for_table(args: argparse.Namespace, table_kind: str) -> Path: + if args.artifact_dir: + root = Path(args.artifact_dir) + else: + root = default_artifact_root(args.table, getattr(args, "model_id", DEFAULT_MODEL_ID)) + if args.table == "all": + return root / table_kind + return root + + +def manifest_paths(artifact_dir: Path) -> tuple[Path, Path, Path]: + return ( + artifact_dir / "manifest.json", + artifact_dir / "rows.jsonl", + artifact_dir / "vectors.npy", + ) + + +def remaining_scan_limit(args: argparse.Namespace, offset: int) -> int: + if args.max_rows is None: + return args.scan_batch_size + remaining = max(0, (args.start_offset + args.max_rows) - offset) + return min(args.scan_batch_size, remaining) + + +def fetch_memory_rows(api: DeeplakeApi, args: argparse.Namespace, table_name: str, offset: int) 
-> list[dict[str, Any]]: + limit = remaining_scan_limit(args, offset) + if limit <= 0: + return [] + table = sql_ident(table_name) + return api.query( + "SELECT id, path, filename, summary, description, project, " + f'"{sql_ident(args.embedding_source_hash_column)}" AS embedding_source_hash, ' + f'"{sql_ident(args.embedding_model_column)}" AS embedding_model ' + f'FROM "{table}" ORDER BY path ASC LIMIT {limit} OFFSET {offset}' + ) + + +def fetch_session_rows( + api: DeeplakeApi, + args: argparse.Namespace, + table_name: str, + offset: int, + include_metadata: bool, +) -> list[dict[str, Any]]: + limit = remaining_scan_limit(args, offset) + if limit <= 0: + return [] + table = sql_ident(table_name) + select_columns = [ + "id", + "path", + "event_type", + "speaker", + "text", + "turn_summary", + "source_date_time", + "turn_index", + "message", + ] + if include_metadata: + select_columns.extend( + [ + f'"{sql_ident(args.embedding_source_hash_column)}" AS embedding_source_hash', + f'"{sql_ident(args.embedding_model_column)}" AS embedding_model', + ] + ) + return api.query( + f'SELECT {", ".join(select_columns)} ' + f'FROM "{table}" ' + f"ORDER BY path ASC, turn_index ASC, creation_date ASC LIMIT {limit} OFFSET {offset}" + ) + + +def plan_artifact( + api: DeeplakeApi, + config: Config, + args: argparse.Namespace, + table_kind: str, + artifact_dir: Path, +) -> dict[str, Any]: + artifact_dir.mkdir(parents=True, exist_ok=True) + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + table_name = table_name_for_kind(config, args, table_kind) + planned_rows: list[dict[str, Any]] = [] + scanned_rows = 0 + skipped_empty = 0 + skipped_existing = 0 + metadata_supported = True + used_metadata = table_kind == "memory" + + offset = max(0, args.start_offset) + while True: + if table_kind == "memory": + rows = fetch_memory_rows(api, args, table_name, offset) + else: + try: + rows = fetch_session_rows(api, args, table_name, offset, include_metadata=metadata_supported) + used_metadata = metadata_supported + except DeeplakeQueryError: + if metadata_supported: + metadata_supported = False + eprint("[sessions] metadata scan failed; falling back to scans without existing-hash checks") + rows = fetch_session_rows(api, args, table_name, offset, include_metadata=False) + used_metadata = False + else: + raise + + if not rows: + break + + scanned_rows += len(rows) + for row in rows: + text = ( + build_memory_embedding_text(row, args.memory_max_chars) + if table_kind == "memory" + else build_session_embedding_text(row, args.sessions_max_chars) + ) + if not text: + skipped_empty += 1 + continue + source_hash = stable_embedding_source_hash(text) + existing_hash = compact(row.get("embedding_source_hash")) + existing_model = compact(row.get("embedding_model")) + if not args.force and used_metadata and existing_hash == source_hash and existing_model == args.model_id: + skipped_existing += 1 + continue + planned_rows.append( + { + "id": as_str(row.get("id")), + "path": as_str(row.get("path")), + "source_hash": source_hash, + "text": text, + } + ) + eprint( + f"[{table_kind}] planned {len(planned_rows)} rows after scanning {scanned_rows} rows " + f"(skipped_empty={skipped_empty}, skipped_existing={skipped_existing})" + ) + offset += args.scan_batch_size + + manifest = { + "schema_version": ARTIFACT_SCHEMA_VERSION, + "table_kind": table_kind, + "table_name": table_name, + "model_id": args.model_id, + "artifact_created_at": now_iso(), + "artifact_updated_at": now_iso(), + "rows_file": rows_path.name, + 
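+ # completed_vectors / completed_uploads below are the resume cursors: embed + # and upload restart from these offsets when --resume is passed.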
"vectors_file": vectors_path.name, + "embedding_column": args.embedding_column, + "embedding_model_column": args.embedding_model_column, + "embedding_source_hash_column": args.embedding_source_hash_column, + "embedding_updated_at_column": args.embedding_updated_at_column, + "start_offset": args.start_offset, + "max_rows": args.max_rows, + "planned_rows": len(planned_rows), + "scanned_rows": scanned_rows, + "skipped_empty": skipped_empty, + "skipped_existing": skipped_existing, + "skip_existing_supported": bool(used_metadata), + "completed_vectors": 0, + "completed_uploads": 0, + "vector_dim": None, + "status": "planned", + "upload_complete": False, + } + write_jsonl(rows_path, planned_rows) + write_json_atomic(manifest_path, manifest) + return manifest + + +def load_or_plan_artifact( + api: DeeplakeApi, + config: Config, + args: argparse.Namespace, + table_kind: str, + artifact_dir: Path, +) -> tuple[dict[str, Any], Path, Path, Path]: + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + if manifest_path.exists() and rows_path.exists() and not args.rebuild_plan: + manifest = load_json(manifest_path) + if manifest.get("table_kind") != table_kind: + raise SystemExit(f"Artifact at {artifact_dir} is for {manifest.get('table_kind')}, not {table_kind}") + if not args.resume and manifest.get("status") in {"embedding", "complete"}: + raise SystemExit( + f"Artifact already exists at {artifact_dir}. Use --resume or --rebuild-plan." + ) + return manifest, manifest_path, rows_path, vectors_path + + manifest = plan_artifact(api, config, args, table_kind, artifact_dir) + return manifest, manifest_path, rows_path, vectors_path + + +def embed_artifact(args: argparse.Namespace, manifest: dict[str, Any], manifest_path: Path, rows_path: Path, vectors_path: Path) -> dict[str, Any]: + np = import_numpy() + records = read_jsonl(rows_path) + total_rows = len(records) + completed_vectors = int(manifest.get("completed_vectors") or 0) + if completed_vectors > total_rows: + raise SystemExit("Manifest completed_vectors exceeds rows.jsonl length") + + if total_rows == 0: + if not vectors_path.exists(): + np.save(vectors_path, np.zeros((0, 0), dtype=np.float32)) + manifest["status"] = "complete" + manifest["vector_dim"] = 0 + manifest["completed_vectors"] = 0 + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] nothing to embed") + return manifest + + embedder = HarrierEmbedder( + model_id=manifest["model_id"], + device=args.device, + dtype=args.dtype, + max_length=args.max_length, + local_files_only=args.local_files_only, + cache_dir=args.cache_dir, + ) + embedder.load() + vector_dim = embedder.vector_dim + + if vectors_path.exists(): + vectors = np.load(vectors_path, mmap_mode="r+") + if tuple(vectors.shape) != (total_rows, vector_dim): + raise SystemExit( + f"Existing vectors.npy shape {tuple(vectors.shape)} does not match planned shape {(total_rows, vector_dim)}" + ) + else: + vectors = np.lib.format.open_memmap( + vectors_path, + mode="w+", + dtype=np.float32, + shape=(total_rows, vector_dim), + ) + + if completed_vectors == 0: + manifest["status"] = "embedding" + manifest["vector_dim"] = vector_dim + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + + for start in range(completed_vectors, total_rows, args.batch_size): + end = min(total_rows, start + args.batch_size) + batch_records = records[start:end] + batch_vectors = embedder.embed_documents([record["text"] for record in 
batch_records]) + vectors[start:end] = batch_vectors + if hasattr(vectors, "flush"): + vectors.flush() + manifest["completed_vectors"] = end + manifest["vector_dim"] = vector_dim + manifest["status"] = "embedding" if end < total_rows else "complete" + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] embedded {end}/{total_rows}") + + manifest["status"] = "complete" + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + return manifest + + +def update_embedding_row( + api: DeeplakeApi, + manifest: dict[str, Any], + row_id: str, + vector: list[float], + source_hash: str, +) -> None: + table = sql_ident(manifest["table_name"]) + updated_at = now_iso() + api.query( + f'UPDATE "{table}" SET ' + f'"{sql_ident(manifest["embedding_column"])}" = {sql_float4_array(vector)}, ' + f'"{sql_ident(manifest["embedding_model_column"])}" = \'{sql_str(manifest["model_id"])}\', ' + f'"{sql_ident(manifest["embedding_source_hash_column"])}" = \'{sql_str(source_hash)}\', ' + f'"{sql_ident(manifest["embedding_updated_at_column"])}" = \'{sql_str(updated_at)}\' ' + f"WHERE id = '{sql_str(row_id)}'" + ) + + +def update_embedding_rows_batch( + api: DeeplakeApi, + manifest: dict[str, Any], + rows: list[tuple[str, list[float], str]], +) -> None: + if not rows: + return + table = sql_ident(manifest["table_name"]) + updated_at = now_iso() + values_sql = ", ".join( + ( + f"('{sql_str(row_id)}', {sql_float4_array(vector)}, '{sql_str(source_hash)}')" + ) + for row_id, vector, source_hash in rows + ) + api.query( + f'UPDATE "{table}" AS target SET ' + f'"{sql_ident(manifest["embedding_column"])}" = source.embedding, ' + f'"{sql_ident(manifest["embedding_model_column"])}" = \'{sql_str(manifest["model_id"])}\', ' + f'"{sql_ident(manifest["embedding_source_hash_column"])}" = source.source_hash, ' + f'"{sql_ident(manifest["embedding_updated_at_column"])}" = \'{sql_str(updated_at)}\' ' + f"FROM (VALUES {values_sql}) AS source(id, embedding, source_hash) " + f"WHERE target.id = source.id" + ) + + +def upload_artifact( + api: DeeplakeApi, + args: argparse.Namespace, + manifest: dict[str, Any], + manifest_path: Path, + rows_path: Path, + vectors_path: Path, +) -> dict[str, Any]: + np = import_numpy() + records = read_jsonl(rows_path) + total_rows = len(records) + if int(manifest.get("completed_vectors") or 0) < total_rows: + raise SystemExit( + f"Artifact {manifest_path.parent} is incomplete: embedded " + f"{manifest.get('completed_vectors', 0)}/{total_rows} rows." + ) + + ensure_sql_columns( + api, + manifest["table_name"], + [ + (manifest["embedding_column"], "float4[]"), + (manifest["embedding_model_column"], "TEXT NOT NULL DEFAULT ''"), + (manifest["embedding_source_hash_column"], "TEXT NOT NULL DEFAULT ''"), + (manifest["embedding_updated_at_column"], "TEXT NOT NULL DEFAULT ''"), + ], + ) + + vectors = np.load(vectors_path, mmap_mode="r") + if len(vectors) != total_rows: + raise SystemExit( + f"vectors.npy row count {len(vectors)} does not match rows.jsonl count {total_rows}" + ) + + completed_uploads = int(manifest.get("completed_uploads") or 0) + if completed_uploads > total_rows: + raise SystemExit("Manifest completed_uploads exceeds rows.jsonl length") + if completed_uploads and not args.resume: + raise SystemExit( + f"Upload already started for {manifest_path.parent}. Use --resume to continue." 
+ ) + + for start in range(completed_uploads, total_rows, args.upload_batch_size): + end = min(total_rows, start + args.upload_batch_size) + batch_rows: list[tuple[str, list[float], str]] = [] + for index in range(start, end): + record = records[index] + vector = vectors[index].astype("float32") + if not np.isfinite(vector).all(): + raise SystemExit( + f"Artifact contains non-finite values at row {index} " + f"(id={record['id']}). Regenerate vectors before uploading." + ) + batch_rows.append( + ( + record["id"], + vector.tolist(), + record["source_hash"], + ) + ) + update_embedding_rows_batch(api, manifest, batch_rows) + manifest["completed_uploads"] = end + manifest["upload_complete"] = end >= total_rows + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] uploaded {end}/{total_rows}") + + ensure_embedding_index(api, manifest["table_name"], manifest["embedding_column"]) + manifest["upload_complete"] = True + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + return manifest + + +def table_kinds(args: argparse.Namespace) -> list[str]: + if args.table == "all": + return ["memory", "sessions"] + return [args.table] + + +def run_embed_command(api: DeeplakeApi, config: Config, args: argparse.Namespace) -> list[Path]: + artifact_dirs: list[Path] = [] + for table_kind in table_kinds(args): + artifact_dir = artifact_dir_for_table(args, table_kind) + manifest, manifest_path, rows_path, vectors_path = load_or_plan_artifact( + api, + config, + args, + table_kind, + artifact_dir, + ) + embed_artifact(args, manifest, manifest_path, rows_path, vectors_path) + artifact_dirs.append(artifact_dir) + eprint(f"[{table_kind}] artifact ready at {artifact_dir}") + return artifact_dirs + + +def run_upload_command(api: DeeplakeApi, args: argparse.Namespace) -> None: + if not args.artifact_dir: + raise SystemExit("--artifact-dir is required for upload") + for table_kind in table_kinds(args): + artifact_dir = artifact_dir_for_table(args, table_kind) + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + if not manifest_path.exists() or not rows_path.exists() or not vectors_path.exists(): + raise SystemExit(f"Incomplete artifact directory: {artifact_dir}") + manifest = load_json(manifest_path) + upload_artifact(api, args, manifest, manifest_path, rows_path, vectors_path) + eprint(f"[{table_kind}] upload complete from {artifact_dir}") + + +def main() -> int: + args = parse_args() + config = load_config() + api = DeeplakeApi( + token=config.token, + api_url=config.api_url, + org_id=config.org_id, + workspace_id=config.workspace_id, + ) + + if args.command == "embed": + run_embed_command(api, config, args) + return 0 + if args.command == "upload": + run_upload_command(api, args) + return 0 + if args.command == "run": + # Reuse the artifact dirs embed just produced; recomputing them here would + # mint fresh timestamped default paths and the upload would miss the new vectors. + artifact_dirs = run_embed_command(api, config, args) + upload_args = argparse.Namespace(**vars(args)) + upload_args.resume = True + for table_kind, artifact_dir in zip(table_kinds(args), artifact_dirs, strict=True): + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + manifest = load_json(manifest_path) + upload_artifact(api, upload_args, manifest, manifest_path, rows_path, vectors_path) + eprint(f"[{table_kind}] upload complete from {artifact_dir}") + return 0 + raise SystemExit(f"Unsupported command: {args.command}") + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/requirements-harrier-embeddings.txt b/scripts/requirements-harrier-embeddings.txt new file mode 100644 index 0000000..086ebf4 --- /dev/null +++ b/scripts/requirements-harrier-embeddings.txt @@ -0,0 +1,4 @@ +numpy>=1.26 +safetensors>=0.4 +torch>=2.4 +transformers>=4.57 diff --git a/src/config.ts b/src/config.ts index af884b6..d6ceaa0 100644 --- a/src/config.ts +++
b/src/config.ts @@ -11,6 +11,11 @@ export interface Config { apiUrl: string; tableName: string; sessionsTableName: string; + graphNodesTableName: string; + graphEdgesTableName: string; + factsTableName: string; + entitiesTableName: string; + factEntityLinksTableName: string; memoryPath: string; } @@ -54,6 +59,11 @@ export function loadConfig(): Config | null { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory"), }; } diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index a003b04..b6774da 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -7,7 +7,7 @@ import { sqlStr } from "./utils/sql.js"; const log = (msg: string) => _log("sdk", msg); -function summarizeSql(sql: string, maxLen = 220): string { +export function summarizeSql(sql: string, maxLen = 220): string { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } @@ -27,6 +27,29 @@ function traceSql(msg: string): void { if (debugFileLog) log(msg); } +export class DeeplakeQueryError extends Error { + readonly sqlSummary: string; + readonly status?: number; + readonly responseBody?: string; + readonly sql?: string; + override cause?: unknown; + + constructor(message: string, args: { + sql?: string; + status?: number; + responseBody?: string; + cause?: unknown; + } = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +} + // ── Retry & concurrency primitives ────────────────────────────────────────── const RETRYABLE_CODES = new Set([429, 500, 502, 503, 504]); @@ -154,10 +177,12 @@ export class DeeplakeApi { } catch (e: unknown) { // Network-level failure (DNS, TCP reset, timeout, etc.) if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error + ? 
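+ // Hedged usage sketch: callers can now branch on the enriched error, e.g. + // try { await api.query(sql); } catch (e) { + // if (e instanceof DeeplakeQueryError && e.status === 429) log(e.sqlSummary); + // }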
new DeeplakeQueryError(e.message, { sql, cause: e }) + : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -183,9 +208,13 @@ export class DeeplakeApi { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4000), + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── @@ -253,6 +282,30 @@ export class DeeplakeApi { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName?: string): Promise<void> { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName?: string): Promise<void> { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e: any) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } + + private buildLookupIndexName(table: string, suffix: string): string { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } @@ -379,30 +432,247 @@ export class DeeplakeApi { // } catch { /* index may already exist or not be supported */ } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns).
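+ * (Sketch; values invented.) A single user turn lands as one physical row: event_type='user_message', speaker='user', turn_index=3, text='fix the build', with the raw JSON event preserved in the message column.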
*/ async ensureSessionsTable(name: string): Promise<void> { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log(`table "${name}" not found, creating`); await this.query( `CREATE TABLE IF NOT EXISTS "${name}" (` + - `id TEXT NOT NULL DEFAULT '', ` + - `path TEXT NOT NULL DEFAULT '', ` + - `filename TEXT NOT NULL DEFAULT '', ` + - `message JSONB, ` + - `author TEXT NOT NULL DEFAULT '', ` + - `mime_type TEXT NOT NULL DEFAULT 'application/json', ` + - `size_bytes BIGINT NOT NULL DEFAULT 0, ` + - `project TEXT NOT NULL DEFAULT '', ` + - `description TEXT NOT NULL DEFAULT '', ` + - `agent TEXT NOT NULL DEFAULT '', ` + - `creation_date TEXT NOT NULL DEFAULT '', ` + - `last_update_date TEXT NOT NULL DEFAULT ''` + + sessionColumns.join(", ") + `) USING deeplake`, ); log(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns: Array<[string, string]> = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`], + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Some backends may not support ADD COLUMN IF NOT EXISTS; keep going so older tables still work.
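+ // Migration sketch (hypothetical older table): pre-existing rows simply read + // back as event_type='' / turn_index=0 until re-captured; the ALTERs above + // only add columns with defaults and never rewrite stored data.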
+ } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); + } + + async ensureGraphNodesTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`], + ] as const) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Best effort for older backends. + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + + async ensureGraphEdgesTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`], + ] as const) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Best effort for older backends. 
+ } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + + async ensureFactsTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + + async ensureEntitiesTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + + async ensureFactEntityLinksTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT 
NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); } } diff --git a/src/embeddings/harrier.ts b/src/embeddings/harrier.ts new file mode 100644 index 0000000..b78174d --- /dev/null +++ b/src/embeddings/harrier.ts @@ -0,0 +1,166 @@ +import { + AutoModel, + AutoTokenizer, + LogLevel, + env, +} from "@huggingface/transformers"; + +const DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +const DEFAULT_DOCUMENT_BATCH_SIZE = 8; +const DEFAULT_MAX_LENGTH = 32_768; + +export interface HarrierEmbedderOptions { + modelId?: string; + cacheDir?: string; + localModelPath?: string; + localFilesOnly?: boolean; + device?: string; + dtype?: string; + maxLength?: number; + batchSize?: number; +} + +export interface HarrierQueryOptions { + task?: string; +} + +type TokenizerLike = Awaited<ReturnType<typeof AutoTokenizer.from_pretrained>>; +type ModelLike = Awaited<ReturnType<typeof AutoModel.from_pretrained>>; + +function toNumber(value: unknown): number { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} + +function tensorToRows(tensor: { data: ArrayLike<number | bigint>; dims: number[] }): number[][] { + const [batchSize, width] = tensor.dims; + const rows: number[][] = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row: number[] = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} + +function l2Normalize(rows: number[][]): number[][] { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} + +function lastTokenPool(outputs: { data: ArrayLike<number | bigint>; dims: number[] }, attentionMask: { data: ArrayLike<number | bigint>; dims: number[] }): number[][] { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows: number[][] = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = (batchIndex * sequenceLength) + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + + const row: number[] = []; + const hiddenOffset = ((batchIndex * sequenceLength) + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ??
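+ // Index arithmetic sketch: data is a flat [batch, seq, hidden] buffer, so row + // b's token t starts at ((b * seq) + t) * hidden; e.g. batch index 1, last + // valid token 2, hidden size 3 gives offset ((1 * 4) + 2) * 3 = 18.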
0)); + rows.push(row); + } + + return rows; +} + +function formatQuery(task: string, query: string): string { + return `Instruct: ${task}\nQuery: ${query}`; +} + +export class HarrierEmbedder { + readonly modelId: string; + private tokenizerPromise: Promise<TokenizerLike> | null = null; + private modelPromise: Promise<ModelLike> | null = null; + private readonly options: Required<Pick<HarrierEmbedderOptions, "maxLength" | "batchSize">> & HarrierEmbedderOptions; + + constructor(options: HarrierEmbedderOptions = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE, + }; + if (options.cacheDir) env.cacheDir = options.cacheDir; + if (options.localModelPath) env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + + async embedDocuments(texts: string[]): Promise<number[][]> { + return this.embedInternal(texts); + } + + async embedQueries(texts: string[], options: HarrierQueryOptions = {}): Promise<number[][]> { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + + private async load(): Promise<{ tokenizer: TokenizerLike; model: ModelLike }> { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: (this.options.device ?? "cpu") as any, + dtype: this.options.dtype as any, + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + + private async embedInternal(texts: string[]): Promise<number[][]> { + if (texts.length === 0) return []; + const { tokenizer, model } = await this.load(); + const rows: number[][] = []; + + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength, + }) as Record<string, unknown>; + const outputs = await model(inputs); + const sentenceEmbedding = (outputs as Record<string, unknown>)["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding as { data: ArrayLike<number | bigint>; dims: number[] }))); + continue; + } + + const lastHiddenState = (outputs as Record<string, unknown>)["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize( + lastTokenPool( + lastHiddenState as { data: ArrayLike<number | bigint>; dims: number[] }, + attentionMask as { data: ArrayLike<number | bigint>; dims: number[] }, + ), + )); + } + + return rows; + } +} diff --git a/src/embeddings/text.ts b/src/embeddings/text.ts new file mode 100644 index 0000000..880ec2f --- /dev/null +++ b/src/embeddings/text.ts @@ -0,0 +1,138 @@ +import { createHash } from "node:crypto"; +import { normalizeContent } from "../shell/grep-core.js"; + +export interface MemoryEmbeddingRow { + path?: string; + filename?: string; + summary?: string; + description?: string; + project?: string; +} + +export interface
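+ // (Illustrative literal) the row shape consumed by buildSessionEmbeddingText: + // { path: "/sessions/abc.jsonl", event_type: "user_message", speaker: "user", + // text: "fix the build", turn_index: 3 }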
SessionEmbeddingRow { + path?: string; + event_type?: string; + speaker?: string; + text?: string; + turn_summary?: string; + source_date_time?: string; + turn_index?: number; + message?: unknown; +} + +function compact(value: unknown): string { + if (typeof value !== "string") return ""; + return value.trim(); +} + +function joinSections(sections: Array<[label: string, value: string]>): string { + return sections + .filter(([, value]) => value.length > 0) + .map(([label, value]) => `${label}: ${value}`) + .join("\n"); +} + +function truncateText(text: string, maxChars: number): string { + const normalized = text.trim(); + if (normalized.length <= maxChars) return normalized; + return `${normalized.slice(0, maxChars).trimEnd()}\n[truncated ${normalized.length - maxChars} chars]`; +} + +function tryParseObject(value: unknown): Record<string, unknown> | null { + if (!value) return null; + if (typeof value === "string") { + try { + const parsed = JSON.parse(value) as unknown; + return parsed && typeof parsed === "object" ? parsed as Record<string, unknown> : null; + } catch { + return null; + } + } + return typeof value === "object" ? value as Record<string, unknown> : null; +} + +function extractTranscriptText(message: unknown): string { + const payload = tryParseObject(message); + if (!payload) return ""; + const turns = Array.isArray(payload["turns"]) + ? payload["turns"] as Array<Record<string, unknown>> + : Array.isArray(payload["dialogue"]) + ? payload["dialogue"] as Array<Record<string, unknown>> + : null; + if (!turns || turns.length === 0) return ""; + + const intro = joinSections([ + ["Session path", compact(typeof payload["source_path"] === "string" ? payload["source_path"] : "")], + ["Conversation", compact(typeof payload["conversation_id"] === "string" ? payload["conversation_id"] : "")], + ["Date", compact(typeof payload["date_time"] === "string" ? payload["date_time"] : typeof payload["date"] === "string" ? payload["date"] : "")], + ]); + const transcript = turns + .map((turn) => { + const speaker = compact( + typeof turn["speaker"] === "string" + ? turn["speaker"] + : typeof turn["role"] === "string" + ? turn["role"] + : typeof turn["author"] === "string" + ? turn["author"] + : "", + ) || "speaker"; + const text = compact( + typeof turn["text"] === "string" + ? turn["text"] + : typeof turn["content"] === "string" + ? turn["content"] + : typeof turn["utterance"] === "string" + ? turn["utterance"] + : "", + ); + return text ? `[${speaker}] ${text}` : ""; + }) + .filter(Boolean) + .join("\n"); + + return [intro, transcript].filter(Boolean).join("\n"); +} + +function fallbackSessionText(row: SessionEmbeddingRow): string { + const transcriptText = extractTranscriptText(row.message); + if (transcriptText) return transcriptText; + + if (typeof row.message === "string") { + return normalizeContent(row.path ?? "/sessions/unknown.jsonl", row.message); + } + if (row.message && typeof row.message === "object") { + return normalizeContent(row.path ?? "/sessions/unknown.jsonl", JSON.stringify(row.message)); + } + return ""; +} + +export function buildMemoryEmbeddingText(row: MemoryEmbeddingRow, maxChars = 8_000): string { + return truncateText(joinSections([ + ["Path", compact(row.path)], + ["Filename", compact(row.filename)], + ["Project", compact(row.project)], + ["Description", compact(row.description)], + ["Summary", compact(row.summary)], + ]), maxChars); +} + +export function buildSessionEmbeddingText(row: SessionEmbeddingRow, maxChars = 8_000): string { + const text = compact(row.text); + const turnSummary = compact(row.turn_summary); + const fallback = (!text && !turnSummary) ?
compact(fallbackSessionText(row)) : ""; + return truncateText(joinSections([ + ["Path", compact(row.path)], + ["Event", compact(row.event_type)], + ["Speaker", compact(row.speaker)], + ["Source time", compact(row.source_date_time)], + ["Turn index", Number.isFinite(row.turn_index) ? String(row.turn_index) : ""], + ["Text", text], + ["Turn summary", turnSummary], + ["Content", fallback], + ]), maxChars); +} + +export function stableEmbeddingSourceHash(text: string): string { + return createHash("sha256").update(text).digest("hex"); +} diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index 68e1534..2a3cb1c 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -1,5 +1,7 @@ import type { DeeplakeApi } from "../deeplake-api.js"; -import { sqlLike } from "../utils/sql.js"; +import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { type ScoredRetrievalRow, fuseRetrievalRows } from "../utils/hybrid-fusion.js"; +import { sqlLike, sqlStr } from "../utils/sql.js"; import { type GrepParams, handleGrepDirect, parseBashGrep } from "./grep-direct.js"; import { normalizeContent, refineGrepMatches } from "../shell/grep-core.js"; import { capOutputForClaude } from "../utils/output-cap.js"; @@ -8,15 +10,69 @@ import { readVirtualPathContents, findVirtualPaths, } from "./virtual-table-query.js"; +import { getGrepRetrievalMode, isFactsSessionsOnlyPsqlMode, isPsqlMode } from "../utils/retrieval-mode.js"; type VirtualRow = Record<string, unknown>; +const DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +const DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +const DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; + +let summaryRetrievalEmbedder: HarrierEmbedder | null = null; + +function envString(...names: string[]): string | undefined { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) return value; + } + return undefined; +} + +function envFlag(...names: string[]): boolean { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} + +function envNumber(fallback: number, ...names: string[]): number { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function getSummaryRetrievalEmbedder(): HarrierEmbedder { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString( + "HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", + "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", + "HIVEMIND_HARRIER_MODEL_ID", + "DEEPLAKE_HARRIER_MODEL_ID", + ) ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ??
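To illustrate the builders above with made-up row values: buildMemoryEmbeddingText emits labeled sections and skips empty fields, and stableEmbeddingSourceHash gives a cheap change detector so a row is only re-embedded when its rebuilt source text actually changes. A sketch:

import { buildMemoryEmbeddingText, stableEmbeddingSourceHash } from "../embeddings/text.js";

const sourceText = buildMemoryEmbeddingText({
  path: "/summaries/conv_12_session_3.md",
  project: "hivemind",
  summary: "Moved capture writes to a local queue flushed on Stop.",
});
// → "Path: /summaries/conv_12_session_3.md\nProject: hivemind\nSummary: Moved capture writes..."
const sourceHash = stableEmbeddingSourceHash(sourceText); // compare against the stored hash before re-embedding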
"cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY"), + }); + } + return summaryRetrievalEmbedder; +} + +function sqlFloat4Array(values: number[]): string { + if (values.length === 0) throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} + export type CompiledSegment = | { kind: "echo"; text: string } | { kind: "cat"; paths: string[]; lineLimit: number; fromEnd: boolean; countLines: boolean; ignoreMissing: boolean } | { kind: "ls"; dirs: string[]; longFormat: boolean } | { kind: "find"; dir: string; pattern: string; countOnly: boolean } | { kind: "find_grep"; dir: string; patterns: string[]; params: GrepParams; lineLimit: number } + | { kind: "psql"; query: string; lineLimit: number; tuplesOnly: boolean; fieldSeparator: string } | { kind: "grep"; params: GrepParams; lineLimit: number }; interface ParsedModifier { @@ -24,6 +80,17 @@ interface ParsedModifier { ignoreMissing: boolean; } +interface ParsedFindSpec { + patterns: string[]; + execGrepCmd: string | null; +} + +function quoteShellToken(token: string): string { + if (token === "") return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) return token; + return `'${token.replace(/'/g, `'\"'\"'`)}'`; +} + function isQuoted(ch: string): boolean { return ch === "'" || ch === "\""; } @@ -32,11 +99,18 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu const parts: string[] = []; let current = ""; let quote: string | null = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === "\"") escaped = true; current += ch; continue; } @@ -45,6 +119,11 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { @@ -58,7 +137,7 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu current += ch; } - if (quote) return null; + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) parts.push(trimmed); return parts; @@ -128,10 +207,10 @@ export function expandBraceToken(token: string): string[] { } export function stripAllowedModifiers(segment: string): ParsedModifier { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); const clean = segment - .replace(/\s2>\/dev\/null\s*$/g, "") - .replace(/\s2>&1\s*/g, " ") + .replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "") + .replace(/\s2>&1(?=\s*(?:\||$))/g, "") .trim(); return { clean, ignoreMissing }; } @@ -193,7 +272,7 @@ function isValidPipelineHeadTailStage(stage: string): boolean { return false; } -function parseFindNamePatterns(tokens: string[]): string[] | null { +function 
parseFindSpec(tokens: string[]): ParsedFindSpec | null { const patterns: string[] = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -209,9 +288,897 @@ function parseFindNamePatterns(tokens: string[]): string[] | null { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if ((terminator !== "\\;" && terminator !== ";") || target !== "{}") return null; + return { + patterns, + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" "), + }; + } return null; } - return patterns.length > 0 ? patterns : null; + return patterns.length > 0 ? { patterns, execGrepCmd: null } : null; +} + +function extractPsqlQuery(tokens: string[]): string | null { + let query: string | null = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} + +export function extractPsqlQueryFromCommand(cmd: string): string | null { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") return null; + return extractPsqlQuery(tokens); +} + +function normalizeSqlRef(ref: string): string { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} + +function deriveSiblingTableName(tableName: string, expectedBase: string, targetBase: string): string | null { + if (tableName === expectedBase) return null; + if (!tableName.startsWith(expectedBase)) return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} + +function resolveInterceptedTableNames( + memoryTable: string, + sessionsTable: string, +): { + graphNodesTable: string; + graphEdgesTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; +} { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links"), + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links"), + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? "graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? 
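deriveSiblingTableName carries a workspace suffix from one base table onto a sibling, for example:

deriveSiblingTableName("memory_ws1", "memory", "graph_nodes"); // → "graph_nodes_ws1"
deriveSiblingTableName("memory", "memory", "graph_nodes");     // → null (bare name, no suffix to carry)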
process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? "fact_entity_links", + }; +} + +function getInterceptedSqlRefs(): Set { + if (isFactsSessionsOnlyPsqlMode()) { + return new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links", + ]); + } + return new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links", + ]); +} + +function extractSqlTableRefs(query: string): string[] { + const refs: string[] = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) refs.push(normalizeSqlRef(match[1])); + } + return refs; +} + +export function queryReferencesInterceptedTables(query: string): boolean { + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); +} + +export function queryUsesOnlyInterceptedTables(query: string): boolean { + const refs = extractSqlTableRefs(query); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => interceptedRefs.has(ref)); +} + +export function queryUsesBareMemoryTables(query: string): boolean { + const bareRefs = isFactsSessionsOnlyPsqlMode() + ? new Set(["sessions", "memory_facts", "memory_entities", "fact_entity_links"]) + : new Set(["memory", "sessions", "graph_nodes", "graph_edges", "memory_facts", "memory_entities", "fact_entity_links"]); + return extractSqlTableRefs(query).some((ref) => bareRefs.has(ref)); +} + +function parsePsqlSegment(pipeline: string[], tokens: string[]): CompiledSegment | null { + if (tokens[0] !== "psql" || !isPsqlMode()) return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? 
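Concretely, in the default (non facts-only) mode the predicates above behave like this:

queryUsesOnlyInterceptedTables(
  `SELECT f.summary FROM memory_facts f JOIN fact_entity_links l ON l.fact_id = f.fact_id`,
); // true — every FROM/JOIN ref is intercepted
queryUsesOnlyInterceptedTables(`SELECT * FROM memory m JOIN pg_class c ON true`); // false — pg_class is foreign
queryReferencesInterceptedTables(`SELECT 1`); // false — no FROM/JOIN refs at all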
fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) tuplesOnly = true; + continue; + } + } + + if (!query || !queryUsesOnlyInterceptedTables(query)) return null; + + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return null; + lineLimit = headTail.lineLimit; + } + + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} + +function normalizePsqlQuery( + query: string, + memoryTable: string, + sessionsTable: string, + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, +): string { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql + .replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`) + .replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`) + .replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`) + .replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`) + .replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) + .replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) + .replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) + .replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql + .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + .replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + 
.replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); + } + return sql; +} + +function validatePsqlQuery( + query: string, + memoryTable: string, + sessionsTable: string, + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, +): string { + if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } + const sql = normalizePsqlQuery( + query, + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + factsTable, + entitiesTable, + factEntityLinksTable, + ); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = new Set([ + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new Error("psql query must reference an intercepted hivemind memory table"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} + +function decodeSqlLiteral(value: string): string { + return value.replace(/''/g, "'").trim(); +} + +function cleanSearchTerm(value: string): string { + return decodeSqlLiteral(value) + .replace(/^%+|%+$/g, "") + .replace(/^_+|_+$/g, "") + .trim(); +} + +function extractSqlSearchTerms(query: string): string[] { + const terms: string[] = []; + const push = (value: string) => { + const cleaned = cleanSearchTerm(value); + if (!cleaned) return; + if (cleaned.startsWith("/")) return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) return; + if (!terms.includes(cleaned)) terms.push(cleaned); + }; + + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} + +function chooseEntityTerms(terms: string[]): string[] { + const entityLike = terms.filter((term) => + /[A-Z]/.test(term) && + !/^\d+$/.test(term) && + term.split(/\s+/).length <= 4 + ); + return (entityLike.length > 0 ? 
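For example, assuming workspace tables "memory_ws1"/"sessions_ws1" and full psql mode, validation rewrites bare and hivemind.* references onto the physical tables and rejects anything that is not a SELECT/WITH:

validatePsqlQuery(`SELECT path FROM hivemind.memory WHERE summary ILIKE '%capture%';`, "memory_ws1", "sessions_ws1");
// → SELECT path FROM "memory_ws1" WHERE summary ILIKE '%capture%'
validatePsqlQuery(`DELETE FROM memory`, "memory_ws1", "sessions_ws1");
// → throws: psql mode only supports SELECT queries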
entityLike : terms).slice(0, 2); +} + +interface GraphCandidateRow extends VirtualRow { + source_session_id?: string; + source_path?: string; + search_text?: string; +} + +interface HybridCandidate { + sessionId: string; + sourcePath: string; + score: number; + signals: Set<string>; +} + +function escapeRegex(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +async function fetchGraphCandidates( + api: DeeplakeApi, + graphNodesTable: string, + graphEdgesTable: string, + terms: string[], +): Promise<{ sessionId: string; sourcePath: string }[]> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return []; + + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => + `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')` + ); + const nodeTextClauses = topicTerms.map((term) => + `search_text ILIKE '%${sqlLike(term)}%'` + ); + const edgeEntityClauses = entityTerms.map((term) => + `search_text ILIKE '%${sqlLike(term)}%'` + ); + const edgeTopicClauses = topicTerms.map((term) => + `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')` + ); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 + ? `(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` + : entityTerms.length > 0 + ? `(${nodeEntityClauses.join(" OR ")})` + : topicTerms.length > 0 + ? `(${nodeTextClauses.join(" OR ")})` + : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 + ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` + : topicTerms.length > 0 + ? `(${edgeTopicClauses.join(" OR ")})` + : entityTerms.length > 0 + ? `(${edgeEntityClauses.join(" OR ")})` + : "FALSE"; + + const sql = + `WITH node_candidates AS (` + + ` SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${graphNodesTable}"` + + ` WHERE ${nodeWhere}` + + ` ORDER BY score DESC LIMIT 8` + + `), edge_candidates AS (` + + ` SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${graphEdgesTable}"` + + ` WHERE ${edgeWhere}` + + ` ORDER BY score DESC LIMIT 8` + + `)` + + ` SELECT source_session_id, source_path, search_text, score` + + ` FROM (` + + ` SELECT source_session_id, source_path, search_text, score FROM node_candidates` + + ` UNION ALL` + + ` SELECT source_session_id, source_path, search_text, score FROM edge_candidates` + + ` ) AS graph_candidates` + + ` ORDER BY score ASC` + + ` LIMIT 12`; + + const rows = await api.query(sql) as GraphCandidateRow[]; + const expanded: Array<{ sessionId: string; sourcePath: string }> = []; + const seen = new Set<string>(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...(searchText.match(/conv_\d+_session_\d+/g) ?? []), + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "", + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...(searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? []), + typeof row["source_path"] === "string" ?
row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`), + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) return expanded; + } + } + return expanded; +} + +function splitDelimitedField(value: unknown): string[] { + if (typeof value !== "string") return []; + return value + .split(",") + .map((item) => item.trim()) + .filter(Boolean); +} + +function extractSessionIdFromPath(value: string): string { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} + +function extractSummarySourcePath(summary: string): string { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} + +function addHybridCandidate( + map: Map, + candidate: { sessionId?: string; sourcePath?: string; score: number; signal: string }, +): void { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? ""; + if (!sessionId && !sourcePath) return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: new Set([candidate.signal]), + }); +} + +async function fetchEntityResolution( + api: DeeplakeApi, + entitiesTable: string, + terms: string[], +): Promise<{ entityIds: string[]; candidates: HybridCandidate[] }> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) return { entityIds: [], candidates: [] }; + + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms + .map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`) + .join(" OR "); + const sql = + `SELECT entity_id, source_session_ids, source_paths, search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${entitiesTable}"` + + ` WHERE ${where}` + + ` ORDER BY score ASC` + + ` LIMIT 8`; + + const rows = await api.query(sql); + const entityIds: string[] = []; + const candidateMap = new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? 
`/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity", + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} + +async function fetchFactCandidates( + api: DeeplakeApi, + factsTable: string, + terms: string[], + entityIds: string[], +): Promise<{ entityIds: string[]; candidates: HybridCandidate[] }> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms) + .map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 + ? `(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` + : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? `(${topicClauses.join(" OR ")})` : "", + ].filter(Boolean); + if (whereParts.length === 0) return { entityIds: [], candidates: [] }; + + const sql = + `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score` + + ` FROM "${factsTable}"` + + ` WHERE ${whereParts.join(" AND ")}` + + ` ORDER BY score ASC` + + ` LIMIT 16`; + + const rows = await api.query(sql); + const relatedEntityIds: string[] = []; + const candidateMap = new Map<string, HybridCandidate>(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"] as const) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ?
row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact", + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} + +async function fetchSummaryCandidates( + api: DeeplakeApi, + memoryTable: string, + terms: string[], +): Promise { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + let rows: Array<{ path: string; summary: string }> = []; + + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) return []; + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorSql = + `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score` + + ` FROM "${memoryTable}"` + + ` WHERE embedding IS NOT NULL` + + ` ORDER BY score DESC` + + ` LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "", + })); + } else { + const textSql = + `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score` + + ` FROM "${memoryTable}"` + + ` ORDER BY score DESC` + + ` LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)), + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8, + }).map((row) => ({ + path: row.path, + summary: row.content, + })); + } + } else { + const phraseSql = sqlStr(phrase); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = + `SELECT path, summary, summary <#> '${phraseSql}' AS score` + + ` FROM "${memoryTable}"` + + ` WHERE ${clauses.join(" OR ")}` + + ` ORDER BY score DESC` + + ` LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "", + })); + } + const candidateMap = new Map(); + for (const row of rows) { + const path = row.path; + const summary = row.summary; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? 
`/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary", + }); + } + return [...candidateMap.values()]; +} + +function buildSummaryHeuristicQuery( + memoryTable: string, + filteredTerms: string[], + phrase: string, +): string { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END`, + ]; + return ( + `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score` + + ` FROM "${memoryTable}"` + + ` WHERE ${clauses.join(" OR ")}` + + ` ORDER BY score DESC` + + ` LIMIT 8` + ); +} + +function mapSummaryRows(rows: Record<string, unknown>[]): ScoredRetrievalRow[] { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0, + })); +} + +function prependCtes(sql: string, ctes: string[]): string { + if (ctes.length === 0) return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} + +function rewriteQueryWithRestrictedTables( + sql: string, + aliases: { + memoryTable: string; + sessionsTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; + restrictedMemoryAlias: string | null; + restrictedSessionsAlias: string | null; + restrictedFactsAlias: string | null; + restrictedEntitiesAlias: string | null; + restrictedLinksAlias: string | null; + }, +): string { + let rewritten = sql; + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex(aliases.memoryTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex(aliases.sessionsTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex(aliases.factsTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); + } + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex(aliases.entitiesTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); + } + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex(aliases.factEntityLinksTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"),
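prependCtes folds the restriction CTEs into the caller's query whether or not it already starts with a WITH clause:

prependCtes(`SELECT * FROM "__hm_sessions"`, [`c AS (SELECT 1)`]);
// → WITH c AS (SELECT 1) SELECT * FROM "__hm_sessions"
prependCtes(`WITH recent AS (SELECT 2) SELECT * FROM recent`, [`c AS (SELECT 1)`]);
// → WITH c AS (SELECT 1), recent AS (SELECT 2) SELECT * FROM recent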
`FROM "${aliases.restrictedLinksAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); + } + return rewritten; +} + +async function applyGraphRestrictionsToPsqlQuery( + api: DeeplakeApi, + sql: string, + memoryTable: string, + sessionsTable: string, + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, +): Promise { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); + const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) return sql; + + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) return sql; + + const candidateMap = new Map(); + const graphCandidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2.0, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + + const candidateEntityIds = [...new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()] + .sort((a, b) => b.score - a.score || b.signals.size - a.signals.size) + .slice(0, 12); + if (candidates.length === 0) return sql; + if (candidates.length > 16) return sql; + + const values = candidates.map((candidate) => + `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')` + ); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})`, + ]; + let restrictedMemoryAlias: string | null = null; + let restrictedSessionsAlias: string | null = null; + let restrictedFactsAlias: string | null = null; + let restrictedEntitiesAlias: string | null = null; + let restrictedLinksAlias: string | null = null; + + if (candidateEntityIds.length > 0) { + ctes.push( + `__hm_entity_candidates(entity_id) AS (VALUES 
${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`, + ); + } + + if (touchesMemory) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push( + `"${restrictedMemoryAlias}" AS (` + + ` SELECT * FROM "${memoryTable}" m` + + ` WHERE EXISTS (` + + ` SELECT 1 FROM __hm_graph_candidates gc` + + ` WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%')` + + ` OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%')` + + ` )` + + `)` + ); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push( + `"${restrictedSessionsAlias}" AS (` + + ` SELECT * FROM "${sessionsTable}" s` + + ` WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + `)` + ); + } + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push( + `"${restrictedFactsAlias}" AS (` + + ` SELECT * FROM "${factsTable}" f` + + ` WHERE (` + + ` f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + ` OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + + (candidateEntityIds.length > 0 + ? ` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + + ` OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + : "") + + ` )` + + `)` + ); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push( + `"${restrictedEntitiesAlias}" AS (` + + ` SELECT * FROM "${entitiesTable}" e` + + ` WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + + `)` + ); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push( + `"${restrictedLinksAlias}" AS (` + + ` SELECT * FROM "${factEntityLinksTable}" l` + + ` WHERE (` + + ` l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + ` OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + + (candidateEntityIds.length > 0 + ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + : "") + + (touchesFacts + ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` + : "") + + ` )` + + `)` + ); + } + + return prependCtes( + rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias, + }), + ctes, + ); +} + +function formatPsqlValue(value: unknown): string { + if (value === null || value === undefined) return ""; + if (typeof value === "string") return value; + if (typeof value === "number" || typeof value === "boolean") return String(value); + return JSON.stringify(value); +} + +function formatPsqlRows( + rows: VirtualRow[], + tuplesOnly: boolean, + fieldSeparator: string, +): string { + if (rows.length === 0) return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? 
{}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); } export function parseCompiledSegment(segment: string): CompiledSegment | null { @@ -223,6 +1190,9 @@ export function parseCompiledSegment(segment: string): CompiledSegment | null { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) return psqlSegment; + if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -298,14 +1268,29 @@ export function parseCompiledSegment(segment: string): CompiledSegment | null { if (pipeline.length > 3) return null; const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) return null; + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams = parseBashGrep(execGrepCmd); + if (!grepParams) return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams, lineLimit }; + } + if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") return null; @@ -509,6 +1494,27 @@ export async function executeCompiledBashCommand( continue; } + if (segment.kind === "psql") { + const { + graphNodesTable, + graphEdgesTable, + } = resolveInterceptedTableNames(memoryTable, sessionsTable); + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery( + api, + validated, + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + ); + const rows = await api.query(prepared); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } + if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) return null; diff --git a/src/hooks/capture.ts b/src/hooks/capture.ts index 81c8385..adb8e07 100644 --- a/src/hooks/capture.ts +++ b/src/hooks/capture.ts @@ -1,8 +1,8 @@ #!/usr/bin/env node /** - * Capture hook — writes each session event as a separate row in the sessions table. - * One INSERT per event, no concat, no race conditions. + * Capture hook — appends session events to a local queue on the hot path. + * Stop/SubagentStop flush that queue to the sessions table in batched INSERTs. 
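In other words (a sketch of the two phases; the bare identifiers stand in for values runCaptureHook computes below, and api is a configured DeeplakeApi):

// Hot path — each hook event appends one row locally, no network I/O:
appendQueuedSessionRow(buildQueuedSessionRow({
  sessionPath, line, sessionId, userName, projectName,
  description: "PostToolUse", agent: "claude_code", timestamp,
}));
// Stop/SubagentStop — drain the queue in batched INSERTs:
const flushed = await flushSessionQueue(api, { sessionId, sessionsTable, drainAll: true });
// flushed.status / flushed.rows / flushed.batches feed the flush log line below.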
* * Used by: UserPromptSubmit, PostToolUse (async), Stop, SubagentStop */ @@ -10,20 +10,26 @@ import { readStdin } from "../utils/stdin.js"; import { loadConfig, type Config } from "../config.js"; import { DeeplakeApi } from "../deeplake-api.js"; -import { sqlStr } from "../utils/sql.js"; import { log as _log } from "../utils/debug.js"; -import { buildSessionPath } from "../utils/session-path.js"; +import { isDirectRun } from "../utils/direct-run.js"; import { bumpTotalCount, loadTriggerConfig, shouldTrigger, tryAcquireLock, - releaseLock, } from "./summary-state.js"; import { bundleDirFromImportMeta, spawnWikiWorker, wikiLog } from "./spawn-wiki-worker.js"; +import { + appendQueuedSessionRow, + buildQueuedSessionRow, + buildSessionPath, + flushSessionQueue, +} from "./session-queue.js"; +import { clearSessionQueryCache } from "./query-cache.js"; + const log = (msg: string) => _log("capture", msg); -interface HookInput { +export interface HookInput { session_id: string; transcript_path?: string; cwd?: string; @@ -31,32 +37,19 @@ hook_event_name?: string; agent_id?: string; agent_type?: string; - // UserPromptSubmit prompt?: string; - // PostToolUse tool_name?: string; tool_input?: Record<string, unknown>; tool_response?: Record<string, unknown>; tool_use_id?: string; - // Stop / SubagentStop last_assistant_message?: string; stop_hook_active?: boolean; agent_transcript_path?: string; } -const CAPTURE = process.env.HIVEMIND_CAPTURE !== "false"; - -async function main(): Promise<void> { - if (!CAPTURE) return; - const input = await readStdin(); - const config = loadConfig(); - if (!config) { log("no config"); return; } - - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); +const CAPTURE = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false"; - // Build the event entry - const ts = new Date().toISOString(); +export function buildCaptureEntry(input: HookInput, timestamp: string): Record<string, unknown> | null { const meta = { session_id: input.session_id, transcript_path: input.transcript_path, @@ -65,22 +58,20 @@ hook_event_name: input.hook_event_name, agent_id: input.agent_id, agent_type: input.agent_type, - timestamp: ts, + timestamp, }; - let entry: Record<string, unknown>; - if (input.prompt !== undefined) { - log(`user session=${input.session_id}`); - entry = { + return { id: crypto.randomUUID(), ...meta, type: "user_message", content: input.prompt, }; - } else if (input.tool_name !== undefined) { - log(`tool=${input.tool_name} session=${input.session_id}`); - entry = { + } + + if (input.tool_name !== undefined) { + return { id: crypto.randomUUID(), ...meta, type: "tool_call", @@ -89,91 +80,166 @@ tool_name: input.tool_name, tool_input: JSON.stringify(input.tool_input), tool_response: JSON.stringify(input.tool_response), }; - } else if (input.last_assistant_message !== undefined) { - log(`assistant session=${input.session_id}`); - entry = { + } + + if (input.last_assistant_message !== undefined) { + return { id: crypto.randomUUID(), ...meta, type: "assistant_message", content: input.last_assistant_message, ...(input.agent_transcript_path ? { agent_transcript_path: input.agent_transcript_path } : {}), }; - } else { - log("unknown event, skipping"); - return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log(`writing to ${sessionPath}`); - - // Simple INSERT — one row per event, no concat, no race conditions.
- const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; + return null; +} - // For JSONB: only escape single quotes for the SQL literal, keep JSON structure intact. - // sqlStr() would also escape backslashes and strip control chars, corrupting the JSON. - const jsonForSql = line.replace(/'/g, "''"); +interface PeriodicSummaryDeps { + bundleDir?: string; + wikiWorker?: boolean; + logFn?: (msg: string) => void; + bumpTotalCountFn?: typeof bumpTotalCount; + loadTriggerConfigFn?: typeof loadTriggerConfig; + shouldTriggerFn?: typeof shouldTrigger; + tryAcquireLockFn?: typeof tryAcquireLock; + wikiLogFn?: typeof wikiLog; + spawnWikiWorkerFn?: typeof spawnWikiWorker; +} - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; +export function maybeTriggerPeriodicSummary(sessionId: string, cwd: string, config: Config, deps: PeriodicSummaryDeps = {}): void { + const { + bundleDir = bundleDirFromImportMeta(import.meta.url), + wikiWorker = process.env.HIVEMIND_WIKI_WORKER === "1", + logFn = log, + bumpTotalCountFn = bumpTotalCount, + loadTriggerConfigFn = loadTriggerConfig, + shouldTriggerFn = shouldTrigger, + tryAcquireLockFn = tryAcquireLock, + wikiLogFn = wikiLog, + spawnWikiWorkerFn = spawnWikiWorker, + } = deps; + + if (wikiWorker) return; try { - await api.query(insertSql); - } catch (e: any) { - // Fallback: table might not exist (session-start failed or org switched mid-session). - // Create it and retry once. - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; + const state = bumpTotalCountFn(sessionId); + const cfg = loadTriggerConfigFn(); + if (!shouldTriggerFn(state, cfg)) return; + + if (!tryAcquireLockFn(sessionId)) { + logFn(`periodic trigger suppressed (lock held) session=${sessionId}`); + return; } - } - log("capture ok → cloud"); + wikiLogFn(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); + spawnWikiWorkerFn({ + config, + sessionId, + cwd, + bundleDir, + reason: "Periodic", + }); + } catch (e: any) { + logFn(`periodic trigger error: ${e.message}`); + } +} - maybeTriggerPeriodicSummary(input.session_id, input.cwd ?? "", config); +interface CaptureHookDeps { + captureEnabled?: boolean; + config?: Config | null; + now?: () => string; + createApi?: (config: Config) => DeeplakeApi; + appendQueuedSessionRowFn?: typeof appendQueuedSessionRow; + buildQueuedSessionRowFn?: typeof buildQueuedSessionRow; + flushSessionQueueFn?: typeof flushSessionQueue; + clearSessionQueryCacheFn?: typeof clearSessionQueryCache; + maybeTriggerPeriodicSummaryFn?: typeof maybeTriggerPeriodicSummary; + logFn?: (msg: string) => void; } -/** Increment the event counter and, if the threshold is crossed, spawn a background wiki worker. 
*/ -function maybeTriggerPeriodicSummary(sessionId: string, cwd: string, config: Config): void { - if (process.env.HIVEMIND_WIKI_WORKER === "1") return; +export async function runCaptureHook(input: HookInput, deps: CaptureHookDeps = {}): Promise<{ + status: "disabled" | "no_config" | "ignored" | "queued"; + entry?: Record<string, unknown>; + flushStatus?: string; +}> { + const { + captureEnabled = CAPTURE, + config = loadConfig(), + now = () => new Date().toISOString(), + createApi = (activeConfig) => new DeeplakeApi( + activeConfig.token, + activeConfig.apiUrl, + activeConfig.orgId, + activeConfig.workspaceId, + activeConfig.sessionsTableName, + ), + appendQueuedSessionRowFn = appendQueuedSessionRow, + buildQueuedSessionRowFn = buildQueuedSessionRow, + flushSessionQueueFn = flushSessionQueue, + clearSessionQueryCacheFn = clearSessionQueryCache, + maybeTriggerPeriodicSummaryFn = maybeTriggerPeriodicSummary, + logFn = log, + } = deps; + + if (!captureEnabled) return { status: "disabled" }; + if (!config) { + logFn("no config"); + return { status: "no_config" }; + } - try { - const state = bumpTotalCount(sessionId); - const cfg = loadTriggerConfig(); - if (!shouldTrigger(state, cfg)) return; + const ts = now(); + const entry = buildCaptureEntry(input, ts); + if (!entry) { + logFn("unknown event, skipping"); + return { status: "ignored" }; + } - if (!tryAcquireLock(sessionId)) { - log(`periodic trigger suppressed (lock held) session=${sessionId}`); - return; - } + if (input.prompt !== undefined) logFn(`user session=${input.session_id}`); + else if (input.tool_name !== undefined) logFn(`tool=${input.tool_name} session=${input.session_id}`); + else logFn(`assistant session=${input.session_id}`); - wikiLog(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); - try { - spawnWikiWorker({ - config, - sessionId, - cwd, - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Periodic", - }); - } catch (e: any) { - log(`periodic spawn failed: ${e.message}`); - try { - releaseLock(sessionId); - } catch (releaseErr: any) { - log(`releaseLock after periodic spawn failure also failed: ${releaseErr.message}`); - } - throw e; - } - } catch (e: any) { - log(`periodic trigger error: ${e.message}`); + if (input.hook_event_name === "UserPromptSubmit") { + clearSessionQueryCacheFn(input.session_id); + } + + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: input.hook_event_name ?? "", + agent: "claude_code", + timestamp: ts, + })); + logFn(`queued ${input.hook_event_name ?? "event"} for ${sessionPath}`); + + maybeTriggerPeriodicSummaryFn(input.session_id, input.cwd ??
"", config); + + if (input.hook_event_name === "Stop" || input.hook_event_name === "SubagentStop") { + const result = await flushSessionQueueFn(createApi(config), { + sessionId: input.session_id, + sessionsTable: config.sessionsTableName, + drainAll: true, + }); + logFn(`flush ${result.status}: rows=${result.rows} batches=${result.batches}`); + return { status: "queued", entry, flushStatus: result.status }; } + + return { status: "queued", entry }; } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + await runCaptureHook(input); +} + +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/codex/capture.ts b/src/hooks/codex/capture.ts index 0c80802..5908b4a 100644 --- a/src/hooks/codex/capture.ts +++ b/src/hooks/codex/capture.ts @@ -1,61 +1,48 @@ #!/usr/bin/env node /** - * Codex Capture hook — writes each session event as a row in the sessions table. + * Codex Capture hook — appends session events to a local queue on the hot path. * * Used by: UserPromptSubmit, PostToolUse - * - * Codex input fields: - * All events: session_id, transcript_path, cwd, hook_event_name, model - * UserPromptSubmit: prompt (user text) - * PostToolUse: tool_name, tool_use_id, tool_input, tool_response - * Stop: (no extra fields — Codex has no last_assistant_message equivalent) */ import { readStdin } from "../../utils/stdin.js"; import { loadConfig, type Config } from "../../config.js"; -import { DeeplakeApi } from "../../deeplake-api.js"; -import { sqlStr } from "../../utils/sql.js"; import { log as _log } from "../../utils/debug.js"; -import { buildSessionPath } from "../../utils/session-path.js"; +import { isDirectRun } from "../../utils/direct-run.js"; import { bumpTotalCount, loadTriggerConfig, shouldTrigger, tryAcquireLock, - releaseLock, } from "../summary-state.js"; import { bundleDirFromImportMeta, spawnCodexWikiWorker, wikiLog } from "./spawn-wiki-worker.js"; +import { + appendQueuedSessionRow, + buildQueuedSessionRow, + buildSessionPath, +} from "../session-queue.js"; +import { clearSessionQueryCache } from "../query-cache.js"; + const log = (msg: string) => _log("codex-capture", msg); -interface CodexHookInput { +export interface CodexHookInput { session_id: string; transcript_path?: string | null; cwd: string; hook_event_name: string; model: string; turn_id?: string; - // UserPromptSubmit prompt?: string; - // PostToolUse (Bash only in Codex) tool_name?: string; tool_use_id?: string; tool_input?: { command?: string }; tool_response?: Record; } -const CAPTURE = process.env.HIVEMIND_CAPTURE !== "false"; - -async function main(): Promise { - if (!CAPTURE) return; - const input = await readStdin(); - const config = loadConfig(); - if (!config) { log("no config"); return; } +const CAPTURE = (process.env.HIVEMIND_CAPTURE ?? 
process.env.DEEPLAKE_CAPTURE) !== "false"; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); - - const ts = new Date().toISOString(); +export function buildCodexCaptureEntry(input: CodexHookInput, timestamp: string): Record | null { const meta = { session_id: input.session_id, transcript_path: input.transcript_path, @@ -63,22 +50,20 @@ async function main(): Promise { hook_event_name: input.hook_event_name, model: input.model, turn_id: input.turn_id, - timestamp: ts, + timestamp, }; - let entry: Record; - if (input.hook_event_name === "UserPromptSubmit" && input.prompt !== undefined) { - log(`user session=${input.session_id}`); - entry = { + return { id: crypto.randomUUID(), ...meta, type: "user_message", content: input.prompt, }; - } else if (input.hook_event_name === "PostToolUse" && input.tool_name !== undefined) { - log(`tool=${input.tool_name} session=${input.session_id}`); - entry = { + } + + if (input.hook_event_name === "PostToolUse" && input.tool_name !== undefined) { + return { id: crypto.randomUUID(), ...meta, type: "tool_call", @@ -87,75 +72,133 @@ async function main(): Promise { tool_input: JSON.stringify(input.tool_input), tool_response: JSON.stringify(input.tool_response), }; - } else { - log(`unknown event: ${input.hook_event_name}, skipping`); - return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log(`writing to ${sessionPath}`); + return null; +} - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); +interface PeriodicSummaryDeps { + bundleDir?: string; + wikiWorker?: boolean; + logFn?: (msg: string) => void; + bumpTotalCountFn?: typeof bumpTotalCount; + loadTriggerConfigFn?: typeof loadTriggerConfig; + shouldTriggerFn?: typeof shouldTrigger; + tryAcquireLockFn?: typeof tryAcquireLock; + wikiLogFn?: typeof wikiLog; + spawnCodexWikiWorkerFn?: typeof spawnCodexWikiWorker; +} - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? 
"")}', 'codex', '${ts}', '${ts}')`; +export function maybeTriggerPeriodicSummary(sessionId: string, cwd: string, config: Config, deps: PeriodicSummaryDeps = {}): void { + const { + bundleDir = bundleDirFromImportMeta(import.meta.url), + wikiWorker = process.env.HIVEMIND_WIKI_WORKER === "1", + logFn = log, + bumpTotalCountFn = bumpTotalCount, + loadTriggerConfigFn = loadTriggerConfig, + shouldTriggerFn = shouldTrigger, + tryAcquireLockFn = tryAcquireLock, + wikiLogFn = wikiLog, + spawnCodexWikiWorkerFn = spawnCodexWikiWorker, + } = deps; + + if (wikiWorker) return; try { - await api.query(insertSql); - } catch (e: any) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; + const state = bumpTotalCountFn(sessionId); + const cfg = loadTriggerConfigFn(); + if (!shouldTriggerFn(state, cfg)) return; + + if (!tryAcquireLockFn(sessionId)) { + logFn(`periodic trigger suppressed (lock held) session=${sessionId}`); + return; } - } - log("capture ok"); + wikiLogFn(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); + spawnCodexWikiWorkerFn({ + config, + sessionId, + cwd, + bundleDir, + reason: "Periodic", + }); + } catch (e: any) { + logFn(`periodic trigger error: ${e.message}`); + } +} - maybeTriggerPeriodicSummary(input.session_id, input.cwd ?? "", config); +interface CodexCaptureDeps { + captureEnabled?: boolean; + config?: Config | null; + now?: () => string; + appendQueuedSessionRowFn?: typeof appendQueuedSessionRow; + buildQueuedSessionRowFn?: typeof buildQueuedSessionRow; + clearSessionQueryCacheFn?: typeof clearSessionQueryCache; + maybeTriggerPeriodicSummaryFn?: typeof maybeTriggerPeriodicSummary; + logFn?: (msg: string) => void; } -function maybeTriggerPeriodicSummary(sessionId: string, cwd: string, config: Config): void { - if (process.env.HIVEMIND_WIKI_WORKER === "1") return; +export async function runCodexCaptureHook(input: CodexHookInput, deps: CodexCaptureDeps = {}): Promise<{ + status: "disabled" | "no_config" | "ignored" | "queued"; + entry?: Record; +}> { + const { + captureEnabled = CAPTURE, + config = loadConfig(), + now = () => new Date().toISOString(), + appendQueuedSessionRowFn = appendQueuedSessionRow, + buildQueuedSessionRowFn = buildQueuedSessionRow, + clearSessionQueryCacheFn = clearSessionQueryCache, + maybeTriggerPeriodicSummaryFn = maybeTriggerPeriodicSummary, + logFn = log, + } = deps; + + if (!captureEnabled) return { status: "disabled" }; + if (!config) { + logFn("no config"); + return { status: "no_config" }; + } - try { - const state = bumpTotalCount(sessionId); - const cfg = loadTriggerConfig(); - if (!shouldTrigger(state, cfg)) return; + const ts = now(); + const entry = buildCodexCaptureEntry(input, ts); + if (!entry) { + logFn(`unknown event: ${input.hook_event_name}, skipping`); + return { status: "ignored" }; + } - if (!tryAcquireLock(sessionId)) { - log(`periodic trigger suppressed (lock held) session=${sessionId}`); - return; - } + if (input.hook_event_name === "UserPromptSubmit") logFn(`user session=${input.session_id}`); + else logFn(`tool=${input.tool_name} session=${input.session_id}`); - wikiLog(`Periodic: threshold hit (total=${state.totalCount}, since=${state.totalCount - state.lastSummaryCount}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`); - try { - 
spawnCodexWikiWorker({ - config, - sessionId, - cwd, - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Periodic", - }); - } catch (e: any) { - log(`periodic spawn failed: ${e.message}`); - try { - releaseLock(sessionId); - } catch (releaseErr: any) { - log(`releaseLock after periodic spawn failure also failed: ${releaseErr.message}`); - } - throw e; - } - } catch (e: any) { - log(`periodic trigger error: ${e.message}`); + if (input.hook_event_name === "UserPromptSubmit") { + clearSessionQueryCacheFn(input.session_id); } + + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: input.hook_event_name ?? "", + agent: "codex", + timestamp: ts, + })); + logFn(`queued ${input.hook_event_name} for ${sessionPath}`); + + maybeTriggerPeriodicSummaryFn(input.session_id, input.cwd ?? "", config); + return { status: "queued", entry }; } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + await runCodexCaptureHook(input); +} + +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 3b1aacd..8c94103 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -22,12 +22,18 @@ import { loadConfig } from "../../config.js"; import { DeeplakeApi } from "../../deeplake-api.js"; import { sqlLike } from "../../utils/sql.js"; import { parseBashGrep, handleGrepDirect } from "../grep-direct.js"; -import { executeCompiledBashCommand } from "../bash-command-compiler.js"; +import { + executeCompiledBashCommand, + extractPsqlQueryFromCommand, + queryReferencesInterceptedTables, + queryUsesOnlyInterceptedTables, +} from "../bash-command-compiler.js"; import { findVirtualPaths, readVirtualPathContents, listVirtualPathRows, readVirtualPathContent, + buildVirtualIndexContent, } from "../virtual-table-query.js"; import { readCachedIndexContent, @@ -36,9 +42,42 @@ import { import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; +function touchesVirtualMemoryPath(value: string): boolean { + const rewritten = rewritePaths(value).trim(); + return ( + rewritten === "/index.md" || + rewritten === "/summaries" || + rewritten.startsWith("/summaries/") || + rewritten === "/sessions" || + rewritten.startsWith("/sessions/") || + /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten) + ); +} + +function touchesAnyMemoryPath(value: string): boolean { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} + +function isAnyPsqlCommand(cmd: string): boolean { + return /^\s*psql\b/.test(cmd.trim()); +} + +function isHivemindPsqlCommand(cmd: string): boolean { + if (!isPsqlMode()) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} + 
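// Editor's sketch (illustrative, not part of the patch): how the three psql
// predicates defined around this point are expected to compose into a routing
// decision inside processCodexPreToolUse below. The helper names come from
// this diff; the classifyPsqlCommand wrapper itself is hypothetical.
function classifyPsqlCommand(cmd: string): "pass" | "intercept" | "guide" {
  if (!isAnyPsqlCommand(cmd)) return "pass";          // not a psql invocation at all
  if (isHivemindPsqlCommand(cmd)) return "intercept"; // SELECT over intercepted tables only
  if (needsHivemindPsqlRewrite(cmd)) return "guide";  // mixes intercepted and other tables
  return "pass";                                      // psql against unrelated tables
}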
+function needsHivemindPsqlRewrite(cmd: string): boolean {
+  if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) return false;
+  const query = extractPsqlQueryFromCommand(cmd);
+  return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query);
+}
+
 const log = (msg: string) => _log("codex-pre", msg);
 
 const __bundleDir = dirname(fileURLToPath(import.meta.url));
@@ -65,11 +104,31 @@ export interface CodexPreToolDecision {
 
 export function buildUnsupportedGuidance(): string {
   return "This command is not supported for ~/.deeplake/memory/ operations. " +
-    "Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. " +
+    "Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c \"SELECT ...\". " +
     "Do NOT use python, python3, node, curl, or other interpreters. " +
     "Rewrite your command using only bash tools and retry.";
 }
 
+export function buildPsqlOnlyGuidance(): string {
+  if (isFactsSessionsOnlyPsqlMode()) {
+    return "Hivemind recall is SQL-only in this mode. " +
+      "Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. " +
+      "Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups.";
+  }
+  return "Hivemind recall is SQL-only in this mode. " +
+    "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. " +
+    "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups.";
+}
+
+export function buildPsqlSchemaGuidance(): string {
+  if (isFactsSessionsOnlyPsqlMode()) {
+    return "Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " +
+      "Rewrite the query to reference only those tables with normal psql SELECT syntax.";
+  }
+  return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " +
+    "Rewrite the query to reference only those tables with normal psql SELECT syntax.";
+}
+
 export function runVirtualShell(cmd: string, shellBundle = SHELL_BUNDLE, logFn: (msg: string) => void = log): string {
   try {
     return execFileSync("node", [shellBundle, "-c", cmd], {
@@ -84,18 +143,6 @@ export function runVirtualShell(cmd: string, shellBundle = SHELL_BUNDLE, logFn:
   }
 }
 
-function buildIndexContent(rows: Record<string, unknown>[]): string {
-  const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""];
-  for (const row of rows) {
-    const path = row["path"] as string;
-    const project = row["project"] as string || "";
-    const description = (row["description"] as string || "").slice(0, 120);
-    const date = (row["creation_date"] as string || "").slice(0, 10);
-    lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`);
-  }
-  return lines.join("\n");
-}
-
 interface CodexPreToolDeps {
   config?: ReturnType<typeof loadConfig>;
   createApi?: (table: string, config: NonNullable<ReturnType<typeof loadConfig>>) => DeeplakeApi;
@@ -141,11 +188,30 @@ export async function processCodexPreToolUse(
   const cmd = input.tool_input?.command ?? 
""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd)) return { action: "pass" }; + if (!touchesAnyMemoryPath(cmd) && !isAnyPsqlCommand(cmd)) return { action: "pass" }; + + if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { + if (needsHivemindPsqlRewrite(cmd)) { + return { + action: "guide", + output: buildPsqlSchemaGuidance(), + rewrittenCommand: cmd.trim(), + }; + } + return { action: "pass" }; + } + + if (isPsqlMode() && touchesAnyMemoryPath(cmd)) { + return { + action: "guide", + output: buildPsqlOnlyGuidance(), + rewrittenCommand: cmd.trim(), + }; + } - const rewritten = rewritePaths(cmd); + const rewritten = isHivemindPsqlCommand(cmd) ? cmd.trim() : rewritePaths(cmd); if (!isSafe(rewritten)) { - const guidance = buildUnsupportedGuidance(); + const guidance = isPsqlMode() ? buildPsqlOnlyGuidance() : buildUnsupportedGuidance(); logFn(`unsupported command, returning guidance: ${rewritten}`); return { action: "guide", @@ -154,6 +220,14 @@ export async function processCodexPreToolUse( }; } + if (isHivemindPsqlCommand(rewritten) && !config) { + return { + action: "guide", + output: "Hivemind SQL mode is unavailable because Deeplake credentials are missing.", + rewrittenCommand: rewritten, + }; + } + if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; @@ -164,7 +238,7 @@ export async function processCodexPreToolUse( ): Promise> => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; @@ -248,17 +322,17 @@ export async function processCodexPreToolUse( if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" + let content = !isIndexDisabled() && virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` ); - content = buildIndexContent(idxRows); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { @@ -347,9 +421,24 @@ export async function processCodexPreToolUse( } } catch (e: any) { logFn(`direct query failed, falling back to shell: ${e.message}`); + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten, + }; + } } } + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. 
Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten, + }; + } + logFn(`intercepted → running via virtual shell: ${rewritten}`); const result = runVirtualShellFn(rewritten, shellBundle, logFn); return { diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts index 8dfb984..8c720cc 100644 --- a/src/hooks/codex/session-start-setup.ts +++ b/src/hooks/codex/session-start-setup.ts @@ -8,6 +8,7 @@ import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; +import { mkdirSync, appendFileSync } from "node:fs"; import { execSync } from "node:child_process"; import { homedir } from "node:os"; import { loadCredentials, saveCredentials } from "../../commands/auth.js"; @@ -16,15 +17,54 @@ import { DeeplakeApi } from "../../deeplake-api.js"; import { sqlStr } from "../../utils/sql.js"; import { readStdin } from "../../utils/stdin.js"; import { log as _log } from "../../utils/debug.js"; -import { getInstalledVersion, getLatestVersion, isNewer } from "../../utils/version-check.js"; -import { makeWikiLogger } from "../../utils/wiki-log.js"; +import { isDirectRun } from "../../utils/direct-run.js"; +import { + drainSessionQueues, + isSessionWriteAuthError, + isSessionWriteDisabled, + markSessionWriteDisabled, + tryAcquireSessionDrainLock, +} from "../session-queue.js"; +import { + getInstalledVersion, + getLatestVersionCached, + isNewer, +} from "../version-check.js"; + const log = (msg: string) => _log("codex-session-setup", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); -const { log: wikiLog } = makeWikiLogger(join(homedir(), ".codex", "hooks")); +const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +const VERSION_CHECK_TIMEOUT = 3000; + +const HOME = homedir(); +const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); + +export function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${new Date().toISOString().replace("T", " ").slice(0, 19)}] ${msg}\n`); + } catch { /* ignore */ } +} -/** Create a placeholder summary via direct SQL INSERT. */ -async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { +export interface CodexSessionStartInput { + session_id: string; + transcript_path?: string | null; + cwd: string; + hook_event_name: string; + model: string; + source?: string; +} + +export async function createPlaceholder( + api: DeeplakeApi, + table: string, + sessionId: string, + cwd: string, + userName: string, + orgName: string, + workspaceId: string, +): Promise { const summaryPath = `/summaries/${userName}/${sessionId}.md`; const existing = await api.query( @@ -36,7 +76,7 @@ async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: str } const now = new Date().toISOString(); - const projectName = cwd.split("/").pop() ?? 
"unknown"; + const projectName = cwd.split("/").pop() || "unknown"; const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; const content = [ `# Session ${sessionId}`, @@ -57,61 +97,129 @@ async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: str wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); } -interface CodexSessionStartInput { - session_id: string; - transcript_path?: string | null; - cwd: string; - hook_event_name: string; - model: string; - source?: string; +interface CodexSessionStartSetupDeps { + wikiWorker?: boolean; + creds?: ReturnType; + saveCredentialsFn?: typeof saveCredentials; + config?: ReturnType; + createApi?: (config: NonNullable>) => DeeplakeApi; + captureEnabled?: boolean; + drainSessionQueuesFn?: typeof drainSessionQueues; + isSessionWriteDisabledFn?: typeof isSessionWriteDisabled; + isSessionWriteAuthErrorFn?: typeof isSessionWriteAuthError; + markSessionWriteDisabledFn?: typeof markSessionWriteDisabled; + tryAcquireSessionDrainLockFn?: typeof tryAcquireSessionDrainLock; + createPlaceholderFn?: typeof createPlaceholder; + getInstalledVersionFn?: typeof getInstalledVersion; + getLatestVersionCachedFn?: typeof getLatestVersionCached; + isNewerFn?: typeof isNewer; + execSyncFn?: typeof execSync; + logFn?: (msg: string) => void; + wikiLogFn?: typeof wikiLog; } -async function main(): Promise { - if (process.env.HIVEMIND_WIKI_WORKER === "1") return; +export async function runCodexSessionStartSetup(input: CodexSessionStartInput, deps: CodexSessionStartSetupDeps = {}): Promise<{ + status: "skipped" | "no_credentials" | "complete"; +}> { + const { + wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", + creds = loadCredentials(), + saveCredentialsFn = saveCredentials, + config = loadConfig(), + createApi = (activeConfig) => new DeeplakeApi( + activeConfig.token, + activeConfig.apiUrl, + activeConfig.orgId, + activeConfig.workspaceId, + activeConfig.tableName, + ), + captureEnabled = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false", + drainSessionQueuesFn = drainSessionQueues, + isSessionWriteDisabledFn = isSessionWriteDisabled, + isSessionWriteAuthErrorFn = isSessionWriteAuthError, + markSessionWriteDisabledFn = markSessionWriteDisabled, + tryAcquireSessionDrainLockFn = tryAcquireSessionDrainLock, + createPlaceholderFn = createPlaceholder, + getInstalledVersionFn = getInstalledVersion, + getLatestVersionCachedFn = getLatestVersionCached, + isNewerFn = isNewer, + execSyncFn = execSync, + logFn = log, + wikiLogFn = wikiLog, + } = deps; - const input = await readStdin(); - const creds = loadCredentials(); - if (!creds?.token) { log("no credentials"); return; } + if (wikiWorker) return { status: "skipped" }; + if (!creds?.token) { + logFn("no credentials"); + return { status: "no_credentials" }; + } - // Backfill userName if missing if (!creds.userName) { try { const { userInfo } = await import("node:os"); creds.userName = userInfo().username ?? 
"unknown"; - saveCredentials(creds); - log(`backfilled userName: ${creds.userName}`); + saveCredentialsFn(creds); + logFn(`backfilled userName: ${creds.userName}`); } catch { /* non-fatal */ } } - // Table setup + sync — always sync, only skip placeholder when capture disabled - const captureEnabled = process.env.HIVEMIND_CAPTURE !== "false"; - if (input.session_id) { + if (input.session_id && config) { try { - const config = loadConfig(); - if (config) { - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); - await api.ensureTable(); - await api.ensureSessionsTable(config.sessionsTableName); - if (captureEnabled) { - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + const api = createApi(config); + await api.ensureTable(); + if (captureEnabled) { + if (isSessionWriteDisabledFn(config.sessionsTableName)) { + logFn(`sessions table disabled, skipping setup for "${config.sessionsTableName}"`); + } else { + const releaseDrainLock = tryAcquireSessionDrainLockFn(config.sessionsTableName); + if (!releaseDrainLock) { + logFn(`sessions drain already in progress, skipping duplicate setup for "${config.sessionsTableName}"`); + } else { + try { + await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); + const drain = await drainSessionQueuesFn(api, { + sessionsTable: config.sessionsTableName, + }); + if (drain.flushedSessions > 0) { + logFn(`drained ${drain.flushedSessions} queued session(s), rows=${drain.rows}, batches=${drain.batches}`); + } + } catch (e: any) { + if (isSessionWriteAuthErrorFn(e)) { + markSessionWriteDisabledFn(config.sessionsTableName, e.message); + logFn(`sessions table unavailable, skipping setup: ${e.message}`); + } else { + throw e; + } + } finally { + releaseDrainLock(); + } + } } - log("setup complete"); + await createPlaceholderFn(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); } + logFn("setup complete"); } catch (e: any) { - log(`setup failed: ${e.message}`); - wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + logFn(`setup failed: ${e.message}`); + wikiLogFn(`SessionSetup: failed for ${input.session_id}: ${e.message}`); } } - // Version check + auto-update const autoupdate = creds.autoupdate !== false; try { - const current = getInstalledVersion(__bundleDir, ".codex-plugin"); + const current = getInstalledVersionFn(__bundleDir, ".codex-plugin"); if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { + const latest = await getLatestVersionCachedFn({ + url: GITHUB_RAW_PKG, + timeoutMs: VERSION_CHECK_TIMEOUT, + }); + if (latest && isNewerFn(latest, current)) { if (autoupdate) { - log(`autoupdate: updating ${current} → ${latest}`); + logFn(`autoupdate: updating ${current} → ${latest}`); try { const tag = `v${latest}`; if (!/^v\d+\.\d+\.\d+$/.test(tag)) throw new Error(`unsafe version tag: ${tag}`); @@ -124,24 +232,35 @@ async function main(): Promise { `git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && ` + `cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; ` + `rm -rf "$TMPDIR"; fi`; - execSync(findCmd, { stdio: "ignore", timeout: 60_000 }); + execSyncFn(findCmd, { stdio: "ignore", timeout: 60_000 }); process.stderr.write(`Hivemind auto-updated: ${current} → ${latest}. Restart Codex to apply.\n`); - log(`autoupdate succeeded: ${current} → ${latest} (tag: ${tag})`); + logFn(`autoupdate succeeded: ${current} → ${latest} (tag: ${tag})`); } catch (e: any) { process.stderr.write(`Hivemind update available: ${current} → ${latest}. Auto-update failed.\n`); - log(`autoupdate failed: ${e.message}`); + logFn(`autoupdate failed: ${e.message}`); } } else { process.stderr.write(`Hivemind update available: ${current} → ${latest}.\n`); - log(`update available (autoupdate off): ${current} → ${latest}`); + logFn(`update available (autoupdate off): ${current} → ${latest}`); } } else { - log(`version up to date: ${current}`); + logFn(`version up to date: ${current}`); } } } catch (e: any) { - log(`version check failed: ${e.message}`); + logFn(`version check failed: ${e.message}`); } + + return { status: "complete" }; } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + await runCodexSessionStartSetup(input); +} + +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index 81d25e4..582de16 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -5,9 +5,6 @@ * Only reads local credentials and injects context into Codex's developer prompt. * All server calls (table setup, placeholder, version check) are handled by * session-start-setup.js which runs as a separate async hook. 
- * - * Codex input: { session_id, transcript_path, cwd, hook_event_name, model, source } - * Codex output: plain text on stdout (added as developer context) */ import { spawn } from "node:child_process"; @@ -16,20 +13,193 @@ import { dirname, join } from "node:path"; import { loadCredentials } from "../../commands/auth.js"; import { readStdin } from "../../utils/stdin.js"; import { log as _log } from "../../utils/debug.js"; -import { getInstalledVersion } from "../../utils/version-check.js"; +import { isDirectRun } from "../../utils/direct-run.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; +import { getInstalledVersion } from "../version-check.js"; + const log = (msg: string) => _log("codex-session-start", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); const AUTH_CMD = join(__bundleDir, "commands", "auth-login.js"); -const context = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. +export const CODEX_SESSION_START_CONTEXT = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +Structure: index.md (start here) → summaries/*.md → sessions/{author}/* (last resort). Do NOT jump straight to raw session files. +When index.md identifies a likely match, read that exact summary or session path directly before broader grep variants. +If index.md already points to likely candidate files, open those exact files before broader synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked index.md plus at least one likely summary or raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." 
+Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; + +export const CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +SESSIONS-ONLY mode is active for benchmark comparison. Available Deeplake recall paths are raw session files under sessions/{author}/*. +Do NOT start with index.md or summaries in this mode, and do NOT assume those paths exist. +Open the most likely session file directly before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; + +export const CODEX_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. -Structure: index.md (start here) → summaries/*.md → sessions/*.jsonl (last resort). Do NOT jump straight to JSONL. +Structure in this mode: summaries/*.md → sessions/{author}/* (last resort). /index.md is intentionally unavailable, so do NOT read it or rely on it. +Start by grepping summaries for the named person, topic, or keyword. 
Then open the specific matching summaries. Only read raw session files if summaries do not contain the exact detail. +If a summary points to a likely source session, read that exact raw session before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. Do NOT spawn subagents to read deeplake memory.`; -interface CodexSessionStartInput { +export const CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. + +Available tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Query memory first to identify likely summaries and sessions. +2. 
In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. Re-query memory by exact path for the small candidate set you selected. +5. Query sessions by exact path for transcript evidence or unresolved dates. +6. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative, immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased summary labels. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". 
+
+Good query patterns:
+- Candidate summaries:
+  psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%<person>%' AND (summary ILIKE '%<topic1>%' OR summary ILIKE '%<topic2>%') ORDER BY creation_date DESC LIMIT 5"
+- Canonical entity lookup:
+  psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%<name>%' OR aliases ILIKE '%<name>%' LIMIT 5"
+- Fact lookup by entity:
+  psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%<person>%' AND (predicate ILIKE '%<predicate>%' OR object_name ILIKE '%<topic>%') ORDER BY creation_date DESC LIMIT 10"
+- Entity-linked fact expansion:
+  psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '<entity_id>' ORDER BY f.creation_date DESC LIMIT 10"
+- Exact summary reread:
+  psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')"
+- Transcript grounding by exact path:
+  psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC"
+- Transcript search inside known sessions:
+  psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%<name>%' OR text ILIKE '%<keyword>%') ORDER BY path ASC, turn_index ASC"
+- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries:
+  psql -At -F '|' -c "SELECT path, summary, summary <#> '<query terms>' AS score FROM memory WHERE summary ILIKE '%<person>%' ORDER BY score DESC LIMIT 5"
+- If graph entity lookup is sparse or semantically weak, retry with BM25 on graph nodes:
+  psql -At -F '|' -c "SELECT node_id, canonical_name, node_type, summary, source_session_id, source_path, search_text <#> '<query terms>' AS score FROM graph_nodes ORDER BY score DESC LIMIT 5"
+
+Avoid these mistakes:
+- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths.
+- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields.
+- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions.
+- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions.
+- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row.
+- Do NOT replace an exact status or self-label with a broader biography.
+- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question.
+- Do NOT turn a short list question into a narrative list of loosely related activities.
+
+Answer rules:
+- Return the smallest exact answer supported by the data.
+- Resolve relative dates against session metadata, not today's date.
+- Do not answer "not found" until you have checked both memory and a likely sessions row.
+- Preserve direct relative-duration answers when they already match the question.
+- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. +- Aggregate across the small candidate set before answering profile or list questions. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; + +export const CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. + +Available tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Resolve the named person, project, place, or organization with memory_entities. +2. Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Use sessions for transcript grounding and final answer verification. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. Facts are for narrowing and aggregation; sessions are for the final exact answer. 
+
+Good query patterns:
+- Canonical entity lookup:
+  psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%<name>%' OR aliases ILIKE '%<name>%' LIMIT 5"
+- Fact lookup by entity:
+  psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%<person>%' AND (predicate ILIKE '%<predicate>%' OR object_name ILIKE '%<topic>%') ORDER BY creation_date DESC LIMIT 10"
+- Entity-linked fact expansion:
+  psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '<entity_id>' ORDER BY f.creation_date DESC LIMIT 10"
+- Transcript grounding by exact path:
+  psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC"
+- Transcript search inside known sessions:
+  psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%<name>%' OR text ILIKE '%<keyword>%') ORDER BY path ASC, turn_index ASC"
+
+Avoid these mistakes:
+- Do NOT query memory, graph_nodes, or graph_edges in this mode.
+- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available.
+- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions.
+- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields.
+- Do NOT replace an exact status or self-label with a broader biography.
+- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question.
+
+Answer rules:
+- Return the smallest exact answer supported by the data.
+- Sessions win over facts if they differ in detail or specificity.
+- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date.
+- Do not answer "not found" until you have checked both the fact layer and a likely sessions row.
+
+Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode.`;
+
+export interface CodexSessionStartInput {
   session_id: string;
   transcript_path?: string | null;
   cwd: string;
@@ -38,48 +208,79 @@ interface CodexSessionStartInput {
   source?: string;
 }
 
-async function main(): Promise<void> {
-  if (process.env.HIVEMIND_WIKI_WORKER === "1") return;
+export function buildCodexSessionStartContext(args: {
+  creds: ReturnType<typeof loadCredentials>;
+  currentVersion: string | null;
+  authCommand: string;
+}): string {
+  const versionNotice = args.currentVersion ? `\nHivemind v${args.currentVersion}` : "";
+  const template = isPsqlMode()
+    ? isFactsSessionsOnlyPsqlMode()
+      ? CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY
+      : CODEX_SESSION_START_CONTEXT_PSQL
+    : isSessionsOnlyMode()
+      ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY
+      : isIndexDisabled()
+        ? CODEX_SESSION_START_CONTEXT_NO_INDEX
+        : CODEX_SESSION_START_CONTEXT;
+  return args.creds?.token
+    ? 
`${template}\nLogged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` + : `${template}\nNot logged in to Deeplake. Run: node "${args.authCommand}" login${versionNotice}`; +} - const input = await readStdin(); +interface CodexSessionStartDeps { + wikiWorker?: boolean; + creds?: ReturnType; + spawnFn?: typeof spawn; + currentVersion?: string | null; + authCommand?: string; + setupScript?: string; + logFn?: (msg: string) => void; +} - const creds = loadCredentials(); +export async function runCodexSessionStartHook(input: CodexSessionStartInput, deps: CodexSessionStartDeps = {}): Promise { + const { + wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", + creds = loadCredentials(), + spawnFn = spawn, + currentVersion = getInstalledVersion(__bundleDir, ".codex-plugin"), + authCommand = AUTH_CMD, + setupScript = join(__bundleDir, "session-start-setup.js"), + logFn = log, + } = deps; - if (!creds?.token) { - log("no credentials found — run auth login to authenticate"); - } else { - log(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - } + if (wikiWorker) return null; + + if (!creds?.token) logFn("no credentials found — run auth login to authenticate"); + else logFn(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - // Spawn async setup (table creation, placeholder, version check) as detached process. - // Codex doesn't support async hooks, so we use the same pattern as the wiki worker. if (creds?.token) { - const setupScript = join(__bundleDir, "session-start-setup.js"); - const child = spawn("node", [setupScript], { + const child = spawnFn("node", [setupScript], { detached: true, stdio: ["pipe", "ignore", "ignore"], env: { ...process.env }, }); - // Feed the same stdin input to the setup process child.stdin?.write(JSON.stringify(input)); child.stdin?.end(); child.unref(); - log("spawned async setup process"); - } - - let versionNotice = ""; - const current = getInstalledVersion(__bundleDir, ".codex-plugin"); - if (current) { - versionNotice = `\nHivemind v${current}`; + logFn("spawned async setup process"); } - const additionalContext = creds?.token - ? `${context}\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` - : `${context}\nNot logged in to Deeplake. Run: node "${AUTH_CMD}" login${versionNotice}`; + return buildCodexSessionStartContext({ + creds, + currentVersion, + authCommand, + }); +} - // Codex SessionStart: plain text on stdout is added as developer context. - // JSON { additionalContext } format is rejected by Codex 0.118.0. 
- console.log(additionalContext); +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + const output = await runCodexSessionStartHook(input); + if (output) console.log(output); } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/codex/spawn-wiki-worker.ts b/src/hooks/codex/spawn-wiki-worker.ts index 06bc89e..dbd3bd6 100644 --- a/src/hooks/codex/spawn-wiki-worker.ts +++ b/src/hooks/codex/spawn-wiki-worker.ts @@ -6,16 +6,18 @@ import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; -import { writeFileSync, mkdirSync } from "node:fs"; +import { writeFileSync, mkdirSync, appendFileSync } from "node:fs"; import { homedir, tmpdir } from "node:os"; import type { Config } from "../../config.js"; -import { makeWikiLogger } from "../../utils/wiki-log.js"; +import { GRAPH_PROMPT_TEMPLATE } from "../knowledge-graph.js"; +import { MEMORY_FACT_PROMPT_TEMPLATE } from "../memory-facts.js"; const HOME = homedir(); -const wikiLogger = makeWikiLogger(join(HOME, ".codex", "hooks")); -export const WIKI_LOG = wikiLogger.path; +export const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); -export const WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge — entities, decisions, relationships, and facts — into a structured, searchable wiki entry. +export const WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -29,43 +31,58 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - - -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. + + +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. 
Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. LENGTH LIMIT: Keep the total summary under 4000 characters.`; -export const wikiLog = wikiLogger.log; +export function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${new Date().toISOString().replace("T", " ").slice(0, 19)}] ${msg}\n`); + } catch { /* ignore */ } +} export function findCodexBin(): string { try { @@ -98,6 +115,11 @@ export function spawnCodexWikiWorker(opts: SpawnOptions): void { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -106,6 +128,8 @@ export function spawnCodexWikiWorker(opts: SpawnOptions): void { wikiLog: WIKI_LOG, hooksDir: join(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE, })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); diff --git a/src/hooks/codex/stop.ts b/src/hooks/codex/stop.ts index 39eb330..9393c34 100644 --- a/src/hooks/codex/stop.ts +++ b/src/hooks/codex/stop.ts @@ -4,26 +4,27 @@ * Codex Stop hook — handles both capture and session-end (wiki summary spawn). * * Codex has no SessionEnd event, so this hook does double duty: - * 1. Captures the stop event to the sessions table (like capture.ts) - * 2. Spawns the wiki worker to generate the session summary (like session-end.ts) - * - * Codex input: { session_id, transcript_path, cwd, hook_event_name, model } - * Codex output: JSON with optional { decision: "block", reason: "..." } to continue + * 1. Captures the stop event to the sessions table + * 2. 
Spawns the wiki worker to generate the session summary */
 import { readFileSync, existsSync } from "node:fs";
 import { readStdin } from "../../utils/stdin.js";
-import { loadConfig } from "../../config.js";
+import { loadConfig, type Config } from "../../config.js";
 import { DeeplakeApi } from "../../deeplake-api.js";
-import { sqlStr } from "../../utils/sql.js";
 import { log as _log } from "../../utils/debug.js";
+import { isDirectRun } from "../../utils/direct-run.js";
 import { bundleDirFromImportMeta, spawnCodexWikiWorker, wikiLog } from "./spawn-wiki-worker.js";
-import { tryAcquireLock, releaseLock } from "../summary-state.js";
-import { buildSessionPath } from "../../utils/session-path.js";
+import {
+  appendQueuedSessionRow,
+  buildQueuedSessionRow,
+  buildSessionPath,
+  flushSessionQueue,
+} from "../session-queue.js";
 const log = (msg: string) => _log("codex-stop", msg);
-interface CodexStopInput {
+export interface CodexStopInput {
   session_id: string;
   transcript_path?: string | null;
   cwd: string;
@@ -31,122 +32,163 @@
   model: string;
 }
-const CAPTURE = process.env.HIVEMIND_CAPTURE !== "false";
+const CAPTURE = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false";
-async function main(): Promise<void> {
-  if (process.env.HIVEMIND_WIKI_WORKER === "1") return;
+export function extractLastAssistantMessage(transcript: string): string {
+  const lines = transcript.trim().split("\n").reverse();
+  for (const line of lines) {
+    try {
+      const entry = JSON.parse(line);
+      const msg = entry.payload ?? entry;
+      if (msg.role === "assistant" && msg.content) {
+        const content = typeof msg.content === "string"
+          ? msg.content
+          : Array.isArray(msg.content)
+            ? msg.content
+                .filter((b: any) => b.type === "output_text" || b.type === "text")
+                .map((b: any) => b.text)
+                .join("\n")
+            : "";
+        if (content) return content.slice(0, 4000);
+      }
+    } catch { /* skip malformed line */ }
+  }
+  return "";
+}
-  const input = await readStdin();
-  const sessionId = input.session_id;
-  if (!sessionId) return;
+export function buildCodexStopEntry(input: CodexStopInput, timestamp: string, lastAssistantMessage: string): Record<string, unknown> {
+  return {
+    id: crypto.randomUUID(),
+    session_id: input.session_id,
+    transcript_path: input.transcript_path,
+    cwd: input.cwd,
+    hook_event_name: input.hook_event_name,
+    model: input.model,
+    timestamp,
+    type: lastAssistantMessage ? "assistant_message" : "assistant_stop",
+    content: lastAssistantMessage,
+  };
+}
-  const config = loadConfig();
-  if (!config) { log("no config"); return; }
+interface CodexStopDeps {
+  wikiWorker?: boolean;
+  captureEnabled?: boolean;
+  config?: Config | null;
+  now?: () => string;
+  transcriptExists?: (path: string) => boolean;
+  readTranscript?: (path: string) => string;
+  createApi?: (config: Config) => DeeplakeApi;
+  appendQueuedSessionRowFn?: typeof appendQueuedSessionRow;
+  buildQueuedSessionRowFn?: typeof buildQueuedSessionRow;
+  flushSessionQueueFn?: typeof flushSessionQueue;
+  spawnCodexWikiWorkerFn?: typeof spawnCodexWikiWorker;
+  wikiLogFn?: typeof wikiLog;
+  bundleDir?: string;
+  logFn?: (msg: string) => void;
+}
-  // 1.
Capture the stop event (try to extract last assistant message from transcript)
-  if (CAPTURE) {
-    try {
-      const sessionsTable = config.sessionsTableName;
-      const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable);
-      const ts = new Date().toISOString();
+export async function runCodexStopHook(input: CodexStopInput, deps: CodexStopDeps = {}): Promise<{
+  status: "skipped" | "no_config" | "complete";
+  flushStatus?: string;
+  entry?: Record<string, unknown>;
+}> {
+  const {
+    wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1",
+    captureEnabled = CAPTURE,
+    config = loadConfig(),
+    now = () => new Date().toISOString(),
+    transcriptExists = existsSync,
+    readTranscript = (path) => readFileSync(path, "utf-8"),
+    createApi = (activeConfig) => new DeeplakeApi(
+      activeConfig.token,
+      activeConfig.apiUrl,
+      activeConfig.orgId,
+      activeConfig.workspaceId,
+      activeConfig.sessionsTableName,
+    ),
+    appendQueuedSessionRowFn = appendQueuedSessionRow,
+    buildQueuedSessionRowFn = buildQueuedSessionRow,
+    flushSessionQueueFn = flushSessionQueue,
+    spawnCodexWikiWorkerFn = spawnCodexWikiWorker,
+    wikiLogFn = wikiLog,
+    bundleDir = bundleDirFromImportMeta(import.meta.url),
+    logFn = log,
+  } = deps;
+
+  if (wikiWorker || !input.session_id) return { status: "skipped" };
+  if (!config) {
+    logFn("no config");
+    return { status: "no_config" };
+  }
+
+  let entry: Record<string, unknown> | undefined;
+  let flushStatus: string | undefined;
-      // Codex Stop doesn't include last_assistant_message, but it provides
-      // transcript_path. Try to extract the last assistant message from it.
+  if (captureEnabled) {
+    try {
+      const ts = now();
       let lastAssistantMessage = "";
       if (input.transcript_path) {
         try {
-          const transcriptPath = input.transcript_path;
-          if (existsSync(transcriptPath)) {
-            const transcript = readFileSync(transcriptPath, "utf-8");
-            // Codex transcript is JSONL with format:
-            // {"type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"..."}]}}
-            const lines = transcript.trim().split("\n").reverse();
-            for (const line of lines) {
-              try {
-                const entry = JSON.parse(line);
-                // Codex nests the message inside payload
-                const msg = entry.payload ?? entry;
-                if (msg.role === "assistant" && msg.content) {
-                  const content = typeof msg.content === "string"
-                    ? msg.content
-                    : Array.isArray(msg.content)
-                      ? msg.content.filter((b: any) => b.type === "output_text" || b.type === "text").map((b: any) => b.text).join("\n")
-                      : "";
-                  if (content) {
-                    lastAssistantMessage = content.slice(0, 4000);
-                    break;
-                  }
-                }
-              } catch { /* skip malformed line */ }
+          if (transcriptExists(input.transcript_path)) {
+            lastAssistantMessage = extractLastAssistantMessage(readTranscript(input.transcript_path));
+            if (lastAssistantMessage) {
+              logFn(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`);
            }
-            if (lastAssistantMessage) log(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`);
          }
        } catch (e: any) {
-          log(`transcript read failed: ${e.message}`);
+          logFn(`transcript read failed: ${e.message}`);
        }
      }
-      const entry = {
-        id: crypto.randomUUID(),
-        session_id: sessionId,
-        transcript_path: input.transcript_path,
-        cwd: input.cwd,
-        hook_event_name: input.hook_event_name,
-        model: input.model,
-        timestamp: ts,
-        type: lastAssistantMessage ?
"assistant_message" : "assistant_stop", - content: lastAssistantMessage, - }; + entry = buildCodexStopEntry(input, ts, lastAssistantMessage); const line = JSON.stringify(entry); - const sessionPath = buildSessionPath(config, sessionId); + const sessionPath = buildSessionPath(config, input.session_id); const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); - - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', 'Stop', 'codex', '${ts}', '${ts}')`; - - await api.query(insertSql); - log("stop event captured"); + appendQueuedSessionRowFn(buildQueuedSessionRowFn({ + sessionPath, + line, + sessionId: input.session_id, + userName: config.userName, + projectName, + description: "Stop", + agent: "codex", + timestamp: ts, + })); + + const flush = await flushSessionQueueFn(createApi(config), { + sessionId: input.session_id, + sessionsTable: config.sessionsTableName, + drainAll: true, + }); + flushStatus = flush.status; + logFn(`stop flush ${flush.status}: rows=${flush.rows} batches=${flush.batches}`); } catch (e: any) { - log(`capture failed: ${e.message}`); + logFn(`capture failed: ${e.message}`); } } - // 2. Spawn wiki worker — skip when capture disabled - if (!CAPTURE) return; + if (!captureEnabled) return { status: "complete", entry }; - // Coordinate with the periodic worker: if one is already running for this - // session, skip. Two workers writing the same summary row trip the - // Deeplake UPDATE-coalescing quirk (see CLAUDE.md) and drop one write. - if (!tryAcquireLock(sessionId)) { - wikiLog(`Stop: periodic worker already running for ${sessionId}, skipping`); - return; - } + wikiLogFn(`Stop: triggering summary for ${input.session_id}`); + spawnCodexWikiWorkerFn({ + config, + sessionId: input.session_id, + cwd: input.cwd ?? "", + bundleDir, + reason: "Stop", + }); - wikiLog(`Stop: triggering summary for ${sessionId}`); - try { - spawnCodexWikiWorker({ - config, - sessionId, - cwd: input.cwd ?? "", - bundleDir: bundleDirFromImportMeta(import.meta.url), - reason: "Stop", - }); - } catch (e: any) { - // Spawn threw before the worker took ownership of the lock: release - // it here so a --resume can retrigger periodic summaries without - // waiting for the 10-minute stale reclaim. 
-    log(`spawn failed: ${e.message}`);
-    try {
-      releaseLock(sessionId);
-    } catch (releaseErr: any) {
-      log(`releaseLock after spawn failure also failed: ${releaseErr.message}`);
-    }
-    throw e;
-  }
+  return { status: "complete", flushStatus, entry };
+}
+
+/* c8 ignore start */
+async function main(): Promise<void> {
+  const input = await readStdin();
+  await runCodexStopHook(input);
 }
-main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); });
+if (isDirectRun(import.meta.url)) {
+  main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); });
+}
+/* c8 ignore stop */
diff --git a/src/hooks/codex/wiki-worker.ts b/src/hooks/codex/wiki-worker.ts
index 7d74f75..c756b0f 100644
--- a/src/hooks/codex/wiki-worker.ts
+++ b/src/hooks/codex/wiki-worker.ts
@@ -12,9 +12,17 @@
 import { execFileSync } from "node:child_process";
 import { join } from "node:path";
 import { finalizeSummary, releaseLock } from "../summary-state.js";
 import { uploadSummary } from "../upload-summary.js";
-import { log as _log } from "../../utils/debug.js";
-
-const dlog = (msg: string) => _log("codex-wiki-worker", msg);
+import {
+  buildKnowledgeGraphPrompt,
+  parseGraphExtraction,
+  replaceSessionGraph,
+} from "../knowledge-graph.js";
+import {
+  buildMemoryFactTranscript,
+  buildMemoryFactPrompt,
+  parseMemoryFactExtraction,
+  replaceSessionFacts,
+} from "../memory-facts.js";
 interface WorkerConfig {
   apiUrl: string;
@@ -23,6 +31,11 @@
   workspaceId: string;
   memoryTable: string;
   sessionsTable: string;
+  graphNodesTable: string;
+  graphEdgesTable: string;
+  factsTable: string;
+  entitiesTable: string;
+  factEntityLinksTable: string;
   sessionId: string;
   userName: string;
   project: string;
@@ -31,6 +44,8 @@
   wikiLog: string;
   hooksDir: string;
   promptTemplate: string;
+  graphPromptTemplate: string;
+  factPromptTemplate: string;
 }
 const cfg: WorkerConfig = JSON.parse(readFileSync(process.argv[2], "utf-8"));
@@ -91,11 +106,7 @@ async function query(sql: string, retries = 4): Promise<Record<string, unknown>[]> {
 function cleanup(): void {
-  try {
-    rmSync(tmpDir, { recursive: true, force: true });
-  } catch (cleanupErr: any) {
-    dlog(`cleanup failed to remove ${tmpDir}: ${cleanupErr.message}`);
-  }
+  try { rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ }
 }
 async function main(): Promise<void> {
@@ -103,8 +114,8 @@
   // 1.
Fetch session events from sessions table wlog("fetching session events"); const rows = await query( - `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + - `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` + `SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" ` + + `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) { @@ -187,6 +198,84 @@ async function main(): Promise { }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate, + }); + const graphRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + graphPrompt, + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + graph, + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e: any) { + wlog(`graph update failed: ${e.message}`); + } + + try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
row["creation_date"] : "", + }))); + const factPrompt = buildMemoryFactPrompt({ + transcriptText, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate, + }); + const factsRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + factPrompt, + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + extraction, + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e: any) { + wlog(`fact update failed: ${e.message}`); + } + try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); @@ -203,11 +292,7 @@ async function main(): Promise { wlog(`fatal: ${e.message}`); } finally { cleanup(); - try { - releaseLock(cfg.sessionId); - } catch (releaseErr: any) { - dlog(`releaseLock failed in finally for ${cfg.sessionId}: ${releaseErr.message}`); - } + try { releaseLock(cfg.sessionId); } catch { /* ignore */ } } } diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index 95e15d9..d80ce2b 100644 --- a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -12,6 +12,7 @@ import { capOutputForClaude } from "../utils/output-cap.js"; export interface GrepParams { pattern: string; targetPath: string; + recursive: boolean; ignoreCase: boolean; wordMatch: boolean; filesOnly: boolean; @@ -108,7 +109,7 @@ export function parseBashGrep(cmd: string): GrepParams | null { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns: string[] = []; @@ -166,6 +167,8 @@ export function parseBashGrep(cmd: string): GrepParams | null { case "F": fixedString = true; break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -205,6 +208,7 @@ export function parseBashGrep(cmd: string): GrepParams | null { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, filesOnly, countOnly, lineNumber, invertMatch, fixedString, }; } @@ -229,7 +233,14 @@ export async function handleGrepDirect( fixedString: params.fixedString, }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables( + api, + table, + sessionsTable, + matchParams, + params.targetPath, + params.recursive ? 
true : undefined, + ); const joined = output.join("\n") || "(no matches)"; return capOutputForClaude(joined, { kind: "grep" }); } diff --git a/src/hooks/knowledge-graph.ts b/src/hooks/knowledge-graph.ts new file mode 100644 index 0000000..af9a3f5 --- /dev/null +++ b/src/hooks/knowledge-graph.ts @@ -0,0 +1,285 @@ +import { randomUUID } from "node:crypto"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; +import { esc, type QueryFn } from "./upload-summary.js"; + +export interface GraphNodeSpec { + name: string; + type?: string; + summary?: string; + aliases?: string[]; +} + +export interface GraphEdgeSpec { + source: string; + target: string; + relation: string; + summary?: string; + evidence?: string; +} + +export interface GraphExtraction { + nodes: GraphNodeSpec[]; + edges: GraphEdgeSpec[]; +} + +export interface ReplaceSessionGraphParams { + query: QueryFn; + nodesTable: string; + edgesTable: string; + sessionId: string; + userName: string; + project: string; + agent: string; + sourcePath: string; + graph: GraphExtraction; + ts?: string; +} + +export interface ReplaceSessionGraphResult { + nodes: number; + edges: number; +} + +export const GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +function stripCodeFences(text: string): string { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} + +function normalizeString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function normalizeAliasList(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .map(normalizeString) + .filter(Boolean) + .filter((item, index, arr) => arr.indexOf(item) === index); +} + +export function parseGraphExtraction(raw: string): GraphExtraction { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned) as Record; + const nodes = Array.isArray(parsed["nodes"]) ? parsed["nodes"] as Array> : []; + const edges = Array.isArray(parsed["edges"]) ? 
parsed["edges"] as Array> : []; + return { + nodes: nodes + .map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]), + })) + .filter((node) => node.name), + edges: edges + .map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]), + })) + .filter((edge) => edge.source && edge.target && edge.relation), + }; +} + +function slugify(value: string): string { + return value + .normalize("NFKD") + .replace(/[^\w\s-]/g, "") + .trim() + .toLowerCase() + .replace(/[\s-]+/g, "_") + .replace(/^_+|_+$/g, "") || "item"; +} + +export function buildGraphNodeId(name: string, _type = "other"): string { + return `entity:${slugify(name)}`; +} + +function buildNodeSearchText(node: GraphNodeSpec): string { + return [ + node.name, + node.type ?? "other", + ...(node.aliases ?? []), + node.summary ?? "", + ].filter(Boolean).join(" | "); +} + +function buildEdgeSearchText(edge: GraphEdgeSpec, sourceNodeId: string, targetNodeId: string): string { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId, + ].filter(Boolean).join(" | "); +} + +export function buildKnowledgeGraphPrompt(args: { + summaryText: string; + sessionId: string; + sourcePath: string; + project: string; + template?: string; +}): string { + return (args.template ?? GRAPH_PROMPT_TEMPLATE) + .replace(/__SUMMARY_TEXT__/g, args.summaryText) + .replace(/__SESSION_ID__/g, args.sessionId) + .replace(/__SOURCE_PATH__/g, args.sourcePath) + .replace(/__PROJECT__/g, args.project); +} + +function wrapGraphPhaseError(error: unknown, args: { + phase: "delete_nodes" | "delete_edges" | "insert_nodes" | "insert_edges"; + sessionId: string; + table: string; + sql: string; +}): Error { + const wrapped = new Error( + `graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${ + error instanceof Error ? error.message : String(error) + }` + ); + (wrapped as Error & Record).cause = error; + (wrapped as Error & Record).phase = args.phase; + (wrapped as Error & Record).sessionId = args.sessionId; + (wrapped as Error & Record).table = args.table; + (wrapped as Error & Record).sql = args.sql; + return wrapped; +} + +export async function replaceSessionGraph(params: ReplaceSessionGraphParams): Promise { + const ts = params.ts ?? 
new Date().toISOString(); + const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`; + const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`; + const nodeFilename = `${params.sessionId}.jsonl`; + const edgeFilename = `${params.sessionId}.jsonl`; + + const nodeMap = new Map(); + for (const node of params.graph.nodes) { + const key = buildGraphNodeId(node.name, node.type); + nodeMap.set(key, { + name: node.name, + type: node.type || "other", + summary: node.summary || "", + aliases: node.aliases || [], + }); + } + for (const edge of params.graph.edges) { + const sourceKey = buildGraphNodeId(edge.source); + const targetKey = buildGraphNodeId(edge.target); + if (!nodeMap.has(sourceKey)) nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] }); + if (!nodeMap.has(targetKey)) nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] }); + } + + const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: deleteNodesSql, + }); + } + try { + await params.query(deleteEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: deleteEdgesSql, + }); + } + + const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => { + const summary = node.summary || buildSummaryBlurb(`# Graph Node\n\n${node.name}`); + const aliases = (node.aliases ?? 
[]).join(", "); + const searchText = buildNodeSearchText(node); + return ( + `('${randomUUID()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', ` + + `'${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', ` + + `'${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', ` + + `'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')` + ); + }); + + if (nodeRows.length > 0) { + const insertNodesSql = `INSERT INTO "${params.nodesTable}" ` + + `(id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${nodeRows.join(", ")}`; + try { + await params.query(insertNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: insertNodesSql, + }); + } + } + + const edgeRows = params.graph.edges.map((edge) => { + const sourceNodeId = buildGraphNodeId(edge.source); + const targetNodeId = buildGraphNodeId(edge.target); + const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId); + const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`; + const evidence = edge.evidence || ""; + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + return ( + `('${randomUUID()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', ` + + `'${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', ` + + `E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` + + `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')` + ); + }); + + if (edgeRows.length > 0) { + const insertEdgesSql = `INSERT INTO "${params.edgesTable}" ` + + `(id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${edgeRows.join(", ")}`; + try { + await params.query(insertEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: insertEdgesSql, + }); + } + } + + return { nodes: nodeRows.length, edges: edgeRows.length }; +} diff --git a/src/hooks/memory-facts.ts b/src/hooks/memory-facts.ts new file mode 100644 index 0000000..8070e2d --- /dev/null +++ b/src/hooks/memory-facts.ts @@ -0,0 +1,545 @@ +import { randomUUID } from "node:crypto"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; +import { buildGraphNodeId } from "./knowledge-graph.js"; +import { esc, type QueryFn } from "./upload-summary.js"; + +export interface MemoryFactSpec { + subject: string; + subjectType?: string; + subjectAliases?: string[]; + predicate: string; + object: string; + objectType?: string; + objectAliases?: string[]; + summary?: string; + evidence?: string; + confidence?: number; + validAt?: string; + validFrom?: string; + validTo?: string; +} + +export interface MemoryFactExtraction { + facts: 
MemoryFactSpec[]; +} + +export interface ReplaceSessionFactsParams { + query: QueryFn; + factsTable: string; + entitiesTable: string; + linksTable: string; + sessionId: string; + userName: string; + project: string; + agent: string; + sourcePath: string; + extraction: MemoryFactExtraction; + ts?: string; +} + +export interface ReplaceSessionFactsResult { + facts: number; + entities: number; + links: number; +} + +export interface SessionFactTranscriptRow { + turnIndex: number; + eventType?: string; + speaker?: string; + text?: string; + turnSummary?: string; + sourceDateTime?: string; + creationDate?: string; +} + +export const MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; + +interface EntityAggregate { + entityId: string; + canonicalName: string; + entityType: string; + aliases: Set; + summaries: Set; + searchTerms: Set; +} + +interface FactRowSpec { + factId: string; + subjectEntityId: string; + subjectName: string; + subjectType: string; + objectEntityId: string; + objectName: string; + objectType: string; + predicate: string; + summary: string; + evidence: string; + searchText: string; + confidence: string; + validAt: string; + validFrom: string; + validTo: string; +} + +function stripCodeFences(text: string): string { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? 
fenceMatch[1].trim() : trimmed; +} + +function normalizeString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function normalizeAliases(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .map(normalizeString) + .filter(Boolean) + .filter((item, index, arr) => arr.indexOf(item) === index); +} + +function normalizeFactType(value: unknown): string { + return normalizeString(value) || "other"; +} + +function normalizeConfidence(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return Math.max(0, Math.min(1, parsed)); + } + return undefined; +} + +function slugify(value: string): string { + return value + .normalize("NFKD") + .replace(/[^\w\s-]/g, "") + .trim() + .toLowerCase() + .replace(/[\s-]+/g, "_") + .replace(/^_+|_+$/g, "") || "item"; +} + +function buildFactId(sessionId: string, fact: MemoryFactSpec, index: number): string { + return [ + "fact", + slugify(sessionId), + String(index + 1), + slugify(fact.subject), + slugify(fact.predicate), + slugify(fact.object), + ].join(":"); +} + +function buildFactSearchText(fact: MemoryFactSpec): string { + return [ + fact.subject, + ...(fact.subjectAliases ?? []), + fact.predicate, + fact.object, + ...(fact.objectAliases ?? []), + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? "", + ].filter(Boolean).join(" | "); +} + +function buildEntitySearchText(entity: EntityAggregate): string { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries, + ].filter(Boolean).join(" | "); +} + +function mergeDelimited(existing: string, nextValues: Iterable): string { + const merged = new Set( + existing.split(",").map((value) => value.trim()).filter(Boolean), + ); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} + +function mergePipeDelimited(existing: string, nextValues: Iterable, maxItems = 8): string { + const merged = new Set( + existing.split("|").map((value) => value.trim()).filter(Boolean), + ); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) continue; + if (merged.has(trimmed)) continue; + if (merged.size >= maxItems) break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} + +function wrapFactsPhaseError(error: unknown, args: { + phase: "delete_facts" | "delete_links" | "upsert_entities" | "insert_facts" | "insert_links"; + sessionId: string; + table: string; + sql: string; +}): Error { + const wrapped = new Error( + `facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + (wrapped as Error & Record).cause = error; + (wrapped as Error & Record).phase = args.phase; + (wrapped as Error & Record).sessionId = args.sessionId; + (wrapped as Error & Record).table = args.table; + (wrapped as Error & Record).sql = args.sql; + return wrapped; +} + +function buildEntityAggregate( + entityMap: Map, + args: { name: string; type: string; aliases: string[]; summary: string; searchText: string }, +): EntityAggregate { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) existing.aliases.add(alias); + if (args.summary) existing.summaries.add(args.summary); + if (args.searchText) existing.searchTerms.add(args.searchText); + return existing; + } + const created: EntityAggregate = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? [args.searchText] : []), + }; + entityMap.set(entityId, created); + return created; +} + +async function upsertEntities(params: { + query: QueryFn; + entitiesTable: string; + entityMap: Map; + userName: string; + project: string; + agent: string; + sourcePath: string; + sessionId: string; + ts: string; +}): Promise { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query( + `SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" ` + + `WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`, + ); + if (existingRows.length === 0) { + const insertSql = + `INSERT INTO "${params.entitiesTable}" ` + + `(id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ` + + `('${randomUUID()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', ` + + `'${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` + + `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? ""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? ""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? 
""), [params.sourcePath]); + const existingType = normalizeString(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = + `UPDATE "${params.entitiesTable}" SET ` + + `canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', ` + + `summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', ` + + `source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', ` + + `size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', ` + + `description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' ` + + `WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} + +export function parseMemoryFactExtraction(raw: string): MemoryFactExtraction { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned) as Record; + const facts = Array.isArray(parsed["facts"]) ? parsed["facts"] as Array> : []; + const dedupe = new Set(); + return { + facts: facts + .map((fact) => ({ + subject: normalizeString(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString(fact["summary"]), + evidence: normalizeString(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString(fact["valid_at"]), + validFrom: normalizeString(fact["valid_from"]), + validTo: normalizeString(fact["valid_to"]), + })) + .filter((fact) => fact.subject && fact.predicate && fact.object) + .filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) return false; + dedupe.add(key); + return true; + }), + }; +} + +export function buildMemoryFactTranscript(rows: SessionFactTranscriptRow[]): string { + const normalized = rows + .map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? row.turnIndex : 0, + speaker: normalizeString(row.speaker), + text: normalizeString(row.text), + eventType: normalizeString(row.eventType) || "message", + turnSummary: normalizeString(row.turnSummary), + sourceDateTime: normalizeString(row.sourceDateTime) || normalizeString(row.creationDate), + })) + .filter((row) => row.text || row.turnSummary); + + if (normalized.length === 0) return "(no transcript rows)"; + + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}`, + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} + +export function buildMemoryFactPrompt(args: { + transcriptText: string; + sessionId: string; + sourcePath: string; + project: string; + template?: string; +}): string { + return (args.template ?? 
MEMORY_FACT_PROMPT_TEMPLATE) + .replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText) + .replace(/__SESSION_ID__/g, args.sessionId) + .replace(/__SOURCE_PATH__/g, args.sourcePath) + .replace(/__PROJECT__/g, args.project); +} + +export async function replaceSessionFacts(params: ReplaceSessionFactsParams): Promise { + const ts = params.ts ?? new Date().toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql, + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql, + }); + } + + const entityMap = new Map(); + const factRows: FactRowSpec[] = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText, + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? [], + summary, + searchText, + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? 
"" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "", + }; + }); + + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts, + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}`, + }); + } + + if (factRows.length > 0) { + const values = factRows.map((row) => + `('${randomUUID()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', ` + + `'${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', ` + + `'${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', ` + + `E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', ` + + `'${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` + + `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`, + ); + const insertFactsSql = + `INSERT INTO "${params.factsTable}" ` + + `(id, path, filename, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql, + }); + } + } + + const linkRows = factRows.flatMap((row) => ([ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject", + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object", + }, + ])); + + if (linkRows.length > 0) { + const values = linkRows.map((row) => + `('${randomUUID()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', ` + + `'${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', ` + + `'${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, ` + + `'${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`, + ); + const insertLinksSql = + `INSERT INTO "${params.linksTable}" ` + + `(id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql, + }); + } + } + + return { + facts: factRows.length, + entities: entityMap.size, + links: 
linkRows.length, + }; +} diff --git a/src/hooks/memory-path-utils.ts b/src/hooks/memory-path-utils.ts index b741cb3..2506b10 100644 --- a/src/hooks/memory-path-utils.ts +++ b/src/hooks/memory-path-utils.ts @@ -15,6 +15,7 @@ export const SAFE_BUILTINS = new Set([ "jq", "yq", "xan", "base64", "od", "tar", "gzip", "gunzip", "zcat", "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", "pwd", "cd", "basename", "dirname", "env", "printenv", "hostname", "whoami", "date", "seq", "expr", "sleep", "timeout", "time", "true", "false", "test", @@ -22,10 +23,69 @@ export const SAFE_BUILTINS = new Set([ "for", "while", "do", "done", "if", "then", "else", "fi", "case", "esac", ]); +function splitSafeStages(cmd: string): string[] | null { + const stages: string[] = []; + let current = ""; + let quote: string | null = null; + let escaped = false; + + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + + if (escaped) { + current += ch; + escaped = false; + continue; + } + + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === "\"") { + escaped = true; + } + continue; + } + + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + + if (ch === "'" || ch === "\"") { + quote = ch; + current += ch; + continue; + } + + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) stages.push(current.trim()); + current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) stages.push(current.trim()); + current = ""; + continue; + } + + current += ch; + } + + if (quote || escaped) return null; + if (current.trim()) stages.push(current.trim()); + return stages; +} + export function isSafe(cmd: string): boolean { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? 
""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) return false; diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 34c45db..8939c82 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -1,8 +1,8 @@ #!/usr/bin/env node import { existsSync, mkdirSync, writeFileSync } from "node:fs"; -import { homedir } from "node:os"; import { join, dirname, sep } from "node:path"; +import { homedir } from "node:os"; import { fileURLToPath } from "node:url"; import { readStdin } from "../utils/stdin.js"; import { loadConfig } from "../config.js"; @@ -11,22 +11,83 @@ import { sqlLike } from "../utils/sql.js"; import { log as _log } from "../utils/debug.js"; import { isDirectRun } from "../utils/direct-run.js"; import { type GrepParams, parseBashGrep, handleGrepDirect } from "./grep-direct.js"; -import { executeCompiledBashCommand } from "./bash-command-compiler.js"; +import { + executeCompiledBashCommand, + extractPsqlQueryFromCommand, + queryReferencesInterceptedTables, + queryUsesOnlyInterceptedTables, +} from "./bash-command-compiler.js"; import { findVirtualPaths, readVirtualPathContents, listVirtualPathRows, readVirtualPathContent, + buildVirtualIndexContent, } from "./virtual-table-query.js"; import { readCachedIndexContent, writeCachedIndexContent, } from "./query-cache.js"; import { isSafe, touchesMemory, rewritePaths } from "./memory-path-utils.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; import { capOutputForClaude } from "../utils/output-cap.js"; export { isSafe, touchesMemory, rewritePaths }; +const READ_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); + +function touchesVirtualMemoryPath(value: string): boolean { + const rewritten = rewritePaths(value).trim(); + return ( + rewritten === "/index.md" || + rewritten === "/summaries" || + rewritten.startsWith("/summaries/") || + rewritten === "/sessions" || + rewritten.startsWith("/sessions/") || + /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten) + ); +} + +function touchesAnyMemoryPath(value: string): boolean { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} + +function isAnyPsqlCommand(cmd: string): boolean { + return /^\s*psql\b/.test(cmd.trim()); +} + +function isHivemindPsqlCommand(cmd: string): boolean { + if (!isPsqlMode()) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} + +function needsHivemindPsqlRewrite(cmd: string): boolean { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} + +function buildPsqlOnlyGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + + "Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. " + + "Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + + "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. 
" + + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} + +function buildPsqlSchemaGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } + return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; +} + const log = (msg: string) => _log("pre", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); @@ -44,26 +105,9 @@ export interface PreToolUseInput { export interface ClaudePreToolDecision { command: string; description: string; - /** - * When set, main() emits the hook response as `updatedInput: {file_path}` - * instead of `updatedInput: {command, description}`. This is required for - * Read-tool intercepts: Claude Code's Read implementation reads - * `updatedInput.file_path` and errors with "path must be of type string, - * got undefined" if the hook hands it the Bash-shaped input. - */ file_path?: string; } -const READ_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); - -/** - * Materialize fetched content for a Read intercept into a real file on disk - * so Claude Code's Read tool can read it via `updatedInput.file_path`. The - * file lives under `~/.deeplake/query-cache//read/` and mirrors - * the virtual path structure (e.g. `/sessions/conv_0_session_1.json` → - * `.../read/sessions/conv_0_session_1.json`). Per-session dirs are cleaned - * alongside the index cache at session end. - */ export function writeReadCacheFile( sessionId: string, virtualPath: string, @@ -75,9 +119,6 @@ export function writeReadCacheFile( const rel = virtualPath.replace(/^\/+/, "") || "content"; const expectedRoot = join(cacheRoot, safeSessionId, "read"); const absPath = join(expectedRoot, rel); - // Containment guard: if the DB-derived virtualPath contains `..` segments, - // `join` resolves them and absPath can escape the per-session cache dir. - // Refuse the write rather than silently writing outside the sandbox. if (absPath !== expectedRoot && !absPath.startsWith(expectedRoot + sep)) { throw new Error(`writeReadCacheFile: path escapes cache root: ${absPath}`); } @@ -106,7 +147,8 @@ export function getShellCommand(toolName: string, toolInput: Record> => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; @@ -331,17 +392,19 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" + let content = !isIndexDisabled() && virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; if (content === null) { - // `/index.md` goes through the dual-table builder inside - // `readVirtualPathContents` (fix #1). Other paths fall back to the - // same helper which returns null when neither table has a row, at - // which point we let the shell bundle handle the miss below. 
content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { + const idxRows = await api.query( + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` + ); + content = buildVirtualIndexContent(idxRows); + } if (content !== null) { if (virtualPath === "/index.md") { writeCachedIndexContentFn(input.session_id, content); @@ -352,9 +415,6 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); } const label = lineLimit > 0 ? (fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}`) : "cat"; - // Read tool writes content to disk and Claude Code reads the file directly, - // so no size pressure; keep full content. Bash intercepts flow through - // Claude Code's 16 KB tool_result threshold so we cap before reaching it. if (input.tool_name === "Read") { const file_path = writeReadCacheFileFn(input.session_id, virtualPath, content); return buildReadDecision(file_path, `[DeepLake direct] ${label} ${virtualPath}`); @@ -425,6 +485,12 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT logFn(`direct query failed, falling back to shell: ${e.message}`); } + if (isHivemindPsqlCommand(shellCmd)) { + return buildAllowDecision( + `echo ${JSON.stringify("[RETRY REQUIRED] Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.")}`, + "[DeepLake SQL] query rewrite required", + ); + } return buildFallbackDecision(shellCmd, shellBundle); } diff --git a/src/hooks/query-cache.ts b/src/hooks/query-cache.ts index aee72e4..53cd58a 100644 --- a/src/hooks/query-cache.ts +++ b/src/hooks/query-cache.ts @@ -1,4 +1,4 @@ -import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { homedir } from "node:os"; import { log as _log } from "../utils/debug.js"; @@ -6,6 +6,7 @@ import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("query-cache", msg); const DEFAULT_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); const INDEX_CACHE_FILE = "index.md"; +const INDEX_CACHE_TTL_MS = 15 * 60 * 1000; interface QueryCacheDeps { cacheRoot?: string; @@ -29,7 +30,13 @@ export function clearSessionQueryCache(sessionId: string, deps: QueryCacheDeps = export function readCachedIndexContent(sessionId: string, deps: QueryCacheDeps = {}): string | null { const { logFn = log } = deps; try { - return readFileSync(join(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if ((Date.now() - stats.mtimeMs) > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync(cachePath, "utf-8"); } catch (e: any) { if (e?.code === "ENOENT") return null; logFn(`read failed for session=${sessionId}: ${e.message}`); diff --git a/src/hooks/session-queue.ts b/src/hooks/session-queue.ts index 1157a44..6d17c7a 100644 --- a/src/hooks/session-queue.ts +++ b/src/hooks/session-queue.ts @@ -25,6 +25,14 @@ export interface QueuedSessionRow { path: string; filename: string; 
message: string; + sessionId: string; + eventType: string; + turnIndex: number; + diaId: string; + speaker: string; + text: string; + turnSummary: string; + sourceDateTime: string; author: string; sizeBytes: number; project: string; @@ -92,17 +100,27 @@ export function buildSessionPath(config: { userName: string; orgName: string; wo export function buildQueuedSessionRow(args: { sessionPath: string; line: string; + sessionId?: string; userName: string; projectName: string; description: string; agent: string; timestamp: string; }): QueuedSessionRow { + const structured = extractStructuredSessionFields(args.line, args.sessionId); return { id: crypto.randomUUID(), path: args.sessionPath, filename: args.sessionPath.split("/").pop() ?? "", message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, author: args.userName, sizeBytes: Buffer.byteLength(args.line, "utf-8"), project: args.projectName, @@ -125,9 +143,11 @@ export function buildSessionInsertSql(sessionsTable: string, rows: QueuedSession if (rows.length === 0) throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); return ( `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, ` + + `'${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', ` + + `'${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', ` + `'${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', ` + `'${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')` ); @@ -135,7 +155,7 @@ export function buildSessionInsertSql(sessionsTable: string, rows: QueuedSession return ( `INSERT INTO "${table}" ` + - `(id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `(id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + `VALUES ${values}` ); } @@ -151,6 +171,77 @@ function coerceJsonbPayload(message: string): string { } } +function escapeJsonbLiteral(value: string): string { + return value + .replace(/'/g, "''") + .replace(/\0/g, ""); +} + +function extractString(value: unknown): string { + return typeof value === "string" ? value : value == null ? 
"" : String(value); +} + +function extractNumber(value: unknown): number { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return 0; +} + +function extractStructuredSessionFields(message: string, fallbackSessionId = ""): { + sessionId: string; + eventType: string; + turnIndex: number; + diaId: string; + speaker: string; + text: string; + turnSummary: string; + sourceDateTime: string; +} { + let parsed: Record | null = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") parsed = raw as Record; + } catch { + parsed = null; + } + + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "", + }; + } + + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) + || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) + || content + || (eventType === "tool_call" ? toolName : ""); + + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]), + }; +} + export async function flushSessionQueue(api: SessionQueueApi, opts: FlushSessionQueueOptions): Promise { const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? 
DEFAULT_MAX_BATCH_ROWS; diff --git a/src/hooks/session-start-setup.ts b/src/hooks/session-start-setup.ts index f78ceb0..bd20f82 100644 --- a/src/hooks/session-start-setup.ts +++ b/src/hooks/session-start-setup.ts @@ -8,89 +8,248 @@ import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; +import { mkdirSync, appendFileSync } from "node:fs"; import { execSync } from "node:child_process"; import { homedir } from "node:os"; import { loadCredentials, saveCredentials } from "../commands/auth.js"; import { loadConfig } from "../config.js"; import { DeeplakeApi } from "../deeplake-api.js"; +import { sqlStr } from "../utils/sql.js"; import { readStdin } from "../utils/stdin.js"; -import { log as _log } from "../utils/debug.js"; -import { getInstalledVersion, getLatestVersion, isNewer } from "../utils/version-check.js"; -import { makeWikiLogger } from "../utils/wiki-log.js"; +import { log as _log, utcTimestamp } from "../utils/debug.js"; +import { isDirectRun } from "../utils/direct-run.js"; +import { + drainSessionQueues, + isSessionWriteAuthError, + isSessionWriteDisabled, + markSessionWriteDisabled, + tryAcquireSessionDrainLock, +} from "./session-queue.js"; +import { + getInstalledVersion, + getLatestVersionCached, + isNewer, +} from "./version-check.js"; + const log = (msg: string) => _log("session-setup", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); -const { log: wikiLog } = makeWikiLogger(join(homedir(), ".claude", "hooks")); +const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +const VERSION_CHECK_TIMEOUT = 3000; + +const HOME = homedir(); +const WIKI_LOG = join(HOME, ".claude", "hooks", "deeplake-wiki.log"); -interface SessionStartInput { +export function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${utcTimestamp()}] ${msg}\n`); + } catch { /* ignore */ } +} + +export interface SessionStartInput { session_id: string; cwd?: string; } -async function main(): Promise<void> { - if (process.env.HIVEMIND_WIKI_WORKER === "1") return; +export async function createPlaceholder( + api: DeeplakeApi, + table: string, + sessionId: string, + cwd: string, + userName: string, + orgName: string, + workspaceId: string, +): Promise<void> { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; - const input = await readStdin(); - const creds = loadCredentials(); - if (!creds?.token) { log("no credentials"); return; } + const existing = await api.query( + `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` + ); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); + return; + } + + const now = new Date().toISOString(); + const projectName = cwd.split("/").pop() || "unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "", + ].join("\n"); + const filename = `${sessionId}.md`; + + await api.query( + `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + + `${Buffer.byteLength(content,
"utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')` + ); + + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} + +interface SessionStartSetupDeps { + wikiWorker?: boolean; + creds?: ReturnType; + saveCredentialsFn?: typeof saveCredentials; + config?: ReturnType; + createApi?: (config: NonNullable>) => DeeplakeApi; + captureEnabled?: boolean; + drainSessionQueuesFn?: typeof drainSessionQueues; + isSessionWriteDisabledFn?: typeof isSessionWriteDisabled; + isSessionWriteAuthErrorFn?: typeof isSessionWriteAuthError; + markSessionWriteDisabledFn?: typeof markSessionWriteDisabled; + tryAcquireSessionDrainLockFn?: typeof tryAcquireSessionDrainLock; + createPlaceholderFn?: typeof createPlaceholder; + getInstalledVersionFn?: typeof getInstalledVersion; + getLatestVersionCachedFn?: typeof getLatestVersionCached; + isNewerFn?: typeof isNewer; + execSyncFn?: typeof execSync; + logFn?: (msg: string) => void; + wikiLogFn?: typeof wikiLog; +} + +export async function runSessionStartSetup(input: SessionStartInput, deps: SessionStartSetupDeps = {}): Promise<{ + status: "skipped" | "no_credentials" | "complete"; +}> { + const { + wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", + creds = loadCredentials(), + saveCredentialsFn = saveCredentials, + config = loadConfig(), + createApi = (activeConfig) => new DeeplakeApi( + activeConfig.token, + activeConfig.apiUrl, + activeConfig.orgId, + activeConfig.workspaceId, + activeConfig.tableName, + ), + captureEnabled = (process.env.HIVEMIND_CAPTURE ?? process.env.DEEPLAKE_CAPTURE) !== "false", + drainSessionQueuesFn = drainSessionQueues, + isSessionWriteDisabledFn = isSessionWriteDisabled, + isSessionWriteAuthErrorFn = isSessionWriteAuthError, + markSessionWriteDisabledFn = markSessionWriteDisabled, + tryAcquireSessionDrainLockFn = tryAcquireSessionDrainLock, + createPlaceholderFn = createPlaceholder, + getInstalledVersionFn = getInstalledVersion, + getLatestVersionCachedFn = getLatestVersionCached, + isNewerFn = isNewer, + execSyncFn = execSync, + logFn = log, + wikiLogFn = wikiLog, + } = deps; + + if (wikiWorker) return { status: "skipped" }; + if (!creds?.token) { + logFn("no credentials"); + return { status: "no_credentials" }; + } - // Backfill userName if missing if (!creds.userName) { try { const { userInfo } = await import("node:os"); creds.userName = userInfo().username ?? 
"unknown"; - saveCredentials(creds); - log(`backfilled userName: ${creds.userName}`); + saveCredentialsFn(creds); + logFn(`backfilled userName: ${creds.userName}`); } catch { /* non-fatal */ } } - if (input.session_id) { + if (input.session_id && config) { try { - const config = loadConfig(); - if (config) { - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); - await api.ensureTable(); - await api.ensureSessionsTable(config.sessionsTableName); - log("setup complete"); + const api = createApi(config); + await api.ensureTable(); + if (captureEnabled) { + if (isSessionWriteDisabledFn(config.sessionsTableName)) { + logFn(`sessions table disabled, skipping setup for "${config.sessionsTableName}"`); + } else { + const releaseDrainLock = tryAcquireSessionDrainLockFn(config.sessionsTableName); + if (!releaseDrainLock) { + logFn(`sessions drain already in progress, skipping duplicate setup for "${config.sessionsTableName}"`); + } else { + try { + await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); + const drain = await drainSessionQueuesFn(api, { + sessionsTable: config.sessionsTableName, + }); + if (drain.flushedSessions > 0) { + logFn(`drained ${drain.flushedSessions} queued session(s), rows=${drain.rows}, batches=${drain.batches}`); + } + } catch (e: any) { + if (isSessionWriteAuthErrorFn(e)) { + markSessionWriteDisabledFn(config.sessionsTableName, e.message); + logFn(`sessions table unavailable, skipping setup: ${e.message}`); + } else { + throw e; + } + } finally { + releaseDrainLock(); + } + } + } + await createPlaceholderFn(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); } + logFn("setup complete"); } catch (e: any) { - log(`setup failed: ${e.message}`); - wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + logFn(`setup failed: ${e.message}`); + wikiLogFn(`SessionSetup: failed for ${input.session_id}: ${e.message}`); } } - // Version check + auto-update const autoupdate = creds.autoupdate !== false; try { - const current = getInstalledVersion(__bundleDir, ".claude-plugin"); + const current = getInstalledVersionFn(__bundleDir, ".claude-plugin"); if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { + const latest = await getLatestVersionCachedFn({ + url: GITHUB_RAW_PKG, + timeoutMs: VERSION_CHECK_TIMEOUT, + }); + if (latest && isNewerFn(latest, current)) { if (autoupdate) { - log(`autoupdate: updating ${current} → ${latest}`); + logFn(`autoupdate: updating ${current} → ${latest}`); try { const scopes = ["user", "project", "local", "managed"]; const cmd = scopes .map(s => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`) .join("; "); - execSync(cmd, { stdio: "ignore", timeout: 60_000 }); + execSyncFn(cmd, { stdio: "ignore", timeout: 60_000 }); process.stderr.write(`✅ Hivemind auto-updated: ${current} → ${latest}. Run /reload-plugins to apply.\n`); - log(`autoupdate succeeded: ${current} → ${latest}`); + logFn(`autoupdate succeeded: ${current} → ${latest}`); } catch (e: any) { process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. 
Auto-update failed — run /hivemind:update to upgrade manually.\n`); - log(`autoupdate failed: ${e.message}`); + logFn(`autoupdate failed: ${e.message}`); } } else { process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.\n`); - log(`update available (autoupdate off): ${current} → ${latest}`); + logFn(`update available (autoupdate off): ${current} → ${latest}`); } } else { - log(`version up to date: ${current}`); + logFn(`version up to date: ${current}`); } } } catch (e: any) { - log(`version check failed: ${e.message}`); + logFn(`version check failed: ${e.message}`); } + + return { status: "complete" }; +} + +/* c8 ignore start */ +async function main(): Promise<void> { + const input = await readStdin(); + await runSessionStartSetup(input); } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index 60e402b..1c27323 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -4,27 +4,31 @@ * SessionStart hook: * 1. If no credentials → run device flow login (opens browser) * 2. Inject Deeplake memory instructions into Claude's context + * + * This sync hook stays local-only. All network work (table setup, placeholder, + * queue drain, version refresh, auto-update) runs in session-start-setup.ts. */ import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; -import { readdirSync, rmSync } from "node:fs"; -import { execSync } from "node:child_process"; -import { homedir } from "node:os"; -import { loadCredentials, saveCredentials, login } from "../commands/auth.js"; -import { loadConfig } from "../config.js"; -import { DeeplakeApi } from "../deeplake-api.js"; -import { sqlStr } from "../utils/sql.js"; +import { loadCredentials, saveCredentials } from "../commands/auth.js"; import { readStdin } from "../utils/stdin.js"; import { log as _log } from "../utils/debug.js"; -import { getInstalledVersion, getLatestVersion, isNewer } from "../utils/version-check.js"; -import { makeWikiLogger } from "../utils/wiki-log.js"; +import { isDirectRun } from "../utils/direct-run.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { + DEFAULT_VERSION_CACHE_TTL_MS, + getInstalledVersion, + isNewer, + readFreshCachedLatestVersion, +} from "./version-check.js"; + const log = (msg: string) => _log("session-start", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); const AUTH_CMD = join(__bundleDir, "commands", "auth-login.js"); -const context = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: +export const CLAUDE_SESSION_START_CONTEXT = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: 1. Your built-in memory (~/.claude/) — personal per-project notes 2. Deeplake global memory (~/.deeplake/memory/) — global memory shared across all sessions, users, and agents in the org @@ -32,9 +36,97 @@ const context = `DEEPLAKE MEMORY: You have TWO memory sources.
ALWAYS check BOTH Deeplake memory structure: - ~/.deeplake/memory/index.md — START HERE, table of all sessions - ~/.deeplake/memory/summaries/username/*.md — AI-generated wiki summaries per session -- ~/.deeplake/memory/sessions/username/*.jsonl — raw session data (last resort) +- ~/.deeplake/memory/sessions/{author}/* — raw session data (last resort) + +SEARCH STRATEGY: Always read index.md first. Then read specific summaries. Only read raw session files if summaries don't have enough detail. Do NOT jump straight to raw session files. +When index.md points to a likely match, read that exact summary or session file directly before trying broader grep variants. +If index.md already points to likely candidate files, open those exact files before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked index.md plus at least one likely summary or raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." + +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management — each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login — SSO login +- node "HIVEMIND_AUTH_CMD" whoami — show current user/org +- node "HIVEMIND_AUTH_CMD" org list — list organizations +- node "HIVEMIND_AUTH_CMD" org switch — switch organization +- node "HIVEMIND_AUTH_CMD" workspaces — list workspaces +- node "HIVEMIND_AUTH_CMD" workspace — switch workspace +- node "HIVEMIND_AUTH_CMD" invite — invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members — list members +- node "HIVEMIND_AUTH_CMD" remove — remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) 
to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +export const CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) — personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) — shared org memory, currently exposed in SESSIONS-ONLY mode for benchmark comparison + +Deeplake memory structure available in this mode: +- ~/.deeplake/memory/sessions/{author}/* — raw session data + +SEARCH STRATEGY: Search raw session files directly. In this mode, do NOT start with index.md or summaries and do NOT assume those paths exist. +Open the most likely session file directly before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use that session's date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." 
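Reviewer note: the mode switches used throughout this diff (isPsqlMode, isSessionsOnlyMode, isIndexDisabled, isFactsSessionsOnlyPsqlMode) come from src/utils/retrieval-mode.ts, which the changeset imports but never shows. A hypothetical sketch of that module, assuming a single env-var-driven mode; the actual variable name, values, and any interplay between the flags are not confirmed by this diff:

// Hypothetical ../utils/retrieval-mode.ts — illustrative only.
function mode(): string {
  return (process.env["HIVEMIND_RETRIEVAL_MODE"] ?? "").toLowerCase(); // assumed env var
}

export const isPsqlMode = (): boolean => mode().startsWith("psql");
export const isFactsSessionsOnlyPsqlMode = (): boolean => mode() === "psql-facts-sessions";
export const isSessionsOnlyMode = (): boolean => mode() === "sessions-only";
export const isIndexDisabled = (): boolean => mode() === "no-index";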
+ +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management — each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login — SSO login +- node "HIVEMIND_AUTH_CMD" whoami — show current user/org +- node "HIVEMIND_AUTH_CMD" org list — list organizations +- node "HIVEMIND_AUTH_CMD" org switch — switch organization +- node "HIVEMIND_AUTH_CMD" workspaces — list workspaces +- node "HIVEMIND_AUTH_CMD" workspace — switch workspace +- node "HIVEMIND_AUTH_CMD" invite — invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members — list members +- node "HIVEMIND_AUTH_CMD" remove — remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +export const CLAUDE_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) — personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) — global memory shared across all sessions, users, and agents in the org + +Deeplake memory structure in this mode: +- ~/.deeplake/memory/summaries/username/*.md — AI-generated wiki summaries per session +- ~/.deeplake/memory/sessions/{author}/* — raw session data (last resort) -SEARCH STRATEGY: Always read index.md first. Then read specific summaries. Only read raw JSONL if summaries don't have enough detail. Do NOT jump straight to JSONL files. +INDEX MODE: /index.md is intentionally unavailable for this run. Do NOT try to read it or rely on it. +SEARCH STRATEGY: Start by grepping summaries for the named person, topic, or keyword. Then read the specific matching summaries. Only read raw session files if the summaries don't have enough detail. Do NOT jump straight to raw session files. +If a summary points to a likely source session, open that exact raw session before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. 
Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file for the named person when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." Search command: Grep pattern="keyword" path="~/.deeplake/memory" @@ -55,161 +147,237 @@ LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; -const HOME = homedir(); -const { log: wikiLog } = makeWikiLogger(join(HOME, ".claude", "hooks")); +export const CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with targeted SELECTs against memory to find likely sessions or summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. After finding candidate summary rows, re-query memory by exact path. +5. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. +6. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary before concluding the data is absent. +9. 
Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over a paraphrased summary label. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". + +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%<person>%' AND (summary ILIKE '%<topic1>%' OR summary ILIKE '%<topic2>%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%<name>%' OR aliases ILIKE '%<name>%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%<person>%' AND (predicate ILIKE '%<topic>%' OR object_name ILIKE '%<topic>%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '<entity_id>' ORDER BY f.creation_date DESC LIMIT 10" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%<term>%' OR text ILIKE '%<term>%') ORDER BY path ASC, turn_index ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries (sketched in the reviewer note below): + psql -At -F '|' -c "SELECT path, summary, summary <#> '<person> <topic>' AS score FROM memory WHERE summary ILIKE '%<person>%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields.
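Reviewer note: the literal-then-BM25 fallback in strategy step 8 and the last pattern above maps directly onto the DeeplakeApi surface used elsewhere in this diff. A minimal sketch under that assumption — recallCandidates is illustrative glue, sqlLike/sqlStr are the real helpers from ../utils/sql.js, and whether the `<#>` BM25 operator is accepted through api.query is assumed from the template text:

import { sqlLike, sqlStr } from "../utils/sql.js";
import type { DeeplakeApi } from "../deeplake-api.js";

// Pass 1: narrow AND filter over summaries; Pass 2: BM25 rerank when sparse.
async function recallCandidates(api: DeeplakeApi, person: string, topic: string) {
  const literal = await api.query(
    `SELECT path, summary, creation_date FROM "memory" ` +
    `WHERE summary ILIKE '%${sqlLike(person)}%' AND summary ILIKE '%${sqlLike(topic)}%' ` +
    `ORDER BY creation_date DESC LIMIT 5`
  );
  if (literal.length > 3) return literal; // "0-3 weak rows" → fall through to BM25
  return api.query(
    `SELECT path, summary, summary <#> '${sqlStr(`${person} ${topic}`)}' AS score ` +
    `FROM "memory" ORDER BY score DESC LIMIT 5`
  );
}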
+- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +export const CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. +2. 
Expand connected facts through fact_entity_links and memory_facts (sketched in the reviewer note below). +3. Use memory_facts to identify the small set of likely source sessions. +4. Ground every final answer on sessions rows from those source sessions. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +10. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. + +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%<name>%' OR aliases ILIKE '%<name>%' LIMIT 5" +- Fact lookup by name/topic: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%<person>%' AND (predicate ILIKE '%<topic>%' OR object_name ILIKE '%<topic>%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '<entity_id>' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%<term>%' OR text ILIKE '%<term>%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person.
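Reviewer note: the entity → facts → sessions funnel prescribed by strategy steps 1-4 can be expressed against the same DeeplakeApi surface. A sketch under stated assumptions — resolveEntityFacts is illustrative, and the table and column names are taken verbatim from the schema list in this template:

import { sqlLike, sqlStr } from "../utils/sql.js";
import type { DeeplakeApi } from "../deeplake-api.js";

// Steps 1-2: resolve the canonical entity, then expand its linked facts.
// The returned source_path values point at the sessions rows used for the
// final grounding pass (steps 3-4).
async function resolveEntityFacts(api: DeeplakeApi, name: string) {
  const entities = await api.query(
    `SELECT entity_id FROM "memory_entities" ` +
    `WHERE canonical_name ILIKE '%${sqlLike(name)}%' OR aliases ILIKE '%${sqlLike(name)}%' LIMIT 5`
  );
  if (entities.length === 0) return [];
  const ids = entities.map((e) => `'${sqlStr(String(e["entity_id"]))}'`).join(", ");
  return api.query(
    `SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_path ` +
    `FROM "fact_entity_links" l JOIN "memory_facts" f ON f.fact_id = l.fact_id ` +
    `WHERE l.entity_id IN (${ids}) ORDER BY f.creation_date DESC LIMIT 10`
  );
}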
+- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. -/** Create a placeholder summary via direct SQL INSERT (no DeeplakeFs bootstrap needed). */ -async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise<void> { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; +IMPORTANT: Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode. - const existing = await api.query( - `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` - ); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; + +export function buildSessionStartAdditionalContext(args: { + authCommand: string; + creds: ReturnType<typeof loadCredentials>; + currentVersion: string | null; + latestVersion: string | null; +}): string { + const template = isPsqlMode() + ? isFactsSessionsOnlyPsqlMode() + ? CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY + : CLAUDE_SESSION_START_CONTEXT_PSQL + : isSessionsOnlyMode() + ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY + : isIndexDisabled() + ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX + : CLAUDE_SESSION_START_CONTEXT; + const resolvedContext = template.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); + + let updateNotice = ""; + if (args.currentVersion) { + if (args.latestVersion && isNewer(args.latestVersion, args.currentVersion)) { + updateNotice = `\n\n⬆️ Hivemind update available: ${args.currentVersion} → ${args.latestVersion}.`; + } else { + updateNotice = `\n\n✅ Hivemind v${args.currentVersion}`; + } } - const now = new Date().toISOString(); - const projectName = cwd.split("/").pop() ?? "unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "", - ].join("\n"); - const filename = `${sessionId}.md`; - - await api.query( - `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + - `${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')` - ); - - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); + return args.creds?.token + ? `${resolvedContext}\n\nLogged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${updateNotice}` + : `${resolvedContext}\n\n⚠️ Not logged in to Deeplake. Memory search will not work.
Ask the user to run /hivemind:login to authenticate.${updateNotice}`; } -interface SessionStartInput { - session_id: string; - cwd?: string; +interface SessionStartHookDeps { + wikiWorker?: boolean; + creds?: ReturnType<typeof loadCredentials>; + saveCredentialsFn?: typeof saveCredentials; + currentVersion?: string | null; + latestVersion?: string | null; + authCommand?: string; + bundleDir?: string; + logFn?: (msg: string) => void; } -async function main(): Promise<void> { - // Skip if this is a sub-session spawned by the wiki worker - if (process.env.HIVEMIND_WIKI_WORKER === "1") return; - - const input = await readStdin(); +export async function runSessionStartHook(_input: Record<string, unknown>, deps: SessionStartHookDeps = {}): Promise<{ + hookSpecificOutput: { + hookEventName: "SessionStart"; + additionalContext: string; + }; +} | null> { + const { + wikiWorker = (process.env.HIVEMIND_WIKI_WORKER ?? process.env.DEEPLAKE_WIKI_WORKER) === "1", + creds = loadCredentials(), + saveCredentialsFn = saveCredentials, + currentVersion = getInstalledVersion(__bundleDir, ".claude-plugin"), + latestVersion = currentVersion + ? readFreshCachedLatestVersion(GITHUB_RAW_PKG, DEFAULT_VERSION_CACHE_TTL_MS) ?? null + : null, + authCommand = AUTH_CMD, + logFn = log, + } = deps; - let creds = loadCredentials(); + if (wikiWorker) return null; if (!creds?.token) { - log("no credentials found — run /hivemind:login to authenticate"); + logFn("no credentials found — run /hivemind:login to authenticate"); } else { - log(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - // Backfill userName if missing (for users who logged in before this field was added) + logFn(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); if (creds.token && !creds.userName) { try { const { userInfo } = await import("node:os"); creds.userName = userInfo().username ??
"", config.userName, config.orgName, config.workspaceId); - log("placeholder created"); - } else { - log("placeholder skipped (HIVEMIND_CAPTURE=false)"); - } - } - } catch (e: any) { - log(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } - } - - // Version check (non-blocking — failures are silently ignored) - const autoupdate = creds?.autoupdate !== false; // default: true - let updateNotice = ""; - try { - const current = getInstalledVersion(__bundleDir, ".claude-plugin"); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log(`autoupdate: updating ${current} → ${latest}`); - try { - const scopes = ["user", "project", "local", "managed"]; - const cmd = scopes - .map(s => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null || true`) - .join("; "); - execSync(cmd, { stdio: "ignore", timeout: 60_000 }); - // Clean up old cached versions, keep only the latest - try { - const cacheParent = join(homedir(), ".claude", "plugins", "cache", "hivemind", "hivemind"); - const entries = readdirSync(cacheParent, { withFileTypes: true }); - for (const e of entries) { - if (e.isDirectory() && e.name !== latest) { - rmSync(join(cacheParent, e.name), { recursive: true, force: true }); - log(`cache cleanup: removed old version ${e.name}`); - } - } - } catch (e: any) { - log(`cache cleanup failed: ${e.message}`); - } - updateNotice = `\n\n✅ Hivemind auto-updated: ${current} → ${latest}. Run /reload-plugins to apply.`; - process.stderr.write(`✅ Hivemind auto-updated: ${current} → ${latest}. Run /reload-plugins to apply.\n`); - log(`autoupdate succeeded: ${current} → ${latest}`); - } catch (e: any) { - updateNotice = `\n\n⬆️ Hivemind update available: ${current} → ${latest}. Auto-update failed — run /hivemind:update to upgrade manually.`; - process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Auto-update failed — run /hivemind:update to upgrade manually.\n`); - log(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = `\n\n⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.`; - process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.\n`); - log(`update available (autoupdate off): ${current} → ${latest}`); - } - } else { - log(`version up to date: ${current}`); - updateNotice = `\n\n✅ Hivemind v${current} (up to date)`; - } - } - } catch (e: any) { - log(`version check failed: ${e.message}`); - } - - const resolvedContext = context.replace(/HIVEMIND_AUTH_CMD/g, AUTH_CMD); - const additionalContext = creds?.token - ? `${resolvedContext}\n\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` - : `${resolvedContext}\n\n⚠️ Not logged in to Deeplake. Memory search will not work. 
Ask the user to run /hivemind:login to authenticate.${updateNotice}`; - - console.log(JSON.stringify({ + return { hookSpecificOutput: { hookEventName: "SessionStart", - additionalContext, + additionalContext: buildSessionStartAdditionalContext({ + authCommand, + creds, + currentVersion, + latestVersion, + }), }, - })); + }; } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +/* c8 ignore start */ +async function main(): Promise<void> { + await readStdin<Record<string, unknown>>(); + const result = await runSessionStartHook({}); + if (result) console.log(JSON.stringify(result)); +} + +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/spawn-wiki-worker.ts b/src/hooks/spawn-wiki-worker.ts index b870bb4..5fa611f 100644 --- a/src/hooks/spawn-wiki-worker.ts +++ b/src/hooks/spawn-wiki-worker.ts @@ -6,16 +6,19 @@ import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; -import { writeFileSync, mkdirSync } from "node:fs"; +import { writeFileSync, mkdirSync, appendFileSync } from "node:fs"; import { homedir, tmpdir } from "node:os"; import type { Config } from "../config.js"; -import { makeWikiLogger } from "../utils/wiki-log.js"; +import { GRAPH_PROMPT_TEMPLATE } from "./knowledge-graph.js"; +import { MEMORY_FACT_PROMPT_TEMPLATE } from "./memory-facts.js"; +import { utcTimestamp } from "../utils/debug.js"; const HOME = homedir(); -const wikiLogger = makeWikiLogger(join(HOME, ".claude", "hooks")); -export const WIKI_LOG = wikiLogger.path; +export const WIKI_LOG = join(HOME, ".claude", "hooks", "deeplake-wiki.log"); -export const WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge — entities, decisions, relationships, and facts — into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +export const WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -29,46 +32,61 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format. The header fields (Source, Project) are pre-filled — copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed.
Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. The Source and Project fields above are already correct — do not change them. LENGTH LIMIT: Keep the total summary under 4000 characters. Be dense and concise — prioritize facts over prose. If a session is short, the summary should be short too.`; -export const wikiLog = wikiLogger.log; +export function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${utcTimestamp()}] ${msg}\n`); + } catch { /* ignore */ } +} export function findClaudeBin(): string { try { @@ -101,6 +119,11 @@ export function spawnWikiWorker(opts: SpawnOptions): void { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -109,6 +132,8 @@ export function spawnWikiWorker(opts: SpawnOptions): void { wikiLog: WIKI_LOG, hooksDir: join(HOME, ".claude", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE, })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); diff --git a/src/hooks/upload-summary.ts b/src/hooks/upload-summary.ts index f6c96a0..e0d0489 100644 --- a/src/hooks/upload-summary.ts +++ b/src/hooks/upload-summary.ts @@ -9,6 +9,7 @@ */ import { randomUUID } from "node:crypto"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; export type QueryFn = (sql: string) => Promise>>; @@ -39,10 +40,9 @@ export function esc(s: string): string { .replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } -/** Derive the short description from the "## What Happened" section of a wiki summary. */ +/** Derive the short catalog description from the structured summary. 
*/ export function extractDescription(text: string): string { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } /** diff --git a/src/hooks/virtual-table-query.ts b/src/hooks/virtual-table-query.ts index a430a35..794f977 100644 --- a/src/hooks/virtual-table-query.ts +++ b/src/hooks/virtual-table-query.ts @@ -1,6 +1,8 @@ import type { DeeplakeApi } from "../deeplake-api.js"; import { sqlLike, sqlStr } from "../utils/sql.js"; import { normalizeContent } from "../shell/grep-core.js"; +import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { buildSummaryIndexEntry, buildSummaryIndexLine, type SummaryIndexEntry } from "../utils/summary-format.js"; type Row = Record; @@ -8,36 +10,110 @@ function normalizeSessionPart(path: string, content: string): string { return normalizeContent(path, content); } -export function buildVirtualIndexContent(summaryRows: Row[], sessionRows: Row[] = []): string { - const total = summaryRows.length + sessionRows.length; +export function buildVirtualIndexContent(rows: Row[]): string { + const entries = rows + .map((row) => buildSummaryIndexEntry(row)) + .filter((entry): entry is SummaryIndexEntry => entry !== null) + .sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); + const lines = [ "# Memory Index", "", - `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`, + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", "", ]; - if (summaryRows.length > 0) { - lines.push("## Summaries", ""); - for (const row of summaryRows) { - const path = row["path"] as string; - const project = row["project"] as string || ""; - const description = (row["description"] as string || "").slice(0, 120); - const date = (row["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); - } + + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); lines.push(""); } - if (sessionRows.length > 0) { - lines.push("## Sessions", ""); - for (const row of sessionRows) { - const path = row["path"] as string; - const description = (row["description"] as string || "").slice(0, 120); - lines.push(`- [${path}](${path}) ${description}`); - } + + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry: SummaryIndexEntry): string { + const session = entry.source ? 
` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} + +function topList(counts: Map, limit: number): string[] { + return [...counts.entries()] + .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])) + .slice(0, limit) + .map(([value]) => value); +} + +function buildPeopleDirectory(entries: SummaryIndexEntry[]): string[] { + const people = new Map; recent: SummaryIndexEntry[] }>(); + + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + + return [...people.entries()] + .sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])) + .map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} — ${info.count} summaries`]; + if (topics.length > 0) parts.push(`topics: ${topics.join("; ")}`); + if (recent) parts.push(`recent: ${recent}`); + return parts.join(" — "); + }); +} + +function buildProjectDirectory(entries: SummaryIndexEntry[]): string[] { + const projects = new Map(); + + for (const entry of entries) { + if (!entry.project) continue; + const current = projects.get(entry.project) ?? { count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + + return [...projects.entries()] + .sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])) + .map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} — ${info.count} summaries`]; + if (recent) parts.push(`recent: ${recent}`); + return parts.join(" — "); + }); +} + function buildUnionQuery(memoryQuery: string, sessionsQuery: string): string { return ( `SELECT path, content, size_bytes, creation_date, source_order FROM (` + @@ -53,7 +129,7 @@ function buildInList(paths: string[]): string { function buildDirFilter(dirs: string[]): string { const cleaned = [...new Set(dirs.map(dir => dir.replace(/\/+$/, "") || "/"))]; if (cleaned.length === 0 || cleaned.includes("/")) return ""; - const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`); + const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%'`); return ` WHERE ${clauses.join(" OR ")}`; } @@ -62,6 +138,12 @@ async function queryUnionRows( memoryQuery: string, sessionsQuery: string, ): Promise { + if (isSessionsOnlyMode()) { + return api.query( + `SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date` + ); + } + const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -83,8 +165,16 @@ export async function readVirtualPathContents( const uniquePaths = [...new Set(virtualPaths)]; const result = new Map(uniquePaths.map(path => [path, null])); if (uniquePaths.length === 0) return result; + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } - const inList = buildInList(uniquePaths); + const queryPaths = isIndexDisabled() + ? 
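// Editor's example (illustrative, not part of the patch): topList above ranks
// topic tallies count-descending with alphabetical tie-breaks, so People lines
// render deterministically across runs. Given these tallies:
const exampleTopics = new Map<string, number>([["retries", 3], ["auth", 3], ["wiki", 1]]);
const topTwo = topList(exampleTopics, 2); // → ["auth", "retries"]
// A person entry with two recent summaries then renders roughly as:
//   - Ava — 2 summaries — topics: auth; retries — recent: [label](path) -> [session](source)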
uniquePaths.filter((path) => path !== "/index.md") + : uniquePaths; + if (queryPaths.length === 0) return result; + + const inList = buildInList(queryPaths); const rows = await queryUnionRows( api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, @@ -107,7 +197,7 @@ export async function readVirtualPathContents( } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? null); continue; @@ -118,16 +208,11 @@ export async function readVirtualPathContents( } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { - const [summaryRows, sessionRows] = await Promise.all([ - api.query( - `SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` - ).catch(() => [] as Row[]), - api.query( - `SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path` - ).catch(() => [] as Row[]), - ]); - result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const rows = await api.query( + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` + ).catch(() => []); + result.set("/index.md", buildVirtualIndexContent(rows)); } return result; @@ -196,8 +281,8 @@ export async function findVirtualPaths( const likePath = `${sqlLike(normalizedDir === "/" ? "" : normalizedDir)}/%`; const rows = await queryUnionRows( api, - `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, - `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, + `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`, + `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' AND filename LIKE '${filenamePattern}'`, ); return [...new Set( diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 2359ea0..213b63a 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -15,6 +15,17 @@ import { utcTimestamp, log as _log } from "../utils/debug.js"; const dlog = (msg: string) => _log("wiki-worker", msg); import { finalizeSummary, releaseLock } from "./summary-state.js"; import { uploadSummary } from "./upload-summary.js"; +import { + buildKnowledgeGraphPrompt, + parseGraphExtraction, + replaceSessionGraph, +} from "./knowledge-graph.js"; +import { + buildMemoryFactTranscript, + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "./memory-facts.js"; interface WorkerConfig { apiUrl: string; @@ -23,6 +34,11 @@ interface WorkerConfig { workspaceId: string; memoryTable: string; sessionsTable: string; + graphNodesTable: string; + graphEdgesTable: 
string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; sessionId: string; userName: string; project: string; @@ -31,6 +47,8 @@ wikiLog: string; hooksDir: string; promptTemplate: string; + graphPromptTemplate: string; + factPromptTemplate: string; } const cfg: WorkerConfig = JSON.parse(readFileSync(process.argv[2], "utf-8")); @@ -105,8 +123,8 @@ async function main(): Promise<void> { // 1. Fetch session events from sessions table, reconstruct JSONL wlog("fetching session events"); const rows = await query( - `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + - `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` + `SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" ` + + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) { @@ -192,6 +210,86 @@ }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate, + }); + const graphRaw = execFileSync(cfg.claudeBin, [ + "-p", graphPrompt, + "--no-session-persistence", + "--model", "haiku", + "--permission-mode", "bypassPermissions", + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + graph, + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e: any) { + wlog(`graph update failed: ${e.message}`); + } + + try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
row["creation_date"] : "", + }))); + const factPrompt = buildMemoryFactPrompt({ + transcriptText, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate, + }); + const factsRaw = execFileSync(cfg.claudeBin, [ + "-p", factPrompt, + "--no-session-persistence", + "--model", "haiku", + "--permission-mode", "bypassPermissions", + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + extraction, + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e: any) { + wlog(`fact update failed: ${e.message}`); + } + try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index 8db0716..ebac2a0 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -6,6 +6,8 @@ import type { FileContent, BufferEncoding, } from "just-bash"; import { normalizeContent } from "./grep-core.js"; +import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { buildVirtualIndexContent } from "../hooks/virtual-table-query.js"; interface ReadFileOptions { encoding?: BufferEncoding } interface WriteFileOptions { encoding?: BufferEncoding } @@ -84,6 +86,8 @@ export class DeeplakeFs implements IFileSystem { // Paths that live in the sessions table (multi-row, read by concatenation) private sessionPaths = new Set<string>(); private sessionsTable: string | null = null; + private sessionsOnly = false; + private indexDisabled = false; private constructor( private readonly client: DeeplakeApi, @@ -102,12 +106,14 @@ ): Promise<DeeplakeFs> { const fs = new DeeplakeFs(client, table, mount); fs.sessionsTable = sessionsTable ?? null; + fs.sessionsOnly = isSessionsOnlyMode(); + fs.indexDisabled = isIndexDisabled(); // Ensure the table exists before bootstrapping. await client.ensureTable(); // Bootstrap memory + sessions metadata in parallel. let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs.sessionsOnly ? Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -247,55 +253,10 @@ private async generateVirtualIndex(): Promise<string> { const rows = await this.client.query( - `SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" ` + - `WHERE path LIKE '${esc("/summaries/")}%' ORDER BY last_update_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" ` + + `WHERE path LIKE '${esc("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC` ); - - // Build a lookup: key → session path from sessionPaths - // Supports two formats: - // 1. /sessions//___.jsonl → key = sessionId - // 2. 
/sessions//.json or .jsonl → key = filename stem - const sessionPathsByKey = new Map<string, string>(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - // Generic: extract filename without extension - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) sessionPathsByKey.set(stem, sp.slice(1)); - } - } - - const lines: string[] = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|", - ]; - for (const row of rows) { - const p = row["path"] as string; - // Extract session ID from path: /summaries//.md - const match = p.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match) continue; - const summaryUser = match[1]; - const sessionId = match[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - // Try matching session: first exact sessionId, then strip _summary suffix - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? `[messages](${convPath})` : ""; - const project = (row["project"] as string) || ""; - const description = (row["description"] as string) || ""; - const creationDate = (row["creation_date"] as string) || ""; - const lastUpdateDate = (row["last_update_date"] as string) || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── @@ -372,7 +333,7 @@ // 3. 
Session files: concatenate rows from sessions table if (this.sessionPaths.has(p) && this.sessionsTable) { const rows = await this.client.query( - `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` + `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); const text = joinSessionMessages(p, rows.map((row) => row["message"])); @@ -396,7 +357,7 @@ if (this.dirs.has(p) && !this.files.has(p)) throw fsErr("EISDIR", "illegal operation on a directory", p); // Virtual index.md: if no real row exists, generate from summary rows - if (p === "/index.md" && !this.files.has(p)) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md" && !this.files.has(p)) { // Check if a real /index.md row exists in the table const realRows = await this.client.query( `SELECT summary FROM "${this.table}" WHERE path = '${esc("/index.md")}' LIMIT 1` @@ -424,7 +385,7 @@ // Session files: concatenate rows from sessions table, ordered by creation_date if (this.sessionPaths.has(p) && this.sessionsTable) { const rows = await this.client.query( - `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` + `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); const text = joinSessionMessages(p, rows.map((row) => row["message"])); @@ -526,7 +487,7 @@ async exists(path: string): Promise<boolean> { const p = normPath(path); - if (p === "/index.md") return true; // Virtual index always exists + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md") return true; // Virtual index always exists return this.files.has(p) || this.dirs.has(p); } @@ -535,7 +496,7 @@ const p = normPath(path); const isFile = this.files.has(p); const isDir = this.dirs.has(p); // Virtual index.md: always exists as a file - if (p === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, isSymbolicLink: false, mode: 0o644, size: 0, mtime: new Date(), @@ -562,7 +523,7 @@ async readlink(path: string): Promise<string> { throw fsErr("EINVAL", "invalid argument", path); } async realpath(path: string): Promise<string> { const p = normPath(path); - if (p === "/index.md") return p; // Virtual index always exists + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md") return p; // Virtual index always exists if (!this.files.has(p) && !this.dirs.has(p)) throw fsErr("ENOENT", "no such file or directory", p); return p; } @@ -591,7 +552,7 @@ if (!this.dirs.has(p)) throw fsErr("ENOTDIR", "not a directory", p); const entries = [...(this.dirs.get(p) ?? [])]; // Virtual index.md: always show in root listing even if no real row exists - if (p === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -604,7 +565,7 @@ const child = p === "/" ? 
`/${name}` : `${p}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || (!this.sessionsOnly && !this.indexDisabled && child === "/index.md")) && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false, }; diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 6e93c5b..07becfb 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -7,15 +7,76 @@ * 1. searchDeeplakeTables: run one UNION ALL query across both the memory * table (summaries, column `summary`) AND the sessions table * (raw dialogue, column `message` JSONB), returning {path, content}. - * 2. normalizeSessionContent: when a row comes from a session path, turn the - * single-line JSON blob into multi-line "Speaker: text" so the standard - * line-wise regex refinement surfaces only matching turns, not the whole - * 5 KB blob. + * 2. normalizeSessionContent: when a row comes from a session path, expose a + * file-like text view. Transcript JSON blobs stay as canonical pretty + * JSON so local grep/read over `/sessions/*.json` matches the plugin + * surface, while production hook-event rows keep their concise normalized + * text view. * 3. refineGrepMatches: line-by-line regex match with the usual grep flags. */ import type { DeeplakeApi } from "../deeplake-api.js"; +import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { type ScoredRetrievalRow, fuseRetrievalRows } from "../utils/hybrid-fusion.js"; import { sqlStr, sqlLike } from "../utils/sql.js"; +import { getGrepRetrievalMode, isSessionsOnlyMode, isSummaryBm25Disabled } from "../utils/retrieval-mode.js"; + +const DEFAULT_GREP_CANDIDATE_LIMIT = Number( + process.env["HIVEMIND_GREP_LIMIT"] + ?? process.env["DEEPLAKE_GREP_LIMIT"] + ?? 500, +); +const DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +const DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +const DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; + +let retrievalEmbedder: HarrierEmbedder | null = null; + +function envString(...names: string[]): string | undefined { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) return value; + } + return undefined; +} + +function envFlag(...names: string[]): boolean { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} + +function envNumber(fallback: number, ...names: string[]): number { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function getRetrievalEmbedder(): HarrierEmbedder { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString( + "HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", + "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", + "HIVEMIND_HARRIER_MODEL_ID", + "DEEPLAKE_HARRIER_MODEL_ID", + ) ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? 
"cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY"), + }); + } + return retrievalEmbedder; +} + +function sqlFloat4Array(values: number[]): string { + if (values.length === 0) throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} // ── Types ──────────────────────────────────────────────────────────────────── @@ -44,20 +105,44 @@ export interface SearchOptions { likeOp: "LIKE" | "ILIKE"; /** LIKE-escaped pattern (via sqlLike). */ escapedPattern: string; + /** Optional raw grep regex pattern. May be normalized before SQL pushdown. */ + regexPattern?: string; /** Optional safe literal anchor for regex searches (e.g. foo.*bar → foo). */ prefilterPattern?: string; /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ prefilterPatterns?: string[]; + /** Optional semantic query text used for vector and hybrid retrieval. */ + queryText?: string; + /** Optional lexical query text for BM25 summary retrieval. */ + bm25QueryText?: string; /** Per-table row cap. */ limit?: number; } +function escapeRegexLiteral(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Normalize common grep BRE operator spellings into the JS/SQL-regex form used + * by our execution paths. This fixes patterns like `book\\|novel` that grep + * users often write for alternation. + */ +export function normalizeGrepRegexPattern(pattern: string): string { + return pattern + .replace(/\\([|(){}+?])/g, "$1") + .replace(/\\/g, "\\b"); +} + // ── Content normalization ─────────────────────────────────────────────────── /** - * If the row is a session JSON blob, serialize it as multi-line - * "Speaker: text" so the standard grep refinement surfaces only matching turns. - * Falls back to the raw content if parsing fails or the path is not a session. + * If the row is a session JSON blob, expose a file-like text view. Transcript + * blobs (`turns` / `dialogue`) stay as canonical pretty JSON so grep/read + * match the local filesystem surface. Hook-event rows keep a concise + * normalized text projection. Falls back to the raw content if parsing fails + * or the path is not a session. */ // ── Tool-call extractor ───────────────────────────────────────────────────── // Extracts only signal-bearing fields from `tool_input` / `tool_response`, @@ -173,23 +258,9 @@ export function normalizeContent(path: string, raw: string): string { let obj: any; try { obj = JSON.parse(raw); } catch { return raw; } - // ── Turn-array session shape: { turns: [...] } ─────────────────────────── - if (Array.isArray(obj.turns)) { - const header: string[] = []; - if (obj.date_time) header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t: any) => { - const sp = String(t?.speaker ?? t?.name ?? "?").trim(); - const tx = String(t?.text ?? 
t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out = [...header, ...lines].join("\n"); - return out.trim() ? out : raw; + // ── Transcript session shapes: keep a canonical raw-JSON view ─────────── + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)}\n`; } // ── Production shape: single hook-event row (capture.ts output) ───────── @@ -244,7 +315,7 @@ function buildPathCondition(targetPath: string): string { } /** - * Dual-table LIKE/ILIKE search. Casts `summary` (TEXT) and `message` (JSONB) + * Dual-table text/regex search. Casts `summary` (TEXT) and `message` (JSONB) * to ::text so the same predicate works across both. The lookup always goes * through a single UNION ALL query so one grep maps to one SQL search. */ @@ -254,22 +325,152 @@ export async function searchDeeplakeTables( api: DeeplakeApi, memoryTable: string, sessionsTable: string, opts: SearchOptions, ): Promise<Array<{ path: string; content: string }>> { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? [prefilterPattern] : [])) : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + // Stay on portable message::text filters for session rows. The structured + // json_extract_string() predicates currently fail against the managed + // backend for these JSONB rows, which forces a 400 and a retry onto a + // coarser query path. + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const sessionsOnly = isSessionsOnlyMode(); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const ensureSummaryBm25Index = (api as DeeplakeApi & { + ensureSummaryBm25Index?: (tableName?: string) => Promise<void>; + }).ensureSummaryBm25Index; + + if ((useSummaryBm25 || (useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0)) && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => {}); + } - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const buildCombinedQuery = (memFilter: string, sessFilter: string, useBm25Summary = false): string => { + const memQuery = useBm25Summary + ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) + : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly + ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` + : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; - const rows = await api.query( - `SELECT path, content, source_order, creation_date FROM (` + - `(${memQuery}) UNION ALL (${sessQuery})` + - `) AS combined ORDER BY path, source_order, creation_date` - ); + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const vectorQuery = buildScoredCombinedQuery( + sessionsOnly, + buildEmbeddingSimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + queryVectorSql, + limit, + ), + buildEmbeddingSimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + queryVectorSql, + limit, + ), + limit, + ); + + if (!useHybridRetrieval) { + const rows = await api.query(vectorQuery); + return rows.map(row => ({ + path: String(row["path"]), + content: String(row["content"] ?? 
""), + })); + } + + const lexicalQuery = buildScoredCombinedQuery( + sessionsOnly, + buildBm25SimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + lexicalQueryText, + limit, + ), + buildBm25SimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + lexicalQueryText, + limit, + ), + limit, + ); + const lexicalFallbackQuery = buildScoredCombinedQuery( + sessionsOnly, + buildHeuristicLexicalQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + lexicalQueryText, + limit, + ), + buildHeuristicLexicalQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + lexicalQueryText, + limit, + ), + limit, + ); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)), + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit, + }).map(row => ({ + path: row.path, + content: row.content, + })); + } + + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + + const rows = useSummaryBm25 + ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) + : await api.query(primaryQuery); return rows.map(row => ({ path: String(row["path"]), @@ -311,6 +512,10 @@ export function extractRegexLiteralPrefilter(pattern: string): string | null { if (ch === "\\") { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; i++; @@ -335,14 +540,15 @@ export function extractRegexLiteralPrefilter(pattern: string): string | null { } export function extractRegexAlternationPrefilters(pattern: string): string[] | null { - if (!pattern.includes("|")) return null; + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts: string[] = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -374,42 +580,349 @@ export function extractRegexAlternationPrefilters(pattern: string): string[] | n } export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: string): SearchOptions { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? 
normalizedPattern.trim()) || undefined; + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch + ? `\\b(?:${regexBase})\\b` + : hasRegexMeta + ? regexBase + : undefined; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : undefined, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, + bm25QueryText: bm25QueryText ?? undefined, + limit: DEFAULT_GREP_CANDIDATE_LIMIT, }; } +export function buildSummaryBm25QueryText( + pattern: string, + fixedString: boolean, + literalPrefilter: string | null, + alternationPrefilters: string[] | null, +): string | null { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 + ? alternationPrefilters + : literalPrefilter + ? [literalPrefilter] + : [pattern]; + + const cleaned = [...new Set( + rawTokens + .flatMap((token) => token + .replace(/\\b/g, " ") + .replace(/[.*+?^${}()[\]{}|\\]/g, " ") + .split(/\s+/)) + .map((token) => token.trim()) + .filter((token) => token.length >= 2), + )]; + + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} + function buildContentFilter( column: string, likeOp: "LIKE" | "ILIKE", patterns: string[], +): string { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} + +function buildRegexFilter( + column: string, + pattern: string, + ignoreCase: boolean, +): string { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
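// Editor's example (illustrative, not part of the patch): how grep input becomes
// the BM25 query text above — alternation anchors win over the raw pattern,
// regex noise is stripped, and tokens under two characters are dropped.
buildSummaryBm25QueryText("(retry|backoff)", false, null, ["retry", "backoff"]); // → "retry backoff"
buildSummaryBm25QueryText("a", true, null, null); // → null: too short even as a fixed string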
` AND ${predicate}` : ""; +} + +function buildSummaryBm25Query( + memoryTable: string, + pathFilter: string, + queryText: string, + limit: number, +): string { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} + +function buildEmbeddingSimilarityQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryVectorSql: string, + limit: number, +): string { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} + +function buildBm25SimilarityQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryText: string, + limit: number, +): string { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} + +function buildHeuristicLexicalQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryText: string, + limit: number, +): string { + const terms = [...new Set( + queryText + .split(/\s+/) + .map((term) => term.trim()) + .filter((term) => term.length >= 2), + )].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END`, + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} + +function buildScoredCombinedQuery( + sessionsOnly: boolean, + memQuery: string, + sessQuery: string, + limit: number, +): string { + return sessionsOnly + ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` + : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} + +function mapScoredRows(rows: Record<string, unknown>[]): ScoredRetrievalRow[] { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? 
Number(row["score"]) : 0, + })); +} + +export function toSqlRegexPattern( + pattern: string, + _ignoreCase: boolean, +): string | null { + if (!pattern) return null; + + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} + +function unwrapWholeRegexGroup(pattern: string): string { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; + + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) return pattern; + } + } + if (depth !== 0) return pattern; + if (pattern.startsWith("(?:")) return pattern.slice(3, -1); + return pattern.slice(1, -1); +} + +function translateRegexPatternToSql(pattern: string): string | null { + let out = ""; + + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": out += "[[:digit:]]"; continue; + case "D": out += "[^[:digit:]]"; continue; + case "s": out += "[[:space:]]"; continue; + case "S": out += "[^[:space:]]"; continue; + case "w": out += "[[:alnum:]_]"; continue; + case "W": out += "[^[:alnum:]_]"; continue; + case "b": out += "\\y"; continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) return null; + out += "("; + i += named[0].length - 1; + continue; + } + + if (ch === "(" && pattern[i + 1] === "?") return null; + + out += ch; + } + + return out; +} + +function buildContentPredicate( + column: string, + likeOp: "LIKE" | "ILIKE", + patterns: string[], ): string { if (patterns.length === 0) return ""; - if (patterns.length === 1) return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + if (patterns.length === 1) return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} + +function buildRegexPredicate( + column: string, + pattern: string | undefined, + ignoreCase: boolean, +): string { + if (!pattern) return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) return ""; + return `${column} ${ignoreCase ? 
"~*" : "~"} '${sqlStr(sqlPattern)}'`; +} + +function joinAndPredicates(predicates: string[]): string { + const filtered = predicates.filter(Boolean); + if (filtered.length === 0) return ""; + if (filtered.length === 1) return filtered[0]!; + return `(${filtered.join(" AND ")})`; +} + +function joinOrPredicates(predicates: string[]): string { + const filtered = predicates.filter(Boolean); + if (filtered.length === 0) return ""; + if (filtered.length === 1) return filtered[0]!; + return `(${filtered.join(" OR ")})`; +} + +function buildAnyColumnPredicate( + columns: string[], + builder: (column: string) => string, +): string { + return joinOrPredicates(columns.map((column) => builder(column))); +} + +function buildStructuredSessionFilter( + likeOp: "LIKE" | "ILIKE", + patterns: string[], + regexPattern: string | undefined, + ignoreCase: boolean, +): string { + const typeExpr = "COALESCE(json_extract_string(message, '$.type'), '')"; + const contentExpr = "COALESCE(json_extract_string(message, '$.content'), '')"; + const toolFieldExprs = [ + "COALESCE(json_extract_string(message, '$.tool_name'), '')", + "COALESCE(json_extract_string(message, '$.tool_input'), '')", + "COALESCE(json_extract_string(message, '$.tool_response'), '')", + ]; + const metaExprs = [ + typeExpr, + "COALESCE(json_extract_string(message, '$.hook_event_name'), '')", + "COALESCE(json_extract_string(message, '$.agent_type'), '')", + ]; + + const buildFieldSearch = (columns: string[]): string => joinAndPredicates([ + buildAnyColumnPredicate(columns, (column) => buildContentPredicate(column, likeOp, patterns)), + buildAnyColumnPredicate(columns, (column) => buildRegexPredicate(column, regexPattern, ignoreCase)), + ]); + + const contentSearch = buildFieldSearch([contentExpr]); + const toolSearch = buildFieldSearch(toolFieldExprs); + const metaSearch = buildFieldSearch(metaExprs); + + const branches = [ + contentSearch + ? joinAndPredicates([`${typeExpr} IN ('user_message', 'assistant_message')`, contentSearch]) + : "", + toolSearch + ? joinAndPredicates([`${typeExpr} = 'tool_call'`, toolSearch]) + : "", + metaSearch, + ]; + + const predicate = joinOrPredicates(branches); + return predicate ? ` AND ${predicate}` : ""; } // ── Regex refinement (line-by-line grep) ──────────────────────────────────── /** Compile the grep regex from params, with a safe fallback on bad user regex. */ export function compileGrepRegex(params: GrepMatchParams): RegExp { + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); let reStr = params.fixedString - ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") - : params.pattern; - if (params.wordMatch) reStr = `\\b${reStr}\\b`; + ? escapeRegexLiteral(normalizedPattern) + : normalizedPattern; + if (params.wordMatch) reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { return new RegExp( - params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), + escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : "", ); } @@ -462,6 +975,7 @@ export async function grepBothTables( sessionsTable: string, params: GrepMatchParams, targetPath: string, + forceMultiFilePrefix?: boolean, ): Promise { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); // Defensive path dedup — memory and sessions tables use disjoint path @@ -472,5 +986,5 @@ export async function grepBothTables( const seen = new Set(); const unique = rows.filter(r => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map(r => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } diff --git a/src/shell/grep-interceptor.ts b/src/shell/grep-interceptor.ts index debd0cd..398f4c2 100644 --- a/src/shell/grep-interceptor.ts +++ b/src/shell/grep-interceptor.ts @@ -18,7 +18,7 @@ const MAX_FALLBACK_CANDIDATES = 500; /** * grep implementation for the deeplake-shell (virtual bash). Two paths: * 1. SQL-first: dual-table LIKE/ILIKE search via grep-core, with session - * JSON normalized to per-turn lines for sane output. + * content projected into the same file-like view used by local reads. * 2. Fallback: if SQL returns nothing (or races past a 3s timeout), scan * the in-memory FS cache using the same regex refinement. * @@ -76,7 +76,6 @@ export function createGrepCommand( const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), pathFilter: buildPathFilterForTargets(targets), - limit: 100, }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions), @@ -106,9 +105,10 @@ export function createGrepCommand( } } - // Normalize session JSON blobs to per-turn lines before the regex pass. + // Normalize session blobs into the same file-like text view used by reads. const normalized = rows.map(r => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : undefined; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? 
output.join("\n") + "\n" : "", diff --git a/src/tools/backfill-harrier-embeddings.ts b/src/tools/backfill-harrier-embeddings.ts new file mode 100644 index 0000000..59b253a --- /dev/null +++ b/src/tools/backfill-harrier-embeddings.ts @@ -0,0 +1,433 @@ +#!/usr/bin/env node + +import { loadConfig } from "../config.js"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { + buildMemoryEmbeddingText, + buildSessionEmbeddingText, + stableEmbeddingSourceHash, + type MemoryEmbeddingRow, + type SessionEmbeddingRow, +} from "../embeddings/text.js"; +import { sqlIdent, sqlStr } from "../utils/sql.js"; + +type TableKind = "memory" | "sessions"; + +interface Args { + table: TableKind | "all"; + memoryTable: string; + sessionsTable: string; + modelId: string; + startOffset: number; + maxRows?: number; + device?: string; + dtype?: string; + batchSize: number; + scanBatchSize: number; + limit?: number; + force: boolean; + localFilesOnly: boolean; + localModelPath?: string; + cacheDir?: string; + memoryMaxChars: number; + sessionsMaxChars: number; + embeddingColumn: string; + embeddingModelColumn: string; + embeddingSourceHashColumn: string; + embeddingUpdatedAtColumn: string; +} + +interface SqlColumnSpec { + name: string; + ddl: string; +} + +const DEFAULT_MODEL_ID = process.env.HIVEMIND_HARRIER_MODEL_ID + ?? process.env.DEEPLAKE_HARRIER_MODEL_ID + ?? "onnx-community/harrier-oss-v1-0.6b-ONNX"; +const DEFAULT_EMBEDDING_COLUMN = "embedding"; +const DEFAULT_BATCH_SIZE = 8; +const DEFAULT_SCAN_BATCH_SIZE = 64; + +function printUsage(): void { + process.stderr.write([ + "Usage: tsx src/tools/backfill-harrier-embeddings.ts [options]", + "", + "Options:", + " --table Tables to backfill (default: all)", + " --memory-table Memory table name", + " --sessions-table Sessions table name", + " --model-id Harrier model id (default: onnx-community/harrier-oss-v1-0.6b-ONNX)", + " --start-offset Start scanning at SQL offset n (default: 0)", + " --max-rows Process at most n scanned rows from the start offset", + " --device Transformers.js device (default: cpu)", + " --dtype Optional ONNX dtype override", + " --batch-size Embedding batch size (default: 8)", + " --scan-batch-size Rows read/write per scan batch (default: 64)", + " --limit Stop after n row updates", + " --force Recompute even when source hash matches", + " --local-files-only Refuse remote model downloads", + " --local-model-path Local model root for Transformers.js", + " --cache-dir Transformers.js cache directory", + " --memory-max-chars Max chars embedded per memory row (default: 8000)", + " --sessions-max-chars Max chars embedded per sessions row (default: 8000)", + "", + "Note: For local TypeScript inference, the practical default is the ONNX export", + " of microsoft/harrier-oss-v1-0.6b. Pass --local-files-only with a local model", + " cache if you want fully offline execution.", + "", + ].join("\n")); +} + +function parseInteger(value: string | undefined, fallback: number): number { + const parsed = Number.parseInt(value ?? "", 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback; +} + +function parseArgs(): Args { + const config = loadConfig(); + const args = process.argv.slice(2); + const opts: Args = { + table: "all", + memoryTable: config?.tableName ?? "memory", + sessionsTable: config?.sessionsTableName ?? 
"sessions", + modelId: DEFAULT_MODEL_ID, + startOffset: 0, + device: "cpu", + batchSize: DEFAULT_BATCH_SIZE, + scanBatchSize: DEFAULT_SCAN_BATCH_SIZE, + force: false, + localFilesOnly: false, + memoryMaxChars: 8_000, + sessionsMaxChars: 8_000, + embeddingColumn: DEFAULT_EMBEDDING_COLUMN, + embeddingModelColumn: "embedding_model", + embeddingSourceHashColumn: "embedding_source_hash", + embeddingUpdatedAtColumn: "embedding_updated_at", + }; + + for (let index = 0; index < args.length; index++) { + switch (args[index]) { + case "--help": + case "-h": + printUsage(); + process.exit(0); + case "--table": { + const value = args[++index]; + if (value === "memory" || value === "sessions" || value === "all") { + opts.table = value; + } else { + throw new Error(`Unsupported --table value: ${value}`); + } + break; + } + case "--memory-table": + opts.memoryTable = args[++index] ?? opts.memoryTable; + break; + case "--sessions-table": + opts.sessionsTable = args[++index] ?? opts.sessionsTable; + break; + case "--model-id": + opts.modelId = args[++index] ?? opts.modelId; + break; + case "--start-offset": + opts.startOffset = Math.max(0, parseInteger(args[++index], 0)); + break; + case "--max-rows": + opts.maxRows = parseInteger(args[++index], 0); + break; + case "--device": + opts.device = args[++index] ?? opts.device; + break; + case "--dtype": + opts.dtype = args[++index] ?? opts.dtype; + break; + case "--batch-size": + opts.batchSize = parseInteger(args[++index], opts.batchSize); + break; + case "--scan-batch-size": + opts.scanBatchSize = parseInteger(args[++index], opts.scanBatchSize); + break; + case "--limit": + opts.limit = parseInteger(args[++index], 0); + break; + case "--force": + opts.force = true; + break; + case "--local-files-only": + opts.localFilesOnly = true; + break; + case "--local-model-path": + opts.localModelPath = args[++index] ?? opts.localModelPath; + break; + case "--cache-dir": + opts.cacheDir = args[++index] ?? opts.cacheDir; + break; + case "--memory-max-chars": + opts.memoryMaxChars = parseInteger(args[++index], opts.memoryMaxChars); + break; + case "--sessions-max-chars": + opts.sessionsMaxChars = parseInteger(args[++index], opts.sessionsMaxChars); + break; + default: + throw new Error(`Unknown argument: ${args[index]}`); + } + } + + return opts; +} + +function asString(value: unknown): string { + return typeof value === "string" ? value : value == null ? "" : String(value); +} + +function hasVector(value: unknown): boolean { + return Array.isArray(value) && value.length > 0; +} + +function sqlFloat4Array(values: number[]): string { + return `ARRAY[${values.map((value) => Number.isFinite(value) ? Math.fround(value).toString() : "0").join(", ")}]::float4[]`; +} + +async function ensureSqlColumns(api: DeeplakeApi, tableName: string, specs: SqlColumnSpec[]): Promise { + const table = sqlIdent(tableName); + for (const spec of specs) { + const column = sqlIdent(spec.name); + try { + await api.query(`ALTER TABLE "${table}" ADD COLUMN IF NOT EXISTS "${column}" ${spec.ddl}`); + } catch { + // Older backends may reject IF NOT EXISTS or duplicate adds. + // Continue so repeated runs remain best-effort. 
+ } + } +} + +async function ensureEmbeddingIndex(api: DeeplakeApi, tableName: string, columnName: string): Promise { + const table = sqlIdent(tableName); + const column = sqlIdent(columnName); + const indexName = sqlIdent(`idx_${tableName}_${columnName}`.replace(/[^a-zA-Z0-9_]/g, "_")); + await api.query( + `CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("${column}")` + ).catch(() => {}); +} + +async function fetchMemoryRows(api: DeeplakeApi, args: Args, offset: number): Promise[]> { + const table = sqlIdent(args.memoryTable); + const remainingRows = args.maxRows ? Math.max(0, (args.startOffset + args.maxRows) - offset) : args.scanBatchSize; + const limit = Math.min(args.scanBatchSize, remainingRows); + if (limit <= 0) return []; + return api.query( + `SELECT id, path, filename, summary, description, project, ` + + `"${sqlIdent(args.embeddingSourceHashColumn)}" AS embedding_source_hash, ` + + `"${sqlIdent(args.embeddingModelColumn)}" AS embedding_model ` + + `FROM "${table}" ORDER BY path ASC LIMIT ${limit} OFFSET ${offset}` + ); +} + +async function fetchSessionRows(api: DeeplakeApi, args: Args, offset: number): Promise[]> { + const table = sqlIdent(args.sessionsTable); + const remainingRows = args.maxRows ? Math.max(0, (args.startOffset + args.maxRows) - offset) : args.scanBatchSize; + const limit = Math.min(args.scanBatchSize, remainingRows); + if (limit <= 0) return []; + return api.query( + `SELECT id, path, event_type, speaker, text, turn_summary, source_date_time, turn_index, message ` + + `FROM "${table}" ` + + `ORDER BY path ASC, turn_index ASC, creation_date ASC LIMIT ${limit} OFFSET ${offset}` + ); +} + +async function updateEmbeddingRow( + api: DeeplakeApi, + tableName: string, + args: Args, + id: string, + vector: number[], + sourceHash: string, +): Promise { + const table = sqlIdent(tableName); + const updatedAt = new Date().toISOString(); + await api.query( + `UPDATE "${table}" SET ` + + `"${sqlIdent(args.embeddingColumn)}" = ${sqlFloat4Array(vector)}, ` + + `"${sqlIdent(args.embeddingModelColumn)}" = '${sqlStr(args.modelId)}', ` + + `"${sqlIdent(args.embeddingSourceHashColumn)}" = '${sqlStr(sourceHash)}', ` + + `"${sqlIdent(args.embeddingUpdatedAtColumn)}" = '${sqlStr(updatedAt)}' ` + + `WHERE id = '${sqlStr(id)}'` + ); +} + +async function backfillMemoryTable(api: DeeplakeApi, embedder: HarrierEmbedder, args: Args): Promise<{ updated: number; skipped: number }> { + await ensureSqlColumns(api, args.memoryTable, [ + { name: args.embeddingColumn, ddl: "float4[]" }, + { name: args.embeddingModelColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingSourceHashColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingUpdatedAtColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + ]); + + let updated = 0; + let skipped = 0; + + for (let offset = args.startOffset; ; offset += args.scanBatchSize) { + const rows = await fetchMemoryRows(api, args, offset); + if (rows.length === 0) break; + + const docs: string[] = []; + const ids: string[] = []; + const sourceHashes: string[] = []; + + for (const row of rows) { + const text = buildMemoryEmbeddingText({ + path: asString(row["path"]), + filename: asString(row["filename"]), + summary: asString(row["summary"]), + description: asString(row["description"]), + project: asString(row["project"]), + } satisfies MemoryEmbeddingRow, args.memoryMaxChars); + + if (!text) { + skipped++; + continue; + } + + docs.push(text); + ids.push(asString(row["id"])); + 
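The --start-offset/--max-rows window in the fetch helpers above is the trickiest arithmetic in this tool, so a worked example: with startOffset = 100, maxRows = 150, and the default scanBatchSize = 64, each batch reads min(scanBatchSize, remainingRows).

// offset 100 -> remaining 150 -> LIMIT 64
// offset 164 -> remaining  86 -> LIMIT 64
// offset 228 -> remaining  22 -> LIMIT 22
// offset 292 -> remaining   0 -> fetch returns [] and the scan loop stops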
const sourceHash = stableEmbeddingSourceHash(text); + const existingHash = asString(row["embedding_source_hash"]); + const existingModel = asString(row["embedding_model"]); + // Mirror the sessions loop below: fetchMemoryRows already selects the stored hash and model, so skip rows that are current unless --force is set. + if (!args.force && existingHash === sourceHash && existingModel === embedder.modelId) { + docs.pop(); + ids.pop(); + skipped++; + continue; + } + sourceHashes.push(sourceHash); + } + + for (let batchStart = 0; batchStart < docs.length; batchStart += args.batchSize) { + const batchDocs = docs.slice(batchStart, batchStart + args.batchSize); + const batchIds = ids.slice(batchStart, batchStart + args.batchSize); + const batchHashes = sourceHashes.slice(batchStart, batchStart + args.batchSize); + const vectors = await embedder.embedDocuments(batchDocs); + + for (let index = 0; index < vectors.length; index++) { + await updateEmbeddingRow(api, args.memoryTable, args, batchIds[index], vectors[index], batchHashes[index]); + updated++; + } + + process.stderr.write(`[memory] updated ${updated}, skipped ${skipped}\n`); + if (args.limit && updated >= args.limit) { + await ensureEmbeddingIndex(api, args.memoryTable, args.embeddingColumn); + return { updated, skipped }; + } + } + } + + await ensureEmbeddingIndex(api, args.memoryTable, args.embeddingColumn); + return { updated, skipped }; +} + +async function backfillSessionsTable(api: DeeplakeApi, embedder: HarrierEmbedder, args: Args): Promise<{ updated: number; skipped: number }> { + await ensureSqlColumns(api, args.sessionsTable, [ + { name: args.embeddingColumn, ddl: "float4[]" }, + { name: args.embeddingModelColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingSourceHashColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingUpdatedAtColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + ]); + + let updated = 0; + let skipped = 0; + + for (let offset = args.startOffset; ; offset += args.scanBatchSize) { + const rows = await fetchSessionRows(api, args, offset); + if (rows.length === 0) break; + + const docs: string[] = []; + const ids: string[] = []; + const sourceHashes: string[] = []; + + for (const row of rows) { + const text = buildSessionEmbeddingText({ + path: asString(row["path"]), + event_type: asString(row["event_type"]), + speaker: asString(row["speaker"]), + text: asString(row["text"]), + turn_summary: asString(row["turn_summary"]), + source_date_time: asString(row["source_date_time"]), + turn_index: Number.isFinite(Number(row["turn_index"])) ?
Number(row["turn_index"]) : undefined, + message: row["message"], + } satisfies SessionEmbeddingRow, args.sessionsMaxChars); + + if (!text) { + skipped++; + continue; + } + + const sourceHash = stableEmbeddingSourceHash(text); + const existingHash = asString(row["embedding_source_hash"]); + const existingModel = asString(row["embedding_model"]); + if (!args.force && existingHash === sourceHash && existingModel === embedder.modelId) { + skipped++; + continue; + } + + docs.push(text); + ids.push(asString(row["id"])); + sourceHashes.push(sourceHash); + } + + for (let batchStart = 0; batchStart < docs.length; batchStart += args.batchSize) { + const batchDocs = docs.slice(batchStart, batchStart + args.batchSize); + const batchIds = ids.slice(batchStart, batchStart + args.batchSize); + const batchHashes = sourceHashes.slice(batchStart, batchStart + args.batchSize); + const vectors = await embedder.embedDocuments(batchDocs); + + for (let index = 0; index < vectors.length; index++) { + await updateEmbeddingRow(api, args.sessionsTable, args, batchIds[index], vectors[index], batchHashes[index]); + updated++; + } + + process.stderr.write(`[sessions] updated ${updated}, skipped ${skipped}\n`); + if (args.limit && updated >= args.limit) { + await ensureEmbeddingIndex(api, args.sessionsTable, args.embeddingColumn); + return { updated, skipped }; + } + } + } + + await ensureEmbeddingIndex(api, args.sessionsTable, args.embeddingColumn); + return { updated, skipped }; +} + +async function main(): Promise { + const args = parseArgs(); + const creds = loadCredentials(); + const config = loadConfig(); + if (!creds?.token || !config) { + throw new Error("Missing Deeplake credentials. Run `deeplake login` first."); + } + + const api = new DeeplakeApi( + config.token, + config.apiUrl, + config.orgId, + config.workspaceId, + config.tableName, + ); + const embedder = new HarrierEmbedder({ + modelId: args.modelId, + cacheDir: args.cacheDir, + localModelPath: args.localModelPath, + localFilesOnly: args.localFilesOnly, + device: args.device, + dtype: args.dtype, + batchSize: args.batchSize, + }); + + if (args.table === "memory" || args.table === "all") { + const result = await backfillMemoryTable(api, embedder, args); + process.stderr.write(`[memory] complete: updated=${result.updated} skipped=${result.skipped}\n`); + } + if (args.table === "sessions" || args.table === "all") { + const result = await backfillSessionsTable(api, embedder, args); + process.stderr.write(`[sessions] complete: updated=${result.updated} skipped=${result.skipped}\n`); + } +} + +main().catch((error: unknown) => { + const message = error instanceof Error ? 
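For a standalone smoke test of the embedder wiring, a minimal sketch using only the HarrierEmbedder surface exercised in this file (constructor options, embedDocuments, modelId); the model id shown is this tool's assumed default, not a confirmed published artifact.

import { HarrierEmbedder } from "../embeddings/harrier.js";

// Sketch under the assumptions above; embedDocuments returns one vector per input.
const embedder = new HarrierEmbedder({
  modelId: "onnx-community/harrier-oss-v1-0.6b-ONNX",
  device: "cpu",
  batchSize: 2,
});
const vectors = await embedder.embedDocuments(["first doc", "second doc"]);
console.log(embedder.modelId, vectors.length, vectors[0]?.length);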
error.message : String(error); + process.stderr.write(`[backfill-harrier-embeddings] ${message}\n`); + process.exit(1); +}); diff --git a/src/tools/backfill-locomo-facts.ts b/src/tools/backfill-locomo-facts.ts new file mode 100644 index 0000000..cd52611 --- /dev/null +++ b/src/tools/backfill-locomo-facts.ts @@ -0,0 +1,280 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { appendFileSync } from "node:fs"; +import { basename } from "node:path"; +import { promisify } from "node:util"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi, DeeplakeQueryError, summarizeSql } from "../deeplake-api.js"; +import { + buildMemoryFactTranscript, + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "../hooks/memory-facts.js"; +import { findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface Args { + memoryTable: string; + sessionsTable: string; + factsTable: string; + entitiesTable: string; + linksTable: string; + pathContains?: string; + concurrency: number; + model: string; + clearFacts: boolean; + clearEntities: boolean; + errorLogPath?: string; +} + +interface SummaryRow { + path: string; + project?: string; +} + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + memoryTable: "memory", + sessionsTable: "sessions", + factsTable: "memory_facts", + entitiesTable: "memory_entities", + linksTable: "fact_entity_links", + pathContains: undefined, + concurrency: 4, + model: "haiku", + clearFacts: false, + clearEntities: false, + errorLogPath: undefined, + }; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--memory-table": + opts.memoryTable = args[++i] ?? opts.memoryTable; + break; + case "--facts-table": + opts.factsTable = args[++i] ?? opts.factsTable; + break; + case "--sessions-table": + opts.sessionsTable = args[++i] ?? opts.sessionsTable; + break; + case "--entities-table": + opts.entitiesTable = args[++i] ?? opts.entitiesTable; + break; + case "--links-table": + opts.linksTable = args[++i] ?? opts.linksTable; + break; + case "--path-contains": + opts.pathContains = args[++i] ?? opts.pathContains; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i] ?? "4", 10) || 4); + break; + case "--model": + opts.model = args[++i] ?? opts.model; + break; + case "--clear-facts": + opts.clearFacts = true; + break; + case "--clear-entities": + opts.clearEntities = true; + break; + case "--error-log": + opts.errorLogPath = args[++i] ?? opts.errorLogPath; + break; + } + } + return opts; +} + +function sessionIdFromSessionPath(path: string): string { + const base = basename(path).replace(/\.jsonl?$/, ""); + return base; +} + +function serializeError(error: unknown): Record { + const err = error instanceof Error ? 
error : new Error(String(error)); + const out: Record = { + name: err.name, + message: err.message, + stack: err.stack, + }; + const record = err as Error & Record; + if (typeof record["phase"] === "string") out["phase"] = record["phase"]; + if (typeof record["sessionId"] === "string") out["sessionId"] = record["sessionId"]; + if (typeof record["table"] === "string") out["table"] = record["table"]; + if (typeof record["sql"] === "string") out["sql"] = record["sql"]; + if (error instanceof DeeplakeQueryError) { + out["sqlSummary"] = error.sqlSummary; + out["status"] = error.status; + out["responseBody"] = error.responseBody; + } else if (typeof record["sql"] === "string") { + out["sqlSummary"] = summarizeSql(record["sql"] as string); + } + const cause = record["cause"]; + if (cause instanceof DeeplakeQueryError) { + out["cause"] = { + name: cause.name, + message: cause.message, + sqlSummary: cause.sqlSummary, + status: cause.status, + responseBody: cause.responseBody, + stack: cause.stack, + }; + } else if (cause instanceof Error) { + out["cause"] = { + name: cause.name, + message: cause.message, + stack: cause.stack, + }; + } + return out; +} + +function appendErrorLog(logPath: string | undefined, payload: Record): void { + if (!logPath) return; + appendFileSync(logPath, `${JSON.stringify(payload)}\n`, "utf-8"); +} + +async function generateFacts( + transcriptText: string, + sourcePath: string, + sessionId: string, + project: string, + claudeBin: string, + model: string, +) { + const prompt = buildMemoryFactPrompt({ + transcriptText, + sessionId, + sourcePath, + project, + }); + const { stdout } = await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + }, + }); + return parseMemoryFactExtraction(stdout); +} + +async function main(): Promise { + const opts = parseArgs(); + const creds = loadCredentials(); + if (!creds?.token || !creds.orgId) { + throw new Error("Missing Deeplake credentials. Run auth first."); + } + + const workspaceId = creds.workspaceId ?? "default"; + const apiUrl = process.env["HIVEMIND_API_URL"] ?? process.env["DEEPLAKE_API_URL"] ?? creds.apiUrl ?? "https://api.deeplake.ai"; + const api = new DeeplakeApi(creds.token, apiUrl, creds.orgId, workspaceId, opts.memoryTable); + await api.ensureFactsTable(opts.factsTable); + await api.ensureEntitiesTable(opts.entitiesTable); + await api.ensureFactEntityLinksTable(opts.linksTable); + + if (opts.clearFacts) { + await api.query(`DELETE FROM "${opts.factsTable}"`); + await api.query(`DELETE FROM "${opts.linksTable}"`); + } + if (opts.clearEntities) { + await api.query(`DELETE FROM "${opts.entitiesTable}"`); + } + + const sessionRows = await api.query( + `SELECT DISTINCT path, project FROM "${opts.sessionsTable}" WHERE path LIKE '/sessions/conv_%_session_%.json%' ORDER BY path ASC`, + ); + const summaries: SummaryRow[] = sessionRows.map((row) => ({ + path: String(row["path"] ?? ""), + project: row["project"] == null ? 
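For orientation, one --error-log JSONL line produced by the worker's catch block below has roughly this shape when the failure is a DeeplakeQueryError; every value here is illustrative, the field names follow serializeError above.

const exampleLogLine = {
  path: "/sessions/conv_3_session_2.json",      // illustrative values throughout
  sessionId: "conv_3_session_2",
  sourcePath: "/sessions/conv_3_session_2.json",
  failureAt: "2024-05-01T12:00:00.000Z",
  error: {
    name: "DeeplakeQueryError",
    message: "Query failed: 503: upstream unavailable",
    sqlSummary: "SELECT ... FROM sessions WHERE path = ...",
    status: 503,
    responseBody: "upstream unavailable",
  },
};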
undefined : String(row["project"]), + })) + .filter((row) => row.path) + .filter((row) => !opts.pathContains || row.path.includes(opts.pathContains)); + + const claudeBin = findClaudeBin(); + let nextIndex = 0; + let completed = 0; + let failures = 0; + let totalFacts = 0; + + async function worker(): Promise { + while (true) { + const index = nextIndex++; + if (index >= summaries.length) return; + const row = summaries[index]; + const sessionId = sessionIdFromSessionPath(row.path); + const sourcePath = row.path; + try { + const transcriptRows = await api.query( + `SELECT creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${opts.sessionsTable}" ` + + `WHERE path = '${row.path.replace(/'/g, "''")}' ORDER BY creation_date ASC, turn_index ASC`, + ); + const transcriptText = buildMemoryFactTranscript(transcriptRows.map((transcriptRow) => ({ + turnIndex: Number(transcriptRow["turn_index"] ?? 0), + eventType: typeof transcriptRow["event_type"] === "string" ? transcriptRow["event_type"] : "", + speaker: typeof transcriptRow["speaker"] === "string" ? transcriptRow["speaker"] : "", + text: typeof transcriptRow["text"] === "string" ? transcriptRow["text"] : "", + turnSummary: typeof transcriptRow["turn_summary"] === "string" ? transcriptRow["turn_summary"] : "", + sourceDateTime: typeof transcriptRow["source_date_time"] === "string" ? transcriptRow["source_date_time"] : "", + creationDate: typeof transcriptRow["creation_date"] === "string" ? transcriptRow["creation_date"] : "", + }))); + const extraction = await generateFacts( + transcriptText, + sourcePath, + sessionId, + row.project || "locomo", + claudeBin, + opts.model, + ); + const result = await replaceSessionFacts({ + query: (sql) => api.query(sql), + factsTable: opts.factsTable, + entitiesTable: opts.entitiesTable, + linksTable: opts.linksTable, + sessionId, + userName: "locomo", + project: row.project || "locomo", + agent: "claude_code", + sourcePath, + extraction, + }); + totalFacts += result.facts; + completed += 1; + process.stdout.write(`facts ${completed}/${summaries.length}: ${sessionId} facts=${result.facts} entities=${result.entities} links=${result.links}\n`); + } catch (error) { + failures += 1; + const payload = { + path: row.path, + sessionId, + sourcePath, + failureAt: new Date().toISOString(), + error: serializeError(error), + }; + appendErrorLog(opts.errorLogPath, payload); + process.stderr.write(`FAIL ${row.path}: ${error instanceof Error ? error.message : String(error)}\n`); + } + } + } + + await Promise.all(Array.from({ length: opts.concurrency }, () => worker())); + process.stdout.write(`Done. facts_sessions=${completed} failed=${failures} total_facts=${totalFacts}\n`); +} + +main().catch((error) => { + process.stderr.write(`${error instanceof Error ? error.stack ?? 
error.message : String(error)}\n`); + process.exit(1); +}); diff --git a/src/tools/backfill-locomo-graph.ts b/src/tools/backfill-locomo-graph.ts new file mode 100644 index 0000000..63d1eb3 --- /dev/null +++ b/src/tools/backfill-locomo-graph.ts @@ -0,0 +1,525 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { appendFileSync, writeFileSync } from "node:fs"; +import { promisify } from "node:util"; +import { basename } from "node:path"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi, DeeplakeQueryError, summarizeSql } from "../deeplake-api.js"; +import { + buildGraphNodeId, + buildKnowledgeGraphPrompt, + type GraphExtraction, + parseGraphExtraction, +} from "../hooks/knowledge-graph.js"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; +import { esc } from "../hooks/upload-summary.js"; +import { findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface Args { + memoryTable: string; + graphNodesTable: string; + graphEdgesTable: string; + concurrency: number; + model: string; + clearGraph: boolean; + errorLogPath?: string; +} + +interface SummaryRow { + path: string; + summary: string; + project?: string; +} + +interface AggregateNode { + nodeId: string; + canonicalName: string; + nodeType: string; + aliases: Set<string>; + summaries: Set<string>; + sourceSessionIds: Set<string>; + sourcePaths: Set<string>; + representativeSessionId: string; + representativeSourcePath: string; +} + +interface AggregateEdge { + edgeId: string; + sourceNodeId: string; + targetNodeId: string; + relation: string; + summaries: Set<string>; + evidences: Set<string>; + sourceSessionIds: Set<string>; + sourcePaths: Set<string>; + representativeSessionId: string; + representativeSourcePath: string; +} + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + memoryTable: "memory", + graphNodesTable: "graph_nodes", + graphEdgesTable: "graph_edges", + concurrency: 4, + model: "haiku", + clearGraph: false, + errorLogPath: undefined, + }; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--memory-table": + opts.memoryTable = args[++i] ?? opts.memoryTable; + break; + case "--graph-nodes-table": + opts.graphNodesTable = args[++i] ?? opts.graphNodesTable; + break; + case "--graph-edges-table": + opts.graphEdgesTable = args[++i] ?? opts.graphEdgesTable; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i] ?? "4", 10) || 4); + break; + case "--model": + opts.model = args[++i] ?? opts.model; + break; + case "--clear-graph": + opts.clearGraph = true; + break; + case "--error-log": + opts.errorLogPath = args[++i] ?? opts.errorLogPath; + break; + } + } + return opts; +} + +function extractSummarySourcePath(summary: string): string { + const match = summary.match(/^- \*\*Source\*\*: (.+)$/m); + return match?.[1]?.trim() || ""; +} + +function sessionIdFromSummaryPath(path: string): string { + const base = basename(path).replace(/\.md$/, ""); + return base.endsWith("_summary") ?
base.slice(0, -"_summary".length) : base; +} + +async function generateGraph(summary: string, sourcePath: string, sessionId: string, project: string, claudeBin: string, model: string) { + const prompt = buildKnowledgeGraphPrompt({ + summaryText: summary, + sessionId, + sourcePath, + project, + }); + const { stdout } = await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + }, + }); + return parseGraphExtraction(stdout); +} + +function serializeError(error: unknown): Record { + const err = error instanceof Error ? error : new Error(String(error)); + const out: Record = { + name: err.name, + message: err.message, + stack: err.stack, + }; + const record = err as Error & Record; + if (typeof record["phase"] === "string") out["phase"] = record["phase"]; + if (typeof record["sessionId"] === "string") out["sessionId"] = record["sessionId"]; + if (typeof record["table"] === "string") out["table"] = record["table"]; + if (typeof record["sql"] === "string") out["sql"] = record["sql"]; + if (error instanceof DeeplakeQueryError) { + out["sqlSummary"] = error.sqlSummary; + out["status"] = error.status; + out["responseBody"] = error.responseBody; + } else if (typeof record["sql"] === "string") { + out["sqlSummary"] = summarizeSql(record["sql"] as string); + } + const cause = record["cause"]; + if (cause instanceof DeeplakeQueryError) { + out["cause"] = { + name: cause.name, + message: cause.message, + sqlSummary: cause.sqlSummary, + status: cause.status, + responseBody: cause.responseBody, + stack: cause.stack, + }; + } else if (cause instanceof Error) { + out["cause"] = { + name: cause.name, + message: cause.message, + stack: cause.stack, + }; + } else if (cause != null) { + out["cause"] = String(cause); + } + return out; +} + +function appendErrorLog(logPath: string | undefined, payload: Record): void { + if (!logPath) return; + appendFileSync(logPath, `${JSON.stringify(payload)}\n`, "utf-8"); +} + +const NODE_TYPE_PRIORITY = [ + "person", + "organization", + "place", + "event", + "project", + "artifact", + "tool", + "file", + "goal", + "status", + "preference", + "concept", + "other", +]; + +function nodeTypeRank(value: string): number { + const idx = NODE_TYPE_PRIORITY.indexOf(value); + return idx === -1 ? NODE_TYPE_PRIORITY.length : idx; +} + +function preferNodeType(a: string, b: string): string { + return nodeTypeRank(a) <= nodeTypeRank(b) ? 
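Two worked examples of the tie-break this ranking produces (the ternary completes just below):

// preferNodeType("concept", "person") -> "person"  (rank 11 vs rank 0)
// preferNodeType("alien", "tool")     -> "tool"    (unknown types rank last)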
a : b; +} + +function pushLimited(set: Set<string>, value: string, max = 8): void { + const trimmed = value.trim(); + if (!trimmed) return; + if (set.has(trimmed)) return; + if (set.size >= max) return; + set.add(trimmed); +} + +function mergeSummarySet(set: Set<string>): string { + return [...set].join(" | "); +} + +function chooseRepresentative(set: Set<string>, fallback: string): string { + return [...set].at(-1) || fallback; +} + +function resolveNodeId(name: string, aliases: string[], aliasMap: Map<string, string>): string { + const candidates = [name, ...aliases] + .map((value) => value.trim()) + .filter(Boolean) + .map((value) => buildGraphNodeId(value)); + for (const candidate of candidates) { + const existing = aliasMap.get(candidate); + if (existing) return existing; + } + return buildGraphNodeId(name); +} + +function mergeGraphIntoAggregate(args: { + graph: GraphExtraction; + sessionId: string; + sourcePath: string; + nodes: Map<string, AggregateNode>; + edges: Map<string, AggregateEdge>; + aliasMap: Map<string, string>; +}): void { + const localNodeIds = new Map<string, string>(); + const ensureNode = (rawName: string, type = "other", summary = "", aliases: string[] = []): string => { + const name = rawName.trim(); + if (!name) return buildGraphNodeId("unknown"); + const nodeId = resolveNodeId(name, aliases, args.aliasMap); + const existing = args.nodes.get(nodeId); + if (existing) { + existing.nodeType = preferNodeType(existing.nodeType, type || "other"); + pushLimited(existing.summaries, summary); + existing.sourceSessionIds.add(args.sessionId); + existing.sourcePaths.add(args.sourcePath); + existing.representativeSessionId = args.sessionId; + existing.representativeSourcePath = args.sourcePath; + for (const alias of [name, ...aliases]) { + const trimmed = alias.trim(); + if (!trimmed) continue; + existing.aliases.add(trimmed); + args.aliasMap.set(buildGraphNodeId(trimmed), nodeId); + } + } else { + const node: AggregateNode = { + nodeId, + canonicalName: name, + nodeType: type || "other", + aliases: new Set(), + summaries: new Set(), + sourceSessionIds: new Set([args.sessionId]), + sourcePaths: new Set([args.sourcePath]), + representativeSessionId: args.sessionId, + representativeSourcePath: args.sourcePath, + }; + pushLimited(node.summaries, summary); + for (const alias of [name, ...aliases]) { + const trimmed = alias.trim(); + if (!trimmed) continue; + node.aliases.add(trimmed); + args.aliasMap.set(buildGraphNodeId(trimmed), nodeId); + } + args.nodes.set(nodeId, node); + } + localNodeIds.set(name, nodeId); + return nodeId; + }; + + for (const node of args.graph.nodes) { + ensureNode(node.name, node.type || "other", node.summary || "", node.aliases || []); + } + for (const edge of args.graph.edges) { + const sourceNodeId = localNodeIds.get(edge.source.trim()) || ensureNode(edge.source); + const targetNodeId = localNodeIds.get(edge.target.trim()) || ensureNode(edge.target); + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + const existing = args.edges.get(edgeId); + if (existing) { + pushLimited(existing.summaries, edge.summary || `${edge.source} ${edge.relation} ${edge.target}`); + pushLimited(existing.evidences, edge.evidence || ""); + existing.sourceSessionIds.add(args.sessionId); + existing.sourcePaths.add(args.sourcePath); + existing.representativeSessionId = args.sessionId; + existing.representativeSourcePath = args.sourcePath; + } else { + const aggregateEdge: AggregateEdge = { + edgeId, + sourceNodeId, + targetNodeId, + relation: edge.relation, + summaries: new Set(), + evidences: new Set(), + sourceSessionIds: new Set([args.sessionId]), + sourcePaths: new
Set([args.sourcePath]), + representativeSessionId: args.sessionId, + representativeSourcePath: args.sourcePath, + }; + pushLimited(aggregateEdge.summaries, edge.summary || `${edge.source} ${edge.relation} ${edge.target}`); + pushLimited(aggregateEdge.evidences, edge.evidence || ""); + args.edges.set(edgeId, aggregateEdge); + } + } +} + +async function insertAggregatedGraph(args: { + api: DeeplakeApi; + nodesTable: string; + edgesTable: string; + project: string; + agent: string; + nodes: Map; + edges: Map; +}): Promise { + const ts = new Date().toISOString(); + const nodePath = "/graphs/nodes/locomo/global.jsonl"; + const edgePath = "/graphs/edges/locomo/global.jsonl"; + const nodeFilename = "global.jsonl"; + const edgeFilename = "global.jsonl"; + + await args.api.query(`DELETE FROM "${args.nodesTable}"`); + await args.api.query(`DELETE FROM "${args.edgesTable}"`); + + const nodeRows = [...args.nodes.values()].map((node) => { + const aliases = [...node.aliases].filter((alias) => alias !== node.canonicalName); + const sourceSessionIds = [...node.sourceSessionIds]; + const sourcePaths = [...node.sourcePaths]; + const summary = mergeSummarySet(node.summaries) || buildSummaryBlurb(`# Graph Node\n\n${node.canonicalName}`); + const searchText = [ + node.canonicalName, + node.nodeType, + ...aliases, + ...node.summaries, + ...sourceSessionIds, + ...sourcePaths, + ].join(" | "); + return ( + `('${randomUUID()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(node.nodeId)}', ` + + `'${esc(node.canonicalName)}', '${esc(node.nodeType)}', E'${esc(summary)}', E'${esc(searchText)}', ` + + `'${esc(aliases.join(", "))}', '${esc(chooseRepresentative(node.sourceSessionIds, node.representativeSessionId))}', ` + + `'${esc(sourceSessionIds.join(" || "))}', '${esc(chooseRepresentative(node.sourcePaths, node.representativeSourcePath))}', ` + + `'${esc(sourcePaths.join(" || "))}', 'locomo', 'application/json', ` + + `${Buffer.byteLength(searchText, "utf-8")}, '${esc(args.project)}', E'${esc(buildSummaryBlurb(summary))}', ` + + `'${esc(args.agent)}', '${ts}', '${ts}')` + ); + }); + + for (let i = 0; i < nodeRows.length; i += 100) { + const chunk = nodeRows.slice(i, i + 100); + if (chunk.length === 0) continue; + await args.api.query( + `INSERT INTO "${args.nodesTable}" ` + + `(id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_session_ids, source_path, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${chunk.join(", ")}` + ); + } + + const edgeRows = [...args.edges.values()].map((edge) => { + const sourceSessionIds = [...edge.sourceSessionIds]; + const sourcePaths = [...edge.sourcePaths]; + const summary = mergeSummarySet(edge.summaries) || edge.edgeId; + const evidence = mergeSummarySet(edge.evidences); + const searchText = [ + edge.sourceNodeId, + edge.relation, + edge.targetNodeId, + ...edge.summaries, + ...edge.evidences, + ...sourceSessionIds, + ...sourcePaths, + ].join(" | "); + return ( + `('${randomUUID()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edge.edgeId)}', ` + + `'${esc(edge.sourceNodeId)}', '${esc(edge.targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', ` + + `E'${esc(evidence)}', E'${esc(searchText)}', '${esc(chooseRepresentative(edge.sourceSessionIds, edge.representativeSessionId))}', ` + + `'${esc(sourceSessionIds.join(" || "))}', '${esc(chooseRepresentative(edge.sourcePaths, edge.representativeSourcePath))}', ` + + 
`'${esc(sourcePaths.join(" || "))}', 'locomo', 'application/json', ` + + `${Buffer.byteLength(searchText, "utf-8")}, '${esc(args.project)}', E'${esc(buildSummaryBlurb(summary))}', ` + + `'${esc(args.agent)}', '${ts}', '${ts}')` + ); + }); + + for (let i = 0; i < edgeRows.length; i += 100) { + const chunk = edgeRows.slice(i, i + 100); + if (chunk.length === 0) continue; + await args.api.query( + `INSERT INTO "${args.edgesTable}" ` + + `(id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_session_ids, source_path, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${chunk.join(", ")}` + ); + } +} + +async function withConcurrency(items: T[], concurrency: number, fn: (item: T, idx: number) => Promise) { + let next = 0; + let running = 0; + await new Promise((resolve) => { + function launch() { + while (running < concurrency && next < items.length) { + const idx = next++; + running++; + fn(items[idx], idx).finally(() => { + running--; + if (next >= items.length && running === 0) resolve(); + else launch(); + }); + } + } + launch(); + }); +} + +async function main(): Promise { + const opts = parseArgs(); + const errorLogPath = opts.errorLogPath || `/tmp/locomo-graph-backfill-errors-${Date.now()}.jsonl`; + writeFileSync(errorLogPath, "", "utf-8"); + console.log(`error_log=${errorLogPath}`); + const creds = loadCredentials(); + if (!creds?.token) throw new Error("No Deeplake credentials found. Run hivemind login first."); + + const api = new DeeplakeApi( + creds.token, + creds.apiUrl ?? "https://api.deeplake.ai", + creds.orgId, + creds.workspaceId ?? "default", + opts.memoryTable, + ); + + await api.ensureGraphNodesTable(opts.graphNodesTable); + await api.ensureGraphEdgesTable(opts.graphEdgesTable); + if (opts.clearGraph) { + await api.query(`DELETE FROM "${opts.graphNodesTable}"`); + await api.query(`DELETE FROM "${opts.graphEdgesTable}"`); + } + + const summaryRows = (await api.query( + `SELECT path, summary, project FROM "${opts.memoryTable}" WHERE path LIKE '/summaries/locomo/%' ORDER BY path` + )) + .filter((row) => typeof row["path"] === "string" && typeof row["summary"] === "string") + .map((row) => ({ + path: row["path"] as string, + summary: row["summary"] as string, + project: typeof row["project"] === "string" ? 
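A usage sketch for the withConcurrency helper defined above: it resolves only after every item has been processed, with at most concurrency callbacks in flight.

await withConcurrency(["a", "b", "c", "d", "e"], 2, async (item, idx) => {
  // At most two of these run at once; idx is the item's position in the array.
  console.log(`processing ${idx}: ${item}`);
});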
row["project"] as string : undefined, + })) as SummaryRow[]; + const claudeBin = findClaudeBin(); + const aggregateNodes = new Map(); + const aggregateEdges = new Map(); + const aliasMap = new Map(); + + let completed = 0; + let failed = 0; + await withConcurrency(summaryRows, opts.concurrency, async (row) => { + const sessionId = sessionIdFromSummaryPath(row.path); + const sourcePath = extractSummarySourcePath(row.summary) || `/sessions/${sessionId}.jsonl`; + try { + const graph = await generateGraph(row.summary, sourcePath, sessionId, row.project || "locomo", claudeBin, opts.model); + mergeGraphIntoAggregate({ + graph, + sessionId, + sourcePath, + nodes: aggregateNodes, + edges: aggregateEdges, + aliasMap, + }); + completed++; + if (completed % 10 === 0 || completed === summaryRows.length) { + console.log(`graph ${completed}/${summaryRows.length}`); + } + } catch (error) { + failed++; + const serialized = serializeError(error); + appendErrorLog(errorLogPath, { + ts: new Date().toISOString(), + summaryPath: row.path, + sessionId, + sourcePath, + project: row.project || "locomo", + ...serialized, + }); + const phase = typeof serialized["phase"] === "string" ? ` phase=${serialized["phase"]}` : ""; + const status = typeof serialized["status"] === "number" ? ` status=${serialized["status"]}` : ""; + const sqlSummary = typeof serialized["sqlSummary"] === "string" ? ` sql=${serialized["sqlSummary"]}` : ""; + console.error(`FAIL ${row.path}:${phase}${status}${sqlSummary} ${serialized["message"]}`); + } + }); + + if (failed === 0 || completed > 0) { + await insertAggregatedGraph({ + api, + nodesTable: opts.graphNodesTable, + edgesTable: opts.graphEdgesTable, + project: "locomo", + agent: "claude_code", + nodes: aggregateNodes, + edges: aggregateEdges, + }); + console.log(`graph_rows nodes=${aggregateNodes.size} edges=${aggregateEdges.size}`); + } + + console.log(`Done. graph_summaries=${completed} failed=${failed}`); +} + +main().catch((error) => { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +}); diff --git a/src/tools/backfill-locomo-memory.ts b/src/tools/backfill-locomo-memory.ts new file mode 100644 index 0000000..50eb9c1 --- /dev/null +++ b/src/tools/backfill-locomo-memory.ts @@ -0,0 +1,477 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join, basename } from "node:path"; +import { promisify } from "node:util"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { uploadSummary } from "../hooks/upload-summary.js"; +import { WIKI_PROMPT_TEMPLATE, findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface SessionRow { + path: string; + filename: string; + creation_date?: string; + source_date_time?: string; + turn_index?: number; + dia_id?: string; + speaker?: string; + text?: string; + turn_summary?: string; + event_type?: string; + message: unknown; +} + +interface SessionTask { + sessionId: string; + sourcePath: string; + summaryPath: string; + summaryFilename: string; + jsonlContent: string; + jsonlLines: number; +} + +interface Args { + sessionsTable: string; + memoryTable: string; + concurrency: number; + model: string; + clearMemory: boolean; +} + +const VISIBILITY_RETRIES = 5; +const VISIBILITY_DELAY_MS = 1500; +const REPAIR_ROUNDS = 2; + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + sessionsTable: "sessions", + memoryTable: "memory", + concurrency: 5, + model: "haiku", + clearMemory: true, + }; + + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--sessions-table": + opts.sessionsTable = args[++i]; + break; + case "--memory-table": + opts.memoryTable = args[++i]; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i], 10) || 5); + break; + case "--model": + opts.model = args[++i] || "haiku"; + break; + case "--no-clear-memory": + opts.clearMemory = false; + break; + } + } + + return opts; +} + +function parseSessionPayload(raw: unknown): Record { + if (typeof raw === "string") { + try { + return JSON.parse(raw) as Record; + } catch { + return { raw }; + } + } + if (raw && typeof raw === "object") return raw as Record; + return { raw }; +} + +function buildSessionTaskFromBlob(row: SessionRow): SessionTask { + const sessionId = basename(row.path).replace(/\.[^.]+$/, ""); + const summaryFilename = `${sessionId}_summary.md`; + const summaryPath = `/summaries/locomo/${summaryFilename}`; + const payload = parseSessionPayload(row.message); + const turns = Array.isArray(payload["turns"]) ? payload["turns"] as Array> : []; + + if (turns.length === 0) { + return { + sessionId, + sourcePath: row.path, + summaryPath, + summaryFilename, + jsonlContent: `${typeof row.message === "string" ? row.message : JSON.stringify(row.message)}\n`, + jsonlLines: 1, + }; + } + + const speakers = payload["speakers"] && typeof payload["speakers"] === "object" + ? payload["speakers"] as Record + : {}; + const meta = { + type: "session_meta", + session_id: sessionId, + source_path: row.path, + conversation_id: payload["conversation_id"] ?? null, + session_number: payload["session_number"] ?? null, + date_time: payload["date_time"] ?? payload["date"] ?? null, + speaker_a: speakers["speaker_a"] ?? null, + speaker_b: speakers["speaker_b"] ?? 
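For reference, buildSessionTaskFromBlob (whose session_meta object is being assembled here) emits JSONL of roughly this shape: one session_meta line, then one line per turn. All dialogue values below are invented for illustration.

// {"type":"session_meta","session_id":"conv_3_session_2","source_path":"/sessions/conv_3_session_2.json","conversation_id":3,"session_number":2,"date_time":"1:30 pm on 5 May, 2023","speaker_a":"Caroline","speaker_b":"Melanie"}
// {"type":"dialogue_turn","session_id":"conv_3_session_2","date_time":"1:30 pm on 5 May, 2023","speaker":"Caroline","dia_id":"D3:2:1","text":"Hey Melanie!"}
// {"type":"dialogue_turn","session_id":"conv_3_session_2","date_time":"1:30 pm on 5 May, 2023","speaker":"Melanie","dia_id":"D3:2:2","text":"Hi! How was the trip?"}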
null, + }; + + const lines = [JSON.stringify(meta)]; + for (const turn of turns) { + lines.push(JSON.stringify({ + type: "dialogue_turn", + session_id: sessionId, + date_time: payload["date_time"] ?? payload["date"] ?? null, + speaker: turn["speaker"] ?? null, + dia_id: turn["dia_id"] ?? null, + text: turn["text"] ?? null, + })); + } + + return { + sessionId, + sourcePath: row.path, + summaryPath, + summaryFilename, + jsonlContent: `${lines.join("\n")}\n`, + jsonlLines: lines.length, + }; +} + +function buildSessionTaskFromRows(rows: SessionRow[]): SessionTask { + if (rows.length === 0) throw new Error("buildSessionTaskFromRows requires at least one row"); + const sorted = [...rows].sort((a, b) => { + const turnA = typeof a.turn_index === "number" ? a.turn_index : Number.MAX_SAFE_INTEGER; + const turnB = typeof b.turn_index === "number" ? b.turn_index : Number.MAX_SAFE_INTEGER; + if (turnA !== turnB) return turnA - turnB; + return (a.creation_date ?? "").localeCompare(b.creation_date ?? ""); + }); + const first = sorted[0]; + const sessionId = basename(first.path).replace(/\.[^.]+$/, ""); + const summaryFilename = `${sessionId}_summary.md`; + const summaryPath = `/summaries/locomo/${summaryFilename}`; + const sessionDateTime = first.source_date_time ?? first.creation_date ?? null; + + const lines = [JSON.stringify({ + type: "session_meta", + session_id: sessionId, + source_path: first.path, + date_time: sessionDateTime, + })]; + + for (const row of sorted) { + if ((row.event_type && row.event_type !== "dialogue_turn") && !row.text) continue; + lines.push(JSON.stringify({ + type: row.event_type || "dialogue_turn", + session_id: sessionId, + date_time: row.source_date_time ?? row.creation_date ?? null, + turn_index: row.turn_index ?? null, + dia_id: row.dia_id ?? null, + speaker: row.speaker ?? null, + text: row.text ?? null, + summary: row.turn_summary ?? 
null, + })); + } + + return { + sessionId, + sourcePath: first.path, + summaryPath, + summaryFilename, + jsonlContent: `${lines.join("\n")}\n`, + jsonlLines: lines.length, + }; +} + +function buildPrompt(task: SessionTask): string { + return WIKI_PROMPT_TEMPLATE + .replace(/__JSONL__/g, "__TMP_JSONL__") + .replace(/__SUMMARY__/g, "__TMP_SUMMARY__") + .replace(/__SESSION_ID__/g, task.sessionId) + .replace(/__PROJECT__/g, "locomo") + .replace(/__PREV_OFFSET__/g, "0") + .replace(/__JSONL_LINES__/g, String(task.jsonlLines)) + .replace(/__JSONL_SERVER_PATH__/g, task.sourcePath); +} + +async function generateSummary(task: SessionTask, claudeBin: string, model: string): Promise { + const tmpRoot = await mkdtemp(join(tmpdir(), `locomo-summary-${task.sessionId}-`)); + const tmpJsonl = join(tmpRoot, "session.jsonl"); + const tmpSummary = join(tmpRoot, "summary.md"); + + try { + await writeFile(tmpJsonl, task.jsonlContent, "utf-8"); + const prompt = buildPrompt(task) + .replace(/__TMP_JSONL__/g, tmpJsonl) + .replace(/__TMP_SUMMARY__/g, tmpSummary); + + await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + DEEPLAKE_WIKI_WORKER: "1", + }, + }); + + return await readFile(tmpSummary, "utf-8"); + } finally { + await rm(tmpRoot, { recursive: true, force: true }); + } +} + +async function generateSummaryWithRetry(task: SessionTask, claudeBin: string, model: string, retries = 2): Promise { + let lastError: unknown; + for (let attempt = 0; attempt <= retries; attempt++) { + try { + return await generateSummary(task, claudeBin, model); + } catch (error) { + lastError = error; + if (attempt === retries) break; + await new Promise((resolve) => setTimeout(resolve, 2000 * (attempt + 1))); + } + } + throw lastError instanceof Error ? 
lastError : new Error(String(lastError)); +} + +async function sleep(ms: number): Promise<void> { + await new Promise<void>((resolve) => setTimeout(resolve, ms)); +} + +async function listExistingSummaryPaths(api: DeeplakeApi, memoryTable: string): Promise<Set<string>> { + const existingRows = await api.query( + `SELECT path FROM "${memoryTable}" WHERE path LIKE '/summaries/locomo/%'` + ); + return new Set( + existingRows + .map((row) => row["path"]) + .filter((value): value is string => typeof value === "string" && value.length > 0), + ); +} + +async function waitForVisibleSummaryPath( + api: DeeplakeApi, + memoryTable: string, + summaryPath: string, + retries = VISIBILITY_RETRIES, +): Promise<boolean> { + for (let attempt = 0; attempt <= retries; attempt++) { + const rows = await api.query( + `SELECT path FROM "${memoryTable}" WHERE path = '${summaryPath.replace(/\\/g, "\\\\").replace(/'/g, "''")}' LIMIT 1` + ); + if (rows.length > 0) return true; + if (attempt < retries) await sleep(VISIBILITY_DELAY_MS * (attempt + 1)); + } + return false; +} + +async function uploadSummaryWithVerification( + api: DeeplakeApi, + memoryTable: string, + task: SessionTask, + text: string, + retries = 2, +): Promise<void> { + let lastError: unknown; + for (let attempt = 0; attempt <= retries; attempt++) { + try { + await uploadSummary(api.query.bind(api), { + tableName: memoryTable, + vpath: task.summaryPath, + fname: task.summaryFilename, + userName: "locomo", + project: "locomo", + agent: "claude_code", + sessionId: task.sessionId, + text, + }); + const visible = await waitForVisibleSummaryPath(api, memoryTable, task.summaryPath); + if (visible) return; + lastError = new Error("summary row not visible after upload"); + } catch (error) { + lastError = error; + } + if (attempt < retries) await sleep(2000 * (attempt + 1)); + } + throw lastError instanceof Error ? lastError : new Error(String(lastError)); +} + +async function withConcurrency<T>(items: T[], concurrency: number, fn: (item: T, idx: number) => Promise<unknown>) { + let running = 0; + let next = 0; + await new Promise<void>((resolve) => { + function launch() { + while (running < concurrency && next < items.length) { + const idx = next++; + running++; + fn(items[idx], idx) + .finally(() => { + running--; + if (next >= items.length && running === 0) resolve(); + else launch(); + }); + } + } + launch(); + }); +} + +async function main(): Promise<void> { + const opts = parseArgs(); + const creds = loadCredentials(); + if (!creds?.token) { + throw new Error("No Deeplake credentials found. Run hivemind login first."); + } + + const api = new DeeplakeApi( + creds.token, + creds.apiUrl ?? "https://api.deeplake.ai", + creds.orgId, + creds.workspaceId ?? "default", + opts.memoryTable, + ); + + const claudeBin = findClaudeBin(); + const sessionRowsRaw = await api.query( + `SELECT path, filename, creation_date, source_date_time, turn_index, dia_id, speaker, text, turn_summary, event_type, message ` + + `FROM "${opts.sessionsTable}" WHERE path LIKE '/sessions/conv_%_session_%.json%' ` + + `ORDER BY path, creation_date, turn_index` + ); + const sessionRows = sessionRowsRaw + .filter((row) => + typeof row["path"] === "string" && + typeof row["filename"] === "string" && + "message" in row, + ) + .map((row) => ({ + path: row["path"] as string, + filename: row["filename"] as string, + creation_date: typeof row["creation_date"] === "string" ? row["creation_date"] as string : undefined, + source_date_time: typeof row["source_date_time"] === "string" ?
row["source_date_time"] as string : undefined, + turn_index: typeof row["turn_index"] === "number" ? row["turn_index"] as number : undefined, + dia_id: typeof row["dia_id"] === "string" ? row["dia_id"] as string : undefined, + speaker: typeof row["speaker"] === "string" ? row["speaker"] as string : undefined, + text: typeof row["text"] === "string" ? row["text"] as string : undefined, + turn_summary: typeof row["turn_summary"] === "string" ? row["turn_summary"] as string : undefined, + event_type: typeof row["event_type"] === "string" ? row["event_type"] as string : undefined, + message: row["message"], + })) as SessionRow[]; + + const grouped = new Map(); + for (const row of sessionRows) { + if (!row.path.includes("/conv_")) continue; + const list = grouped.get(row.path) ?? []; + list.push(row); + grouped.set(row.path, list); + } + + const allTasks = [...grouped.values()].map((rows) => { + const blobRow = rows.find((row) => { + const payload = parseSessionPayload(row.message); + return Array.isArray(payload["turns"]) || Array.isArray(payload["dialogue"]); + }); + return blobRow ? buildSessionTaskFromBlob(blobRow) : buildSessionTaskFromRows(rows); + }); + let tasks = [...allTasks]; + const tasksByPath = new Map(allTasks.map((task) => [task.summaryPath, task])); + const expectedPaths = new Set(allTasks.map((task) => task.summaryPath)); + + console.log(`Workspace: ${creds.workspaceId ?? "default"} | Org: ${creds.orgName ?? creds.orgId}`); + console.log(`Sessions table: ${opts.sessionsTable} | Memory table: ${opts.memoryTable}`); + console.log(`Model: ${opts.model} | Concurrency: ${opts.concurrency}`); + console.log(`Found ${tasks.length} LOCOMO sessions`); + + if (opts.clearMemory) { + console.log(`Clearing "${opts.memoryTable}" before backfill...`); + await api.query(`DELETE FROM "${opts.memoryTable}"`); + } else { + const existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + const before = tasks.length; + tasks = tasks.filter((task) => !existingPaths.has(task.summaryPath)); + console.log(`Existing LOCOMO summaries: ${existingPaths.size}. Pending tasks: ${tasks.length}/${before}`); + } + + let completed = 0; + let failed = 0; + const failures: string[] = []; + + await withConcurrency(tasks, opts.concurrency, async (task) => { + try { + const text = await generateSummaryWithRetry(task, claudeBin, opts.model); + if (!text.trim()) throw new Error("empty summary"); + + await uploadSummaryWithVerification(api, opts.memoryTable, task, text); + + completed++; + if (completed % 10 === 0 || completed === tasks.length) { + console.log(` ${completed}/${tasks.length}`); + } + } catch (error) { + failed++; + failures.push(`${task.sessionId}: ${error instanceof Error ? error.message : String(error)}`); + console.error(`FAIL ${task.sessionId}: ${error instanceof Error ? 
error.message : String(error)}`); + } + }); + + let existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + let missingPaths = [...expectedPaths].filter((path) => !existingPaths.has(path)); + + for (let round = 1; round <= REPAIR_ROUNDS && missingPaths.length > 0; round++) { + console.log(`Repair round ${round}: ${missingPaths.length} missing summaries`); + await withConcurrency( + missingPaths + .map((path) => tasksByPath.get(path)) + .filter((task): task is SessionTask => Boolean(task)), + 1, + async (task) => { + try { + const text = await generateSummaryWithRetry(task, claudeBin, opts.model); + if (!text.trim()) throw new Error("empty summary"); + await uploadSummaryWithVerification(api, opts.memoryTable, task, text, 3); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${task.sessionId}: repair round ${round}: ${message}`); + console.error(`FAIL ${task.sessionId} (repair ${round}): ${message}`); + } + }, + ); + existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + missingPaths = [...expectedPaths].filter((path) => !existingPaths.has(path)); + } + + const finalCount = existingPaths.size; + console.log(`Done. summaries=${completed} failed=${failed} memory_rows=${finalCount}`); + if (missingPaths.length > 0) { + console.error(`Still missing ${missingPaths.length} summaries:`); + for (const path of missingPaths.slice(0, 20)) console.error(` ${path}`); + process.exitCode = 1; + } + if (failures.length > 0) { + console.error(`Failures (${failures.length}):`); + for (const failure of failures.slice(0, 20)) console.error(` ${failure}`); + process.exitCode = 1; + } +} + +main().catch((error) => { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}); diff --git a/src/tools/migrate-locomo-sessions.ts b/src/tools/migrate-locomo-sessions.ts new file mode 100644 index 0000000..8406a13 --- /dev/null +++ b/src/tools/migrate-locomo-sessions.ts @@ -0,0 +1,238 @@ +#!/usr/bin/env node + +import { basename } from "node:path"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { buildSessionInsertSql, type QueuedSessionRow } from "../hooks/session-queue.js"; + +interface Args { + sessionsTable: string; + backupTable: string; + batchSize: number; + dryRun: boolean; +} + +interface SessionRowRecord extends Record { + id: string; + path: string; + filename: string; + message: unknown; + author: string; + size_bytes: number; + project: string; + description: string; + agent: string; + creation_date: string; + last_update_date: string; +} + +const LOCOMO_PATH_FILTER = `/sessions/conv_%_session_%.json%`; + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + sessionsTable: "sessions", + backupTable: "sessions_locomo_blob_backup", + batchSize: 100, + dryRun: false, + }; + + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--sessions-table": + opts.sessionsTable = args[++i] ?? opts.sessionsTable; + break; + case "--backup-table": + opts.backupTable = args[++i] ?? 
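Note that in SQL LIKE, an underscore matches any single character, so LOCOMO_PATH_FILTER above is slightly looser than it reads. Worked examples:

// '/sessions/conv_%_session_%.json%' vs candidate paths:
//   /sessions/conv_12_session_3.json   -> match
//   /sessions/conv_12_session_3.jsonl  -> match (trailing % absorbs the "l")
//   /sessions/other_session_3.json     -> no match (no "conv" prefix)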
opts.backupTable; + break; + case "--batch-size": + opts.batchSize = Math.max(1, Number(args[++i]) || opts.batchSize); + break; + case "--dry-run": + opts.dryRun = true; + break; + } + } + return opts; +} + +function parseJson(value: unknown): Record | null { + if (typeof value === "string") { + try { return JSON.parse(value) as Record; } catch { return null; } + } + return value && typeof value === "object" ? value as Record : null; +} + +function extractString(value: unknown): string { + return typeof value === "string" ? value : value == null ? "" : String(value); +} + +function extractNumber(value: unknown): number { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return 0; +} + +function isTranscriptBlob(row: SessionRowRecord): boolean { + const parsed = parseJson(row.message); + return !!parsed && (Array.isArray(parsed["turns"]) || Array.isArray(parsed["dialogue"])); +} + +function normalizeMessageJson(value: unknown): string { + try { + return JSON.stringify(typeof value === "string" ? JSON.parse(value) : value); + } catch { + return JSON.stringify({ type: "raw_message", content: String(value ?? "") }); + } +} + +function toQueuedRowFromExisting(row: SessionRowRecord): QueuedSessionRow { + const message = normalizeMessageJson(row.message); + return { + id: extractString(row.id), + path: extractString(row.path), + filename: extractString(row.filename), + message, + sessionId: extractString(parseJson(row.message)?.["session_id"]) || basename(extractString(row.path)).replace(/\.[^.]+$/, ""), + eventType: extractString(parseJson(row.message)?.["type"]), + turnIndex: extractNumber(parseJson(row.message)?.["turn_index"]), + diaId: extractString(parseJson(row.message)?.["dia_id"]), + speaker: extractString(parseJson(row.message)?.["speaker"]), + text: extractString(parseJson(row.message)?.["text"]) || extractString(parseJson(row.message)?.["content"]), + turnSummary: extractString(parseJson(row.message)?.["summary"]) || extractString(parseJson(row.message)?.["message_summary"]), + sourceDateTime: extractString(parseJson(row.message)?.["date_time"]) || extractString(parseJson(row.message)?.["source_date_time"]), + author: extractString(row.author), + sizeBytes: extractNumber(row.size_bytes), + project: extractString(row.project), + description: extractString(row.description), + agent: extractString(row.agent), + creationDate: extractString(row.creation_date), + lastUpdateDate: extractString(row.last_update_date), + }; +} + +function explodeTranscriptRow(row: SessionRowRecord): QueuedSessionRow[] { + const parsed = parseJson(row.message); + if (!parsed) return []; + const turns = Array.isArray(parsed["turns"]) + ? parsed["turns"] as Array> + : Array.isArray(parsed["dialogue"]) + ? parsed["dialogue"] as Array> + : []; + const sessionId = basename(extractString(row.path)).replace(/\.[^.]+$/, ""); + const sourceDateTime = extractString(parsed["date_time"]) || extractString(parsed["date"]) || extractString(row.creation_date); + + return turns.map((turn, index) => { + const messageObject = { + type: "dialogue_turn", + session_id: sessionId, + source_path: extractString(row.path), + conversation_id: parsed["conversation_id"] ?? null, + session_number: parsed["session_number"] ?? null, + date_time: sourceDateTime || null, + turn_index: index + 1, + dia_id: turn["dia_id"] ?? null, + speaker: turn["speaker"] ?? turn["name"] ?? 
diff --git a/src/tools/smoke-summary-bm25.ts b/src/tools/smoke-summary-bm25.ts
new file mode 100644
index 0000000..0711ad1
--- /dev/null
+++ b/src/tools/smoke-summary-bm25.ts
@@ -0,0 +1,54 @@
+import { loadConfig } from "../config.js";
+import { DeeplakeApi } from "../deeplake-api.js";
+import { sqlLike, sqlStr } from "../utils/sql.js";
+
+async function main(): Promise<void> {
+  const config = loadConfig();
+  if (!config) {
+    throw new Error("Missing Hivemind/Deeplake config");
+  }
+
+  const queryText = process.argv.slice(2).join(" ").trim() || "book novel literature";
+  const api = new DeeplakeApi(
+    config.token,
+    config.apiUrl,
+    config.orgId,
+    config.workspaceId,
+    config.tableName,
+  );
+
+  const started = Date.now();
+  await api.ensureSummaryBm25Index();
+  const createMs = Date.now() - started;
+
+  const bm25Started = Date.now();
+  const bm25Rows = await api.query(
+    `SELECT path, (summary <#> '${sqlStr(queryText)}') AS score ` +
+    `FROM "${config.tableName}" WHERE path LIKE '/summaries/%' ` +
+    `ORDER BY score DESC LIMIT 10`,
+  );
+  const bm25Ms = Date.now() - bm25Started;
+
+  const ilikeStarted = Date.now();
+  const ilikeRows = await api.query(
+    `SELECT path FROM "${config.tableName}" WHERE path LIKE '/summaries/%' ` +
+    `AND summary ILIKE '%${sqlLike(queryText.split(/\s+/)[0] ?? queryText)}%' LIMIT 10`,
+  );
+  const ilikeMs = Date.now() - ilikeStarted;
+
+  console.log(JSON.stringify({
+    table: config.tableName,
+    queryText,
+    createIndexMs: createMs,
+    bm25Ms,
+    bm25TopPaths: bm25Rows.slice(0, 5).map((row) => ({ path: row["path"], score: row["score"] })),
+    ilikeMs,
+    ilikeTopPaths: ilikeRows.slice(0, 5).map((row) => row["path"]),
+  }, null, 2));
+}
+
+main().catch((error: unknown) => {
+  const message = error instanceof Error ? error.message : String(error);
+  console.error(message);
+  process.exit(1);
+});
diff --git a/src/utils/hybrid-fusion.ts b/src/utils/hybrid-fusion.ts
new file mode 100644
index 0000000..0941540
--- /dev/null
+++ b/src/utils/hybrid-fusion.ts
@@ -0,0 +1,127 @@
+export interface ScoredRetrievalRow {
+  path: string;
+  content: string;
+  sourceOrder: number;
+  creationDate: string;
+  score: number;
+}
+
+export interface FusedRetrievalRow {
+  path: string;
+  content: string;
+  sourceOrder: number;
+  creationDate: string;
+  textScore: number;
+  vectorScore: number;
+  fusedScore: number;
+}
+
+function coerceFinite(value: number): number {
+  return Number.isFinite(value) ? value : 0;
+}
+
+function normalizeWeights(vectorWeight: number, textWeight: number): { vectorWeight: number; textWeight: number } {
+  const safeVector = Math.max(0, coerceFinite(vectorWeight));
+  const safeText = Math.max(0, coerceFinite(textWeight));
+  const total = safeVector + safeText;
+  if (total <= 0) return { vectorWeight: 0.5, textWeight: 0.5 };
+  return {
+    vectorWeight: safeVector / total,
+    textWeight: safeText / total,
+  };
+}
+
+export function softmaxNormalizeScores(scores: number[]): number[] {
+  if (scores.length === 0) return [];
+  const safeScores = scores.map(coerceFinite);
+  const maxScore = Math.max(...safeScores);
+  const exps = safeScores.map((score) => Math.exp(score - maxScore));
+  const sum = exps.reduce((acc, value) => acc + value, 0) || 1;
+  return exps.map((value) => value / sum);
+}
+
+function pickPreferredRow(existing: ScoredRetrievalRow | undefined, candidate: ScoredRetrievalRow): ScoredRetrievalRow {
+  if (!existing) return candidate;
+  if (candidate.score > existing.score) return candidate;
+  if (candidate.score < existing.score) return existing;
+  if (candidate.sourceOrder < existing.sourceOrder) return candidate;
+  if (candidate.sourceOrder > existing.sourceOrder) return existing;
+  if (candidate.creationDate < existing.creationDate) return candidate;
+  if (candidate.creationDate > existing.creationDate) return existing;
+  return candidate.path < existing.path ? candidate : existing;
+}
+
+function dedupeBestRows(rows: ScoredRetrievalRow[]): ScoredRetrievalRow[] {
+  const bestByPath = new Map<string, ScoredRetrievalRow>();
+  for (const row of rows) {
+    if (!row.path) continue;
+    bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row));
+  }
+  return [...bestByPath.values()];
+}
+
+export function fuseRetrievalRows(args: {
+  textRows: ScoredRetrievalRow[];
+  vectorRows: ScoredRetrievalRow[];
+  textWeight: number;
+  vectorWeight: number;
+  limit: number;
+}): FusedRetrievalRow[] {
+  const {
+    textRows,
+    vectorRows,
+    limit,
+  } = args;
+  const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight);
+  const dedupedTextRows = dedupeBestRows(textRows);
+  const dedupedVectorRows = dedupeBestRows(vectorRows);
+  const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score));
+  const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score));
+  const fusedByPath = new Map<string, FusedRetrievalRow>();
+
+  for (let i = 0; i < dedupedTextRows.length; i++) {
+    const row = dedupedTextRows[i];
+    fusedByPath.set(row.path, {
+      path: row.path,
+      content: row.content,
+      sourceOrder: row.sourceOrder,
+      creationDate: row.creationDate,
+      textScore: textNorm[i] ?? 0,
+      vectorScore: 0,
+      fusedScore: textWeight * (textNorm[i] ?? 0),
+    });
+  }
+
+  for (let i = 0; i < dedupedVectorRows.length; i++) {
+    const row = dedupedVectorRows[i];
+    const existing = fusedByPath.get(row.path);
+    const vectorScore = vectorNorm[i] ?? 0;
+    if (existing) {
+      if (existing.content.length === 0 && row.content.length > 0) existing.content = row.content;
+      existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder);
+      if (!existing.creationDate || row.creationDate < existing.creationDate) existing.creationDate = row.creationDate;
+      existing.vectorScore = vectorScore;
+      existing.fusedScore = (textWeight * existing.textScore) + (vectorWeight * existing.vectorScore);
+      continue;
+    }
+    fusedByPath.set(row.path, {
+      path: row.path,
+      content: row.content,
+      sourceOrder: row.sourceOrder,
+      creationDate: row.creationDate,
+      textScore: 0,
+      vectorScore,
+      fusedScore: vectorWeight * vectorScore,
+    });
+  }
+
+  return [...fusedByPath.values()]
+    .sort((a, b) =>
+      (b.fusedScore - a.fusedScore)
+      || (b.vectorScore - a.vectorScore)
+      || (b.textScore - a.textScore)
+      || (a.sourceOrder - b.sourceOrder)
+      || a.creationDate.localeCompare(b.creationDate)
+      || a.path.localeCompare(b.path))
+    .slice(0, Math.max(0, limit));
+}
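A minimal usage sketch (an assumed call site, not part of the diff): BM25 scores and cosine similarities arrive on incompatible scales, so fuseRetrievalRows softmax-normalizes each list before the weighted blend, and a path found by both retrievers accumulates credit from both terms.

    import { fuseRetrievalRows, type ScoredRetrievalRow } from "./hybrid-fusion.js";

    // Invented rows: raw BM25 scores (text) vs cosine similarities (vector).
    const textRows: ScoredRetrievalRow[] = [
      { path: "/summaries/2025-07-01/a.md", content: "a", sourceOrder: 0, creationDate: "2025-07-01", score: 10.0 },
      { path: "/summaries/2025-07-01/b.md", content: "b", sourceOrder: 1, creationDate: "2025-07-02", score: 9.5 },
    ];
    const vectorRows: ScoredRetrievalRow[] = [
      { path: "/summaries/2025-07-01/b.md", content: "b", sourceOrder: 0, creationDate: "2025-07-02", score: 0.83 },
    ];

    // 60/40 text/vector blend; normalizeWeights re-scales the weights to sum to 1.
    const fused = fuseRetrievalRows({ textRows, vectorRows, textWeight: 0.6, vectorWeight: 0.4, limit: 10 });
    // With such close BM25 scores, the extra vector term lifts b.md above a.md:
    // b.md = 0.6 * softmax-share(9.5) + 0.4 * 1.0, while a.md has no vector credit.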
diff --git a/src/utils/retrieval-mode.ts b/src/utils/retrieval-mode.ts
new file mode 100644
index 0000000..b3fcdfb
--- /dev/null
+++ b/src/utils/retrieval-mode.ts
@@ -0,0 +1,32 @@
+export function isSessionsOnlyMode(): boolean {
+  const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? "";
+  return /^(1|true|yes|on)$/i.test(raw.trim());
+}
+
+export type GrepRetrievalMode = "classic" | "embedding" | "hybrid";
+
+export function getGrepRetrievalMode(): GrepRetrievalMode {
+  const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase();
+  if (raw === "embedding" || raw === "hybrid") return raw;
+  return "classic";
+}
+
+export function isIndexDisabled(): boolean {
+  const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? "";
+  return /^(1|true|yes|on)$/i.test(raw.trim());
+}
+
+export function isSummaryBm25Disabled(): boolean {
+  const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? "";
+  return /^(1|true|yes|on)$/i.test(raw.trim());
+}
+
+export function isPsqlMode(): boolean {
+  const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? "";
+  return /^(1|true|yes|on)$/i.test(raw.trim());
+}
+
+export function isFactsSessionsOnlyPsqlMode(): boolean {
+  const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? "";
+  return /^(1|true|yes|on)$/i.test(raw.trim());
+}
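These helpers re-read their flags from the environment on every call, which keeps them trivially toggleable in tests. A quick sketch of the semantics (invented values):

    import { getGrepRetrievalMode, isSummaryBm25Disabled } from "./retrieval-mode.js";

    process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] = "HYBRID"; // trimmed + lowercased before matching
    process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] = "yes";   // any of 1/true/yes/on enables a flag

    getGrepRetrievalMode();   // "hybrid" (anything unrecognized falls back to "classic")
    isSummaryBm25Disabled();  // true; the HIVEMIND_* variable wins when both are set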
row.path : ""; + if (!path) return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) return null; + + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" + ? structuredBlurb + : truncate(description, 220); + + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb, + }; +} + +export function formatSummaryIndexEntry(entry: SummaryIndexEntry): string { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) parts.push(`[session](${entry.source})`); + if (entry.date) parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") parts.push(truncate(entry.blurb, 220)); + return parts.join(" — "); +} + +export function buildSummaryIndexLine(row: SummaryIndexRow | SummaryIndexEntry): string | null { + const entry = "label" in row && typeof row.label === "string" + ? row + : buildSummaryIndexEntry(row); + return entry ? 
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..1c54df7
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,1004 @@
+version = 1
+revision = 2
+requires-python = ">=3.11"
+
+[[package]]
+name = "annotated-doc"
+version = "0.0.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
+]
+
+[[package]]
+name = "anyio"
+version = "4.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "idna" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
+]
+
+[[package]]
+name = "certifi"
+version = "2026.2.25"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
+]
+
+[[package]]
+name = "click"
+version = "8.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/75/31212c6bf2503fdf920d87fee5d7a86a2e3bcf444984126f13d8e4016804/click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5", size = 302856, upload-time = "2026-04-03T19:14:45.118Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e4/20/71885d8b97d4f3dde17b1fdb92dbd4908b00541c5a3379787137285f602e/click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d", size = 108379, upload-time = "2026-04-03T19:14:43.505Z" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = 
"sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cuda-bindings" +version = "13.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/3a8241c6e19483ac1f1dcf5c10238205dcb8a6e9d0d4d4709240dff28ff4/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:721104c603f059780d287969be3d194a18d0cc3b713ed9049065a1107706759d", size = 5730273, upload-time = "2026-03-11T00:12:37.18Z" }, + { url = "https://files.pythonhosted.org/packages/e9/94/2748597f47bb1600cd466b20cab4159f1530a3a33fe7f70fee199b3abb9e/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1eba9504ac70667dd48313395fe05157518fd6371b532790e96fbb31bbb5a5e1", size = 6313924, upload-time = "2026-03-11T00:12:39.462Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/b2589d68acf7e3d63e2be330b84bc25712e97ed799affbca7edd7eae25d6/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e865447abfb83d6a98ad5130ed3c70b1fc295ae3eeee39fd07b4ddb0671b6788", size = 5722404, upload-time = "2026-03-11T00:12:44.041Z" }, + { url = "https://files.pythonhosted.org/packages/1f/92/f899f7bbb5617bb65ec52a6eac1e9a1447a86b916c4194f8a5001b8cde0c/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46d8776a55d6d5da9dd6e9858fba2efcda2abe6743871dee47dd06eb8cb6d955", size = 6320619, upload-time = "2026-03-11T00:12:45.939Z" }, + { url = "https://files.pythonhosted.org/packages/df/93/eef988860a3ca985f82c4f3174fc0cdd94e07331ba9a92e8e064c260337f/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6629ca2df6f795b784752409bcaedbd22a7a651b74b56a165ebc0c9dcbd504d0", size = 5614610, upload-time = "2026-03-11T00:12:50.337Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/6db3aba46864aee357ab2415135b3fe3da7e9f1fa0221fa2a86a5968099c/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dca0da053d3b4cc4869eff49c61c03f3c5dbaa0bcd712317a358d5b8f3f385d", size = 6149914, upload-time = "2026-03-11T00:12:52.374Z" }, + { url = "https://files.pythonhosted.org/packages/c0/87/87a014f045b77c6de5c8527b0757fe644417b184e5367db977236a141602/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6464b30f46692d6c7f65d4a0e0450d81dd29de3afc1bb515653973d01c2cd6e", size = 5685673, upload-time = "2026-03-11T00:12:56.371Z" }, + { url = "https://files.pythonhosted.org/packages/ee/5e/c0fe77a73aaefd3fff25ffaccaac69c5a63eafdf8b9a4c476626ef0ac703/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4af9f3e1be603fa12d5ad6cfca7844c9d230befa9792b5abdf7dd79979c3626", size = 6191386, upload-time = "2026-03-11T00:12:58.965Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/58/ed2c3b39c8dd5f96aa7a4abef0d47a73932c7a988e30f5fa428f00ed0da1/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df850a1ff8ce1b3385257b08e47b70e959932f5f432d0a4e46a355962b4e4771", size = 5507469, upload-time = "2026-03-11T00:13:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/1f/01/0c941b112ceeb21439b05895eace78ca1aa2eaaf695c8521a068fd9b4c00/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8a16384c6494e5485f39314b0b4afb04bee48d49edb16d5d8593fd35bbd231b", size = 6059693, upload-time = "2026-03-11T00:13:06.003Z" }, +] + +[[package]] +name = "cuda-pathfinder" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/d6/ac63065d33dd700fee7ebd7d287332401b54e31b9346e142f871e1f0b116/cuda_pathfinder-1.5.3-py3-none-any.whl", hash = "sha256:dff021123aedbb4117cc7ec81717bbfe198fb4e8b5f1ee57e0e084fec5c8577d", size = 49991, upload-time = "2026-04-14T20:09:27.037Z" }, +] + +[[package]] +name = "cuda-toolkit" +version = "13.0.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = "2025-12-19T23:24:07.328Z" }, +] + +[package.optional-dependencies] +cublas = [ + { name = "nvidia-cublas", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cudart = [ + { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufft = [ + { name = "nvidia-cufft", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufile = [ + { name = "nvidia-cufile", marker = "sys_platform == 'linux'" }, +] +cupti = [ + { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +curand = [ + { name = "nvidia-curand", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusolver = [ + { name = "nvidia-cusolver", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusparse = [ + { name = "nvidia-cusparse", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvjitlink = [ + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvrtc = [ + { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvtx = [ + { name = "nvidia-nvtx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] + +[[package]] +name = "filelock" +version = "3.29.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.3.0" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time 
= "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + +[[package]] +name = "hivemind-harrier-backfill" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "numpy" }, + { name = "safetensors" }, + { name = "torch" }, + { name = "transformers" }, +] + +[package.metadata] +requires-dist = [ + { name = "numpy", specifier = ">=1.26" }, + { name = "safetensors", specifier = ">=0.4" }, + { name = "torch", specifier = ">=2.4" }, + { name = "transformers", specifier = ">=4.57" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, + { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, + { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, + { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, + { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, + { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = 
"2023-03-07T16:47:09.197Z" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/c6/4218570d8c8ecc9704b5157a3348e486e84ef4be0ed3e38218ab473c83d2/numpy-2.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f983334aea213c99992053ede6168500e5f086ce74fbc4acc3f2b00f5762e9db", size = 16976799, upload-time = "2026-03-29T13:18:15.438Z" }, + { url = "https://files.pythonhosted.org/packages/dd/92/b4d922c4a5f5dab9ed44e6153908a5c665b71acf183a83b93b690996e39b/numpy-2.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72944b19f2324114e9dc86a159787333b77874143efcf89a5167ef83cfee8af0", size = 14971552, upload-time = "2026-03-29T13:18:18.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/dc/df98c095978fa6ee7b9a9387d1d58cbb3d232d0e69ad169a4ce784bde4fd/numpy-2.4.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:86b6f55f5a352b48d7fbfd2dbc3d5b780b2d79f4d3c121f33eb6efb22e9a2015", size = 5476566, upload-time = "2026-03-29T13:18:21.532Z" }, + { url = "https://files.pythonhosted.org/packages/28/34/b3fdcec6e725409223dd27356bdf5a3c2cc2282e428218ecc9cb7acc9763/numpy-2.4.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:ba1f4fc670ed79f876f70082eff4f9583c15fb9a4b89d6188412de4d18ae2f40", size = 6806482, upload-time = "2026-03-29T13:18:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/63417c13aa35d57bee1337c67446761dc25ea6543130cf868eace6e8157b/numpy-2.4.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a87ec22c87be071b6bdbd27920b129b94f2fc964358ce38f3822635a3e2e03d", size = 15973376, upload-time = "2026-03-29T13:18:26.677Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/9fcb7e0e69cef59cf10c746b84f7d58b08bc66a6b7d459783c5a4f6101a6/numpy-2.4.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df3775294accfdd75f32c74ae39fcba920c9a378a2fc18a12b6820aa8c1fb502", size = 16925137, upload-time = "2026-03-29T13:18:30.14Z" }, + { url = "https://files.pythonhosted.org/packages/7e/43/80020edacb3f84b9efdd1591120a4296462c23fd8db0dde1666f6ef66f13/numpy-2.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d4e437e295f18ec29bc79daf55e8a47a9113df44d66f702f02a293d93a2d6dd", size = 17329414, upload-time = "2026-03-29T13:18:33.733Z" }, + { url = "https://files.pythonhosted.org/packages/fd/06/af0658593b18a5f73532d377188b964f239eb0894e664a6c12f484472f97/numpy-2.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:6aa3236c78803afbcb255045fbef97a9e25a1f6c9888357d205ddc42f4d6eba5", size = 18658397, upload-time = "2026-03-29T13:18:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ce/13a09ed65f5d0ce5c7dd0669250374c6e379910f97af2c08c57b0608eee4/numpy-2.4.4-cp311-cp311-win32.whl", hash = "sha256:30caa73029a225b2d40d9fae193e008e24b2026b7ee1a867b7ee8d96ca1a448e", size = 6239499, upload-time = "2026-03-29T13:18:40.372Z" }, + { url = "https://files.pythonhosted.org/packages/bd/63/05d193dbb4b5eec1eca73822d80da98b511f8328ad4ae3ca4caf0f4db91d/numpy-2.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6bbe4eb67390b0a0265a2c25458f6b90a409d5d069f1041e6aff1e27e3d9a79e", size = 12614257, upload-time = "2026-03-29T13:18:42.95Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/8168052f080c26fa984c413305012be54741c9d0d74abd7fbeeccae3889f/numpy-2.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:fcfe2045fd2e8f3cb0ce9d4ba6dba6333b8fa05bb8a4939c908cd43322d14c7e", size = 10486775, upload-time = "2026-03-29T13:18:45.835Z" }, + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = 
"2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/6b/33/8fae8f964a4f63ed528264ddf25d2b683d0b663e3cba26961eb838a7c1bd/numpy-2.4.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:58c8b5929fcb8287cbd6f0a3fae19c6e03a5c48402ae792962ac465224a629a4", size = 16854491, upload-time = "2026-03-29T13:21:38.03Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d0/1aabee441380b981cf8cdda3ae7a46aa827d1b5a8cce84d14598bc94d6d9/numpy-2.4.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:eea7ac5d2dce4189771cedb559c738a71512768210dc4e4753b107a2048b3d0e", size = 14895830, upload-time = "2026-03-29T13:21:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b8/aafb0d1065416894fccf4df6b49ef22b8db045187949545bced89c034b8e/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:51fc224f7ca4d92656d5a5eb315f12eb5fe2c97a66249aa7b5f562528a3be38c", size = 5400927, upload-time = "2026-03-29T13:21:44.747Z" }, + { url = "https://files.pythonhosted.org/packages/d6/77/063baa20b08b431038c7f9ff5435540c7b7265c78cf56012a483019ca72d/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:28a650663f7314afc3e6ec620f44f333c386aad9f6fc472030865dc0ebb26ee3", size = 6715557, upload-time = "2026-03-29T13:21:47.406Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a8/379542d45a14f149444c5c4c4e7714707239ce9cc1de8c2803958889da14/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19710a9ca9992d7174e9c52f643d4272dcd1558c5f7af7f6f8190f633bd651a7", size = 15804253, upload-time = "2026-03-29T13:21:50.753Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c8/f0a45426d6d21e7ea3310a15cf90c43a14d9232c31a837702dba437f3373/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b2aec6af35c113b05695ebb5749a787acd63cafc83086a05771d1e1cd1e555f", size = 16753552, upload-time = "2026-03-29T13:21:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/f4c001f4714c3ad9ce037e18cf2b9c64871a84951eaa0baf683a9ca9301c/numpy-2.4.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2cf083b324a467e1ab358c105f6cad5ea950f50524668a80c486ff1db24e119", size = 12509075, upload-time = "2026-03-29T13:21:57.644Z" }, +] + +[[package]] +name = "nvidia-cublas" +version = "13.1.0.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/a5/fce49e2ae977e0ccc084e5adafceb4f0ac0c8333cb6863501618a7277f67/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c86fc7f7ae36d7528288c5d88098edcb7b02c633d262e7ddbb86b0ad91be5df2", size = 542851226, upload-time = "2025-10-09T08:59:04.818Z" }, + { url = "https://files.pythonhosted.org/packages/e7/44/423ac00af4dd95a5aeb27207e2c0d9b7118702149bf4704c3ddb55bb7429/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:ee8722c1f0145ab246bccb9e452153b5e0515fd094c3678df50b2a0888b8b171", size = 423133236, upload-time = "2025-10-09T08:59:32.536Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti" +version = "13.0.85" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc" +version = "13.0.88" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime" +version = "13.0.96" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" }, + { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu13" +version = "9.19.0.56" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/84/26025437c1e6b61a707442184fa0c03d083b661adf3a3eecfd6d21677740/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:6ed29ffaee1176c612daf442e4dd6cfeb6a0caa43ddcbeb59da94953030b1be4", size = 433781201, upload-time = "2026-02-03T20:40:53.805Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/0b4b932655d17a6da1b92fa92ab12844b053bb2ac2475e179ba6f043da1e/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:d20e1734305e9d68889a96e3f35094d733ff1f83932ebe462753973e53a572bf", size = 366066321, upload-time = "2026-02-03T20:44:52.837Z" }, +] + +[[package]] +name = "nvidia-cufft" +version = "12.0.0.61" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" }, +] + +[[package]] +name = "nvidia-cufile" +version = "1.15.1.6" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" }, + { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" }, +] + +[[package]] +name = "nvidia-curand" +version = "10.4.0.35" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" }, +] + +[[package]] +name = "nvidia-cusolver" +version = "12.0.4.66" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas" }, + { name = "nvidia-cusparse" }, + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" }, + { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" }, +] + +[[package]] +name = "nvidia-cusparse" +version = "12.6.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu13" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/10/8dcd1175260706a2fc92a16a52e306b71d4c1ea0b0cc4a9484183399818a/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:400c6ed1cf6780fc6efedd64ec9f1345871767e6a1a0a552a1ea0578117ea77c", size = 220791277, upload-time = "2025-08-13T19:22:40.982Z" }, + { url = "https://files.pythonhosted.org/packages/fd/53/43b0d71f4e702fa9733f8b4571fdca50a8813f1e450b656c239beff12315/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25e30a8a7323935d4ad0340b95a0b69926eee755767e8e0b1cf8dd85b197d3fd", size = 169884119, upload-time = "2025-08-13T19:23:41.967Z" }, +] + +[[package]] +name = "nvidia-nccl-cu13" +version = "2.28.9" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/55/1920646a2e43ffd4fc958536b276197ed740e9e0c54105b4bb3521591fc7/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:01c873ba1626b54caa12272ed228dc5b2781545e0ae8ba3f432a8ef1c6d78643", size = 196561677, upload-time = "2025-11-18T05:49:03.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b4/878fefaad5b2bcc6fcf8d474a25e3e3774bc5133e4b58adff4d0bca238bc/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:e4553a30f34195f3fa1da02a6da3d6337d28f2003943aa0a3d247bbc25fefc42", size = 196493177, upload-time = "2025-11-18T05:49:17.677Z" }, +] + +[[package]] +name = "nvidia-nvjitlink" +version = "13.0.88" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu13" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" }, + { url = "https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" }, +] + 
+[[package]] +name = "nvidia-nvtx" +version = "13.0.85" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = "2025-09-04T08:29:01.761Z" }, + { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, +] + +[[package]] +name = "packaging" +version = "26.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/de/0d2b39fb4af88a0258f3bac87dfcbb48e73fbdea4a2ed0e2213f9a4c2f9a/packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de", size = 215519, upload-time = "2026-04-14T21:12:49.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, + { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, + { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = 
"2025-09-25T21:32:01.31Z" }, + { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, + { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, + { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, + { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "regex" +version = "2026.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/3a246dbf05666918bd3664d9d787f84a9108f6f43cc953a077e4a7dfdb7e/regex-2026.4.4.tar.gz", hash = "sha256:e08270659717f6973523ce3afbafa53515c4dc5dcad637dc215b6fd50f689423", size = 416000, upload-time = "2026-04-03T20:56:28.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/7a/617356cbecdb452812a5d42f720d6d5096b360d4a4c1073af700ea140ad2/regex-2026.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4c36a85b00fadb85db9d9e90144af0a980e1a3d2ef9cd0f8a5bef88054657c6", size = 489415, upload-time = "2026-04-03T20:53:11.645Z" }, + { url = "https://files.pythonhosted.org/packages/20/e6/bf057227144d02e3ba758b66649e87531d744dda5f3254f48660f18ae9d8/regex-2026.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5453ecf9cd58b562967badd1edbf092b0588a3af9e32ee3d05c985077ce87", size = 291205, upload-time = "2026-04-03T20:53:13.289Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3b/637181b787dd1a820ba1c712cee2b4144cd84a32dc776ca067b12b2d70c8/regex-2026.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6aa809ed4dc3706cc38594d67e641601bd2f36d5555b2780ff074edfcb136cf8", size = 289225, upload-time = "2026-04-03T20:53:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/05/21/bac05d806ed02cd4b39d9c8e5b5f9a2998c94c3a351b7792e80671fa5315/regex-2026.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33424f5188a7db12958246a54f59a435b6cb62c5cf9c8d71f7cc49475a5fdada", size = 792434, upload-time = "2026-04-03T20:53:17.414Z" }, + { url = "https://files.pythonhosted.org/packages/d9/17/c65d1d8ae90b772d5758eb4014e1e011bb2db353fc4455432e6cc9100df7/regex-2026.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d346fccdde28abba117cc9edc696b9518c3307fbfcb689e549d9b5979018c6d", size = 861730, upload-time = "2026-04-03T20:53:18.903Z" }, + { url = "https://files.pythonhosted.org/packages/ad/64/933321aa082a2c6ee2785f22776143ba89840189c20d3b6b1d12b6aae16b/regex-2026.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:415a994b536440f5011aa77e50a4274d15da3245e876e5c7f19da349caaedd87", size = 906495, upload-time = "2026-04-03T20:53:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/01/ea/4c8d306e9c36ac22417336b1e02e7b358152c34dc379673f2d331143725f/regex-2026.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21e5eb86179b4c67b5759d452ea7c48eb135cd93308e7a260aa489ed2eb423a4", size = 799810, upload-time = "2026-04-03T20:53:22.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/ce/7605048f00e1379eba89d610c7d644d8f695dc9b26d3b6ecfa3132b872ff/regex-2026.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:312ec9dd1ae7d96abd8c5a36a552b2139931914407d26fba723f9e53c8186f86", size = 774242, upload-time = "2026-04-03T20:53:25.015Z" }, + { url = "https://files.pythonhosted.org/packages/e9/77/283e0d5023fde22cd9e86190d6d9beb21590a452b195ffe00274de470691/regex-2026.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0d2b28aa1354c7cd7f71b7658c4326f7facac106edd7f40eda984424229fd59", size = 781257, upload-time = 
"2026-04-03T20:53:26.918Z" }, + { url = "https://files.pythonhosted.org/packages/8b/fb/7f3b772be101373c8626ed34c5d727dcbb8abd42a7b1219bc25fd9a3cc04/regex-2026.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:349d7310eddff40429a099c08d995c6d4a4bfaf3ff40bd3b5e5cb5a5a3c7d453", size = 854490, upload-time = "2026-04-03T20:53:29.065Z" }, + { url = "https://files.pythonhosted.org/packages/85/30/56547b80f34f4dd2986e1cdd63b1712932f63b6c4ce2f79c50a6cd79d1c2/regex-2026.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:e7ab63e9fe45a9ec3417509e18116b367e89c9ceb6219222a3396fa30b147f80", size = 763544, upload-time = "2026-04-03T20:53:30.917Z" }, + { url = "https://files.pythonhosted.org/packages/ac/2f/ce060fdfea8eff34a8997603532e44cdb7d1f35e3bc253612a8707a90538/regex-2026.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fe896e07a5a2462308297e515c0054e9ec2dd18dfdc9427b19900b37dfe6f40b", size = 844442, upload-time = "2026-04-03T20:53:32.463Z" }, + { url = "https://files.pythonhosted.org/packages/e5/44/810cb113096a1dacbe82789fbfab2823f79d19b7f1271acecb7009ba9b88/regex-2026.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eb59c65069498dbae3c0ef07bbe224e1eaa079825a437fb47a479f0af11f774f", size = 789162, upload-time = "2026-04-03T20:53:34.039Z" }, + { url = "https://files.pythonhosted.org/packages/20/96/9647dd7f2ecf6d9ce1fb04dfdb66910d094e10d8fe53e9c15096d8aa0bd2/regex-2026.4.4-cp311-cp311-win32.whl", hash = "sha256:2a5d273181b560ef8397c8825f2b9d57013de744da9e8257b8467e5da8599351", size = 266227, upload-time = "2026-04-03T20:53:35.601Z" }, + { url = "https://files.pythonhosted.org/packages/33/80/74e13262460530c3097ff343a17de9a34d040a5dc4de9cf3a8241faab51c/regex-2026.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:9542ccc1e689e752594309444081582f7be2fdb2df75acafea8a075108566735", size = 278399, upload-time = "2026-04-03T20:53:37.021Z" }, + { url = "https://files.pythonhosted.org/packages/1c/3c/39f19f47f19dcefa3403f09d13562ca1c0fd07ab54db2bc03148f3f6b46a/regex-2026.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:b5f9fb784824a042be3455b53d0b112655686fdb7a91f88f095f3fee1e2a2a54", size = 270473, upload-time = "2026-04-03T20:53:38.633Z" }, + { url = "https://files.pythonhosted.org/packages/e5/28/b972a4d3df61e1d7bcf1b59fdb3cddef22f88b6be43f161bb41ebc0e4081/regex-2026.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c07ab8794fa929e58d97a0e1796b8b76f70943fa39df225ac9964615cf1f9d52", size = 490434, upload-time = "2026-04-03T20:53:40.219Z" }, + { url = "https://files.pythonhosted.org/packages/84/20/30041446cf6dc3e0eab344fc62770e84c23b6b68a3b657821f9f80cb69b4/regex-2026.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c785939dc023a1ce4ec09599c032cc9933d258a998d16ca6f2b596c010940eb", size = 292061, upload-time = "2026-04-03T20:53:41.862Z" }, + { url = "https://files.pythonhosted.org/packages/62/c8/3baa06d75c98c46d4cc4262b71fd2edb9062b5665e868bca57859dadf93a/regex-2026.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b1ce5c81c9114f1ce2f9288a51a8fd3aeea33a0cc440c415bf02da323aa0a76", size = 289628, upload-time = "2026-04-03T20:53:43.701Z" }, + { url = "https://files.pythonhosted.org/packages/31/87/3accf55634caad8c0acab23f5135ef7d4a21c39f28c55c816ae012931408/regex-2026.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:760ef21c17d8e6a4fe8cf406a97cf2806a4df93416ccc82fc98d25b1c20425be", size = 796651, upload-time = "2026-04-03T20:53:45.379Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/0c/aaa2c83f34efedbf06f61cb1942c25f6cf1ee3b200f832c4d05f28306c2e/regex-2026.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7088fcdcb604a4417c208e2169715800d28838fefd7455fbe40416231d1d47c1", size = 865916, upload-time = "2026-04-03T20:53:47.064Z" }, + { url = "https://files.pythonhosted.org/packages/d9/f6/8c6924c865124643e8f37823eca845dc27ac509b2ee58123685e71cd0279/regex-2026.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:07edca1ba687998968f7db5bc355288d0c6505caa7374f013d27356d93976d13", size = 912287, upload-time = "2026-04-03T20:53:49.422Z" }, + { url = "https://files.pythonhosted.org/packages/11/0e/a9f6f81013e0deaf559b25711623864970fe6a098314e374ccb1540a4152/regex-2026.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:993f657a7c1c6ec51b5e0ba97c9817d06b84ea5fa8d82e43b9405de0defdc2b9", size = 801126, upload-time = "2026-04-03T20:53:51.096Z" }, + { url = "https://files.pythonhosted.org/packages/71/61/3a0cc8af2dc0c8deb48e644dd2521f173f7e6513c6e195aad9aa8dd77ac5/regex-2026.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2b69102a743e7569ebee67e634a69c4cb7e59d6fa2e1aa7d3bdbf3f61435f62d", size = 776788, upload-time = "2026-04-03T20:53:52.889Z" }, + { url = "https://files.pythonhosted.org/packages/64/0b/8bb9cbf21ef7dee58e49b0fdb066a7aded146c823202e16494a36777594f/regex-2026.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dac006c8b6dda72d86ea3d1333d45147de79a3a3f26f10c1cf9287ca4ca0ac3", size = 785184, upload-time = "2026-04-03T20:53:55.627Z" }, + { url = "https://files.pythonhosted.org/packages/99/c2/d3e80e8137b25ee06c92627de4e4d98b94830e02b3e6f81f3d2e3f504cf5/regex-2026.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:50a766ee2010d504554bfb5f578ed2e066898aa26411d57e6296230627cdefa0", size = 859913, upload-time = "2026-04-03T20:53:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/9d5d876157d969c804622456ef250017ac7a8f83e0e14f903b9e6df5ce95/regex-2026.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9e2f5217648f68e3028c823df58663587c1507a5ba8419f4fdfc8a461be76043", size = 765732, upload-time = "2026-04-03T20:53:59.428Z" }, + { url = "https://files.pythonhosted.org/packages/82/80/b568935b4421388561c8ed42aff77247285d3ae3bb2a6ca22af63bae805e/regex-2026.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39d8de85a08e32632974151ba59c6e9140646dcc36c80423962b1c5c0a92e244", size = 852152, upload-time = "2026-04-03T20:54:01.505Z" }, + { url = "https://files.pythonhosted.org/packages/39/29/f0f81217e21cd998245da047405366385d5c6072048038a3d33b37a79dc0/regex-2026.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55d9304e0e7178dfb1e106c33edf834097ddf4a890e2f676f6c5118f84390f73", size = 789076, upload-time = "2026-04-03T20:54:03.323Z" }, + { url = "https://files.pythonhosted.org/packages/49/1d/1d957a61976ab9d4e767dd4f9d04b66cc0c41c5e36cf40e2d43688b5ae6f/regex-2026.4.4-cp312-cp312-win32.whl", hash = "sha256:04bb679bc0bde8a7bfb71e991493d47314e7b98380b083df2447cda4b6edb60f", size = 266700, upload-time = "2026-04-03T20:54:05.639Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/bf575d396aeb58ea13b06ef2adf624f65b70fafef6950a80fc3da9cae3bc/regex-2026.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:db0ac18435a40a2543dbb3d21e161a6c78e33e8159bd2e009343d224bb03bb1b", size = 277768, upload-time = 
"2026-04-03T20:54:07.312Z" }, + { url = "https://files.pythonhosted.org/packages/c9/27/049df16ec6a6828ccd72add3c7f54b4df029669bea8e9817df6fff58be90/regex-2026.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:4ce255cc05c1947a12989c6db801c96461947adb7a59990f1360b5983fab4983", size = 270568, upload-time = "2026-04-03T20:54:09.484Z" }, + { url = "https://files.pythonhosted.org/packages/9d/83/c4373bc5f31f2cf4b66f9b7c31005bd87fe66f0dce17701f7db4ee79ee29/regex-2026.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:62f5519042c101762509b1d717b45a69c0139d60414b3c604b81328c01bd1943", size = 490273, upload-time = "2026-04-03T20:54:11.202Z" }, + { url = "https://files.pythonhosted.org/packages/46/f8/fe62afbcc3cf4ad4ac9adeaafd98aa747869ae12d3e8e2ac293d0593c435/regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3790ba9fb5dd76715a7afe34dbe603ba03f8820764b1dc929dd08106214ed031", size = 291954, upload-time = "2026-04-03T20:54:13.412Z" }, + { url = "https://files.pythonhosted.org/packages/5a/92/4712b9fe6a33d232eeb1c189484b80c6c4b8422b90e766e1195d6e758207/regex-2026.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fae3c6e795d7678963f2170152b0d892cf6aee9ee8afc8c45e6be38d5107fe7", size = 289487, upload-time = "2026-04-03T20:54:15.824Z" }, + { url = "https://files.pythonhosted.org/packages/88/2c/f83b93f85e01168f1070f045a42d4c937b69fdb8dd7ae82d307253f7e36e/regex-2026.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:298c3ec2d53225b3bf91142eb9691025bab610e0c0c51592dde149db679b3d17", size = 796646, upload-time = "2026-04-03T20:54:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/df/55/61a2e17bf0c4dc57e11caf8dd11771280d8aaa361785f9e3bc40d653f4a7/regex-2026.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e9638791082eaf5b3ac112c587518ee78e083a11c4b28012d8fe2a0f536dfb17", size = 865904, upload-time = "2026-04-03T20:54:20.019Z" }, + { url = "https://files.pythonhosted.org/packages/45/32/1ac8ed1b5a346b5993a3d256abe0a0f03b0b73c8cc88d928537368ac65b6/regex-2026.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae3e764bd4c5ff55035dc82a8d49acceb42a5298edf6eb2fc4d328ee5dd7afae", size = 912304, upload-time = "2026-04-03T20:54:22.403Z" }, + { url = "https://files.pythonhosted.org/packages/26/47/2ee5c613ab546f0eddebf9905d23e07beb933416b1246c2d8791d01979b4/regex-2026.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffa81f81b80047ba89a3c69ae6a0f78d06f4a42ce5126b0eb2a0a10ad44e0b2e", size = 801126, upload-time = "2026-04-03T20:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/75/cd/41dacd129ca9fd20bd7d02f83e0fad83e034ac8a084ec369c90f55ef37e2/regex-2026.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f56ebf9d70305307a707911b88469213630aba821e77de7d603f9d2f0730687d", size = 776772, upload-time = "2026-04-03T20:54:26.319Z" }, + { url = "https://files.pythonhosted.org/packages/89/6d/5af0b588174cb5f46041fa7dd64d3fd5cd2fe51f18766703d1edc387f324/regex-2026.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:773d1dfd652bbffb09336abf890bfd64785c7463716bf766d0eb3bc19c8b7f27", size = 785228, upload-time = "2026-04-03T20:54:28.387Z" }, + { url = "https://files.pythonhosted.org/packages/b7/3b/f5a72b7045bd59575fc33bf1345f156fcfd5a8484aea6ad84b12c5a82114/regex-2026.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:d51d20befd5275d092cdffba57ded05f3c436317ee56466c8928ac32d960edaf", size = 860032, upload-time = "2026-04-03T20:54:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/39/a4/72a317003d6fcd7a573584a85f59f525dfe8f67e355ca74eb6b53d66a5e2/regex-2026.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0a51cdb3c1e9161154f976cb2bef9894bc063ac82f31b733087ffb8e880137d0", size = 765714, upload-time = "2026-04-03T20:54:32.789Z" }, + { url = "https://files.pythonhosted.org/packages/25/1e/5672e16f34dbbcb2560cc7e6a2fbb26dfa8b270711e730101da4423d3973/regex-2026.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae5266a82596114e41fb5302140e9630204c1b5f325c770bec654b95dd54b0aa", size = 852078, upload-time = "2026-04-03T20:54:34.546Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0d/c813f0af7c6cc7ed7b9558bac2e5120b60ad0fa48f813e4d4bd55446f214/regex-2026.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c882cd92ec68585e9c1cf36c447ec846c0d94edd706fe59e0c198e65822fd23b", size = 789181, upload-time = "2026-04-03T20:54:36.642Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6d/a344608d1adbd2a95090ddd906cec09a11be0e6517e878d02a5123e0917f/regex-2026.4.4-cp313-cp313-win32.whl", hash = "sha256:05568c4fbf3cb4fa9e28e3af198c40d3237cf6041608a9022285fe567ec3ad62", size = 266690, upload-time = "2026-04-03T20:54:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/54049f89b46235ca6f45cd6c88668a7050e77d4a15555e47dd40fde75263/regex-2026.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:3384df51ed52db0bea967e21458ab0a414f67cdddfd94401688274e55147bb81", size = 277733, upload-time = "2026-04-03T20:54:40.11Z" }, + { url = "https://files.pythonhosted.org/packages/0e/21/61366a8e20f4d43fb597708cac7f0e2baadb491ecc9549b4980b2be27d16/regex-2026.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:acd38177bd2c8e69a411d6521760806042e244d0ef94e2dd03ecdaa8a3c99427", size = 270565, upload-time = "2026-04-03T20:54:41.883Z" }, + { url = "https://files.pythonhosted.org/packages/f1/1e/3a2b9672433bef02f5d39aa1143ca2c08f311c1d041c464a42be9ae648dc/regex-2026.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f94a11a9d05afcfcfa640e096319720a19cc0c9f7768e1a61fceee6a3afc6c7c", size = 494126, upload-time = "2026-04-03T20:54:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4b/c132a4f4fe18ad3340d89fcb56235132b69559136036b845be3c073142ed/regex-2026.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:36bcb9d6d1307ab629edc553775baada2aefa5c50ccc0215fbfd2afcfff43141", size = 293882, upload-time = "2026-04-03T20:54:45.41Z" }, + { url = "https://files.pythonhosted.org/packages/f4/5f/eaa38092ce7a023656280f2341dbbd4ad5f05d780a70abba7bb4f4bea54c/regex-2026.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261c015b3e2ed0919157046d768774ecde57f03d8fa4ba78d29793447f70e717", size = 292334, upload-time = "2026-04-03T20:54:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f6/dd38146af1392dac33db7074ab331cec23cced3759167735c42c5460a243/regex-2026.4.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c228cf65b4a54583763645dcd73819b3b381ca8b4bb1b349dee1c135f4112c07", size = 811691, upload-time = "2026-04-03T20:54:49.074Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f0/dc54c2e69f5eeec50601054998ec3690d5344277e782bd717e49867c1d29/regex-2026.4.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:dd2630faeb6876fb0c287f664d93ddce4d50cd46c6e88e60378c05c9047e08ca", size = 871227, upload-time = "2026-04-03T20:54:51.035Z" }, + { url = "https://files.pythonhosted.org/packages/a1/af/cb16bd5dc61621e27df919a4449bbb7e5a1034c34d307e0a706e9cc0f3e3/regex-2026.4.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6a50ab11b7779b849472337191f3a043e27e17f71555f98d0092fa6d73364520", size = 917435, upload-time = "2026-04-03T20:54:52.994Z" }, + { url = "https://files.pythonhosted.org/packages/5c/71/8b260897f22996b666edd9402861668f45a2ca259f665ac029e6104a2d7d/regex-2026.4.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0734f63afe785138549fbe822a8cfeaccd1bae814c5057cc0ed5b9f2de4fc883", size = 816358, upload-time = "2026-04-03T20:54:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/1c/60/775f7f72a510ef238254906c2f3d737fc80b16ca85f07d20e318d2eea894/regex-2026.4.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4ee50606cb1967db7e523224e05f32089101945f859928e65657a2cbb3d278b", size = 785549, upload-time = "2026-04-03T20:54:57.01Z" }, + { url = "https://files.pythonhosted.org/packages/58/42/34d289b3627c03cf381e44da534a0021664188fa49ba41513da0b4ec6776/regex-2026.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6c1818f37be3ca02dcb76d63f2c7aaba4b0dc171b579796c6fbe00148dfec6b1", size = 801364, upload-time = "2026-04-03T20:54:58.981Z" }, + { url = "https://files.pythonhosted.org/packages/fc/20/f6ecf319b382a8f1ab529e898b222c3f30600fcede7834733c26279e7465/regex-2026.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f5bfc2741d150d0be3e4a0401a5c22b06e60acb9aa4daa46d9e79a6dcd0f135b", size = 866221, upload-time = "2026-04-03T20:55:00.88Z" }, + { url = "https://files.pythonhosted.org/packages/92/6a/9f16d3609d549bd96d7a0b2aee1625d7512ba6a03efc01652149ef88e74d/regex-2026.4.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:504ffa8a03609a087cad81277a629b6ce884b51a24bd388a7980ad61748618ff", size = 772530, upload-time = "2026-04-03T20:55:03.213Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f6/aa9768bc96a4c361ac96419fbaf2dcdc33970bb813df3ba9b09d5d7b6d96/regex-2026.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:70aadc6ff12e4b444586e57fc30771f86253f9f0045b29016b9605b4be5f7dfb", size = 856989, upload-time = "2026-04-03T20:55:05.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/b4/c671db3556be2473ae3e4bb7a297c518d281452871501221251ea4ecba57/regex-2026.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f4f83781191007b6ef43b03debc35435f10cad9b96e16d147efe84a1d48bdde4", size = 803241, upload-time = "2026-04-03T20:55:07.162Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5c/83e3b1d89fa4f6e5a1bc97b4abd4a9a97b3c1ac7854164f694f5f0ba98a0/regex-2026.4.4-cp313-cp313t-win32.whl", hash = "sha256:e014a797de43d1847df957c0a2a8e861d1c17547ee08467d1db2c370b7568baa", size = 269921, upload-time = "2026-04-03T20:55:09.62Z" }, + { url = "https://files.pythonhosted.org/packages/28/07/077c387121f42cdb4d92b1301133c0d93b5709d096d1669ab847dda9fe2e/regex-2026.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:b15b88b0d52b179712632832c1d6e58e5774f93717849a41096880442da41ab0", size = 281240, upload-time = "2026-04-03T20:55:11.521Z" }, + { url = "https://files.pythonhosted.org/packages/9d/22/ead4a4abc7c59a4d882662aa292ca02c8b617f30b6e163bc1728879e9353/regex-2026.4.4-cp313-cp313t-win_arm64.whl", hash = 
"sha256:586b89cdadf7d67bf86ae3342a4dcd2b8d70a832d90c18a0ae955105caf34dbe", size = 272440, upload-time = "2026-04-03T20:55:13.365Z" }, + { url = "https://files.pythonhosted.org/packages/f0/f5/ed97c2dc47b5fbd4b73c0d7d75f9ebc8eca139f2bbef476bba35f28c0a77/regex-2026.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2da82d643fa698e5e5210e54af90181603d5853cf469f5eedf9bfc8f59b4b8c7", size = 490343, upload-time = "2026-04-03T20:55:15.241Z" }, + { url = "https://files.pythonhosted.org/packages/80/e9/de4828a7385ec166d673a5790ad06ac48cdaa98bc0960108dd4b9cc1aef7/regex-2026.4.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:54a1189ad9d9357760557c91103d5e421f0a2dabe68a5cdf9103d0dcf4e00752", size = 291909, upload-time = "2026-04-03T20:55:17.558Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d6/5cfbfc97f3201a4d24b596a77957e092030dcc4205894bc035cedcfce62f/regex-2026.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:76d67d5afb1fe402d10a6403bae668d000441e2ab115191a804287d53b772951", size = 289692, upload-time = "2026-04-03T20:55:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/f2212d9fd56fe897e36d0110ba30ba2d247bd6410c5bd98499c7e5a1e1f2/regex-2026.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7cd3e4ee8d80447a83bbc9ab0c8459781fa77087f856c3e740d7763be0df27f", size = 796979, upload-time = "2026-04-03T20:55:22.56Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e3/a016c12675fbac988a60c7e1c16e67823ff0bc016beb27bd7a001dbdabc6/regex-2026.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e19e18c568d2866d8b6a6dfad823db86193503f90823a8f66689315ba28fbe8", size = 866744, upload-time = "2026-04-03T20:55:24.646Z" }, + { url = "https://files.pythonhosted.org/packages/af/a4/0b90ca4cf17adc3cb43de80ec71018c37c88ad64987e8d0d481a95ca60b5/regex-2026.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7698a6f38730fd1385d390d1ed07bb13dce39aa616aca6a6d89bea178464b9a4", size = 911613, upload-time = "2026-04-03T20:55:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3b/2b3dac0b82d41ab43aa87c6ecde63d71189d03fe8854b8ca455a315edac3/regex-2026.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:173a66f3651cdb761018078e2d9487f4cf971232c990035ec0eb1cdc6bf929a9", size = 800551, upload-time = "2026-04-03T20:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/25/fe/5365eb7aa0e753c4b5957815c321519ecab033c279c60e1b1ae2367fa810/regex-2026.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa7922bbb2cc84fa062d37723f199d4c0cd200245ce269c05db82d904db66b83", size = 776911, upload-time = "2026-04-03T20:55:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b3/7fb0072156bba065e3b778a7bc7b0a6328212be5dd6a86fd207e0c4f2dab/regex-2026.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:59f67cd0a0acaf0e564c20bbd7f767286f23e91e2572c5703bf3e56ea7557edb", size = 785751, upload-time = "2026-04-03T20:55:33.797Z" }, + { url = "https://files.pythonhosted.org/packages/02/1a/9f83677eb699273e56e858f7bd95acdbee376d42f59e8bfca2fd80d79df3/regex-2026.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:475e50f3f73f73614f7cba5524d6de49dee269df00272a1b85e3d19f6d498465", size = 860484, upload-time = "2026-04-03T20:55:35.745Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/7a/93937507b61cfcff8b4c5857f1b452852b09f741daa9acae15c971d8554e/regex-2026.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:a1c0c7d67b64d85ac2e1879923bad2f08a08f3004055f2f406ef73c850114bd4", size = 765939, upload-time = "2026-04-03T20:55:37.972Z" }, + { url = "https://files.pythonhosted.org/packages/86/ea/81a7f968a351c6552b1670ead861e2a385be730ee28402233020c67f9e0f/regex-2026.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:1371c2ccbb744d66ee63631cc9ca12aa233d5749972626b68fe1a649dd98e566", size = 851417, upload-time = "2026-04-03T20:55:39.92Z" }, + { url = "https://files.pythonhosted.org/packages/4c/7e/323c18ce4b5b8f44517a36342961a0306e931e499febbd876bb149d900f0/regex-2026.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:59968142787042db793348a3f5b918cf24ced1f23247328530e063f89c128a95", size = 789056, upload-time = "2026-04-03T20:55:42.303Z" }, + { url = "https://files.pythonhosted.org/packages/c0/af/e7510f9b11b1913b0cd44eddb784b2d650b2af6515bfce4cffcc5bfd1d38/regex-2026.4.4-cp314-cp314-win32.whl", hash = "sha256:59efe72d37fd5a91e373e5146f187f921f365f4abc1249a5ab446a60f30dd5f8", size = 272130, upload-time = "2026-04-03T20:55:44.995Z" }, + { url = "https://files.pythonhosted.org/packages/9a/51/57dae534c915e2d3a21490e88836fa2ae79dde3b66255ecc0c0a155d2c10/regex-2026.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:e0aab3ff447845049d676827d2ff714aab4f73f340e155b7de7458cf53baa5a4", size = 280992, upload-time = "2026-04-03T20:55:47.316Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5e/abaf9f4c3792e34edb1434f06717fae2b07888d85cb5cec29f9204931bf8/regex-2026.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:a7a5bb6aa0cf62208bb4fa079b0c756734f8ad0e333b425732e8609bd51ee22f", size = 273563, upload-time = "2026-04-03T20:55:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/ff/06/35da85f9f217b9538b99cbb170738993bcc3b23784322decb77619f11502/regex-2026.4.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:97850d0638391bdc7d35dc1c1039974dcb921eaafa8cc935ae4d7f272b1d60b3", size = 494191, upload-time = "2026-04-03T20:55:51.258Z" }, + { url = "https://files.pythonhosted.org/packages/54/5b/1bc35f479eef8285c4baf88d8c002023efdeebb7b44a8735b36195486ae7/regex-2026.4.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ee7337f88f2a580679f7bbfe69dc86c043954f9f9c541012f49abc554a962f2e", size = 293877, upload-time = "2026-04-03T20:55:53.214Z" }, + { url = "https://files.pythonhosted.org/packages/39/5b/f53b9ad17480b3ddd14c90da04bfb55ac6894b129e5dea87bcaf7d00e336/regex-2026.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7429f4e6192c11d659900c0648ba8776243bf396ab95558b8c51a345afeddde6", size = 292410, upload-time = "2026-04-03T20:55:55.736Z" }, + { url = "https://files.pythonhosted.org/packages/bb/56/52377f59f60a7c51aa4161eecf0b6032c20b461805aca051250da435ffc9/regex-2026.4.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4f10fbd5dd13dcf4265b4cc07d69ca70280742870c97ae10093e3d66000359", size = 811831, upload-time = "2026-04-03T20:55:57.802Z" }, + { url = "https://files.pythonhosted.org/packages/dd/63/8026310bf066f702a9c361f83a8c9658f3fe4edb349f9c1e5d5273b7c40c/regex-2026.4.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a152560af4f9742b96f3827090f866eeec5becd4765c8e0d3473d9d280e76a5a", size = 871199, upload-time = "2026-04-03T20:56:00.333Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/9f/a514bbb00a466dbb506d43f187a04047f7be1505f10a9a15615ead5080ee/regex-2026.4.4-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54170b3e95339f415d54651f97df3bff7434a663912f9358237941bbf9143f55", size = 917649, upload-time = "2026-04-03T20:56:02.445Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6b/8399f68dd41a2030218839b9b18360d79b86d22b9fab5ef477c7f23ca67c/regex-2026.4.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:07f190d65f5a72dcb9cf7106bfc3d21e7a49dd2879eda2207b683f32165e4d99", size = 816388, upload-time = "2026-04-03T20:56:04.595Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9c/103963f47c24339a483b05edd568594c2be486188f688c0170fd504b2948/regex-2026.4.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9a2741ce5a29d3c84b0b94261ba630ab459a1b847a0d6beca7d62d188175c790", size = 785746, upload-time = "2026-04-03T20:56:07.13Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ee/7f6054c0dec0cee3463c304405e4ff42e27cff05bf36fcb34be549ab17bd/regex-2026.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b26c30df3a28fd9793113dac7385a4deb7294a06c0f760dd2b008bd49a9139bc", size = 801483, upload-time = "2026-04-03T20:56:09.365Z" }, + { url = "https://files.pythonhosted.org/packages/30/c2/51d3d941cf6070dc00c3338ecf138615fc3cce0421c3df6abe97a08af61a/regex-2026.4.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:421439d1bee44b19f4583ccf42670ca464ffb90e9fdc38d37f39d1ddd1e44f1f", size = 866331, upload-time = "2026-04-03T20:56:12.039Z" }, + { url = "https://files.pythonhosted.org/packages/16/e8/76d50dcc122ac33927d939f350eebcfe3dbcbda96913e03433fc36de5e63/regex-2026.4.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b40379b53ecbc747fd9bdf4a0ea14eb8188ca1bd0f54f78893a39024b28f4863", size = 772673, upload-time = "2026-04-03T20:56:14.558Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6e/5f6bf75e20ea6873d05ba4ec78378c375cbe08cdec571c83fbb01606e563/regex-2026.4.4-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:08c55c13d2eef54f73eeadc33146fb0baaa49e7335eb1aff6ae1324bf0ddbe4a", size = 857146, upload-time = "2026-04-03T20:56:16.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/33/3c76d9962949e487ebba353a18e89399f292287204ac8f2f4cfc3a51c233/regex-2026.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9776b85f510062f5a75ef112afe5f494ef1635607bf1cc220c1391e9ac2f5e81", size = 803463, upload-time = "2026-04-03T20:56:18.923Z" }, + { url = "https://files.pythonhosted.org/packages/19/eb/ef32dcd2cb69b69bc0c3e55205bce94a7def48d495358946bc42186dcccc/regex-2026.4.4-cp314-cp314t-win32.whl", hash = "sha256:385edaebde5db5be103577afc8699fea73a0e36a734ba24870be7ffa61119d74", size = 275709, upload-time = "2026-04-03T20:56:20.996Z" }, + { url = "https://files.pythonhosted.org/packages/a0/86/c291bf740945acbf35ed7dbebf8e2eea2f3f78041f6bd7cdab80cb274dc0/regex-2026.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:5d354b18839328927832e2fa5f7c95b7a3ccc39e7a681529e1685898e6436d45", size = 285622, upload-time = "2026-04-03T20:56:23.641Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e7/ec846d560ae6a597115153c02ca6138a7877a1748b2072d9521c10a93e58/regex-2026.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:af0384cb01a33600c49505c27c6c57ab0b27bf84a74e28524c92ca897ebdac9d", size = 275773, upload-time = "2026-04-03T20:56:26.07Z" }, +] + +[[package]] +name = "rich" 
+version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + +[[package]] +name = "setuptools" +version = "81.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/1c/73e719955c59b8e424d015ab450f51c0af856ae46ea2da83eba51cc88de1/setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a", size = 1198299, upload-time = "2026-02-06T21:10:39.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, 
upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = 
"2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, +] + +[[package]] +name = "torch" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, + { name = "cuda-toolkit", extra = ["cublas", "cudart", "cufft", "cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" }, + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" }, + { name = "setuptools" }, + { name = "sympy" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ae/0d/98b410492609e34a155fa8b121b55c7dca229f39636851c3a9ec20edea21/torch-2.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7b6a60d48062809f58595509c524b88e6ddec3ebe25833d6462eeab81e5f2ce4", size = 80529712, upload-time = "2026-03-23T18:12:02.608Z" }, + { url = "https://files.pythonhosted.org/packages/84/03/acea680005f098f79fd70c1d9d5ccc0cb4296ec2af539a0450108232fc0c/torch-2.11.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d91aac77f24082809d2c5a93f52a5f085032740a1ebc9252a7b052ef5a4fddc6", size = 419718178, upload-time = "2026-03-23T18:10:46.675Z" }, + { url = "https://files.pythonhosted.org/packages/8c/8b/d7be22fbec9ffee6cff31a39f8750d4b3a65d349a286cf4aec74c2375662/torch-2.11.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7aa2f9bbc6d4595ba72138026b2074be1233186150e9292865e04b7a63b8c67a", size = 530604548, upload-time = "2026-03-23T18:10:03.569Z" }, + { url = "https://files.pythonhosted.org/packages/d1/bd/9912d30b68845256aabbb4a40aeefeef3c3b20db5211ccda653544ada4b6/torch-2.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:73e24aaf8f36ab90d95cd1761208b2eb70841c2a9ca1a3f9061b39fc5331b708", size = 114519675, upload-time = "2026-03-23T18:11:52.995Z" }, + { url = "https://files.pythonhosted.org/packages/6f/8b/69e3008d78e5cee2b30183340cc425081b78afc5eff3d080daab0adda9aa/torch-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b5866312ee6e52ea625cd211dcb97d6a2cdc1131a5f15cc0d87eec948f6dd34", size = 80606338, upload-time = "2026-03-23T18:11:34.781Z" }, + { url = "https://files.pythonhosted.org/packages/13/16/42e5915ebe4868caa6bac83a8ed59db57f12e9a61b7d749d584776ed53d5/torch-2.11.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f99924682ef0aa6a4ab3b1b76f40dc6e273fca09f367d15a524266db100a723f", size = 419731115, upload-time = "2026-03-23T18:11:06.944Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c9/82638ef24d7877510f83baf821f5619a61b45568ce21c0a87a91576510aa/torch-2.11.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0f68f4ac6d95d12e896c3b7a912b5871619542ec54d3649cf48cc1edd4dd2756", size = 530712279, upload-time = "2026-03-23T18:10:31.481Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ff/6756f1c7ee302f6d202120e0f4f05b432b839908f9071157302cedfc5232/torch-2.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbf39280699d1b869f55eac536deceaa1b60bd6788ba74f399cc67e60a5fab10", size = 114556047, upload-time = "2026-03-23T18:10:55.931Z" }, + { url = "https://files.pythonhosted.org/packages/87/89/5ea6722763acee56b045435fb84258db7375c48165ec8be7880ab2b281c5/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e6debd97ccd3205bbb37eb806a9d8219e1139d15419982c09e23ef7d4369d18", size = 80606801, upload-time = "2026-03-23T18:10:18.649Z" }, + { url = "https://files.pythonhosted.org/packages/32/d1/8ed2173589cbfe744ed54e5a73efc107c0085ba5777ee93a5f4c1ab90553/torch-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:63a68fa59de8f87acc7e85a5478bb2dddbb3392b7593ec3e78827c793c4b73fd", size = 419732382, upload-time = "2026-03-23T18:08:30.835Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e1/b73f7c575a4b8f87a5928f50a1e35416b5e27295d8be9397d5293e7e8d4c/torch-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:cc89b9b173d9adfab59fd227f0ab5e5516d9a52b658ae41d64e59d2e55a418db", size = 530711509, upload-time = "2026-03-23T18:08:47.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/82/3e3fcdd388fbe54e29fd3f991f36846ff4ac90b0d0181e9c8f7236565f82/torch-2.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:4dda3b3f52d121063a731ddb835f010dc137b920d7fec2778e52f60d8e4bf0cd", size = 114555842, upload-time = "2026-03-23T18:09:52.111Z" }, + { url = "https://files.pythonhosted.org/packages/db/38/8ac78069621b8c2b4979c2f96dc8409ef5e9c4189f6aac629189a78677ca/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8b394322f49af4362d4f80e424bcaca7efcd049619af03a4cf4501520bdf0fb4", size = 80959574, upload-time = "2026-03-23T18:10:14.214Z" }, + { url = "https://files.pythonhosted.org/packages/6d/6c/56bfb37073e7136e6dd86bfc6af7339946dd684e0ecf2155ac0eee687ae1/torch-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2658f34ce7e2dabf4ec73b45e2ca68aedad7a5be87ea756ad656eaf32bf1e1ea", size = 419732324, upload-time = "2026-03-23T18:09:36.604Z" }, + { url = "https://files.pythonhosted.org/packages/07/f4/1b666b6d61d3394cca306ea543ed03a64aad0a201b6cd159f1d41010aeb1/torch-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:98bb213c3084cfe176302949bdc360074b18a9da7ab59ef2edc9d9f742504778", size = 530596026, upload-time = "2026-03-23T18:09:20.842Z" }, + { url = "https://files.pythonhosted.org/packages/48/6b/30d1459fa7e4b67e9e3fe1685ca1d8bb4ce7c62ef436c3a615963c6c866c/torch-2.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a97b94bbf62992949b4730c6cd2cc9aee7b335921ee8dc207d930f2ed09ae2db", size = 114793702, upload-time = "2026-03-23T18:09:47.304Z" }, + { url = "https://files.pythonhosted.org/packages/26/0d/8603382f61abd0db35841148ddc1ffd607bf3100b11c6e1dab6d2fc44e72/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01018087326984a33b64e04c8cb5c2795f9120e0d775ada1f6638840227b04d7", size = 80573442, upload-time = "2026-03-23T18:09:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/c7/86/7cd7c66cb9cec6be330fff36db5bd0eef386d80c031b581ec81be1d4b26c/torch-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:2bb3cc54bd0dea126b0060bb1ec9de0f9c7f7342d93d436646516b0330cd5be7", size = 419749385, upload-time = "2026-03-23T18:07:33.77Z" }, + { url = "https://files.pythonhosted.org/packages/47/e8/b98ca2d39b2e0e4730c0ee52537e488e7008025bc77ca89552ff91021f7c/torch-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4dc8b3809469b6c30b411bb8c4cad3828efd26236153d9beb6a3ec500f211a60", size = 530716756, upload-time = "2026-03-23T18:07:50.02Z" }, + { url = "https://files.pythonhosted.org/packages/78/88/d4a4cda8362f8a30d1ed428564878c3cafb0d87971fbd3947d4c84552095/torch-2.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b4e811728bd0cc58fb2b0948fe939a1ee2bf1422f6025be2fca4c7bd9d79718", size = 114552300, upload-time = "2026-03-23T18:09:05.617Z" }, + { url = "https://files.pythonhosted.org/packages/bf/46/4419098ed6d801750f26567b478fc185c3432e11e2cad712bc6b4c2ab0d0/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8245477871c3700d4370352ffec94b103cfcb737229445cf9946cddb7b2ca7cd", size = 80959460, upload-time = "2026-03-23T18:09:00.818Z" }, + { url = "https://files.pythonhosted.org/packages/fd/66/54a56a4a6ceaffb567231994a9745821d3af922a854ed33b0b3a278e0a99/torch-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:ab9a8482f475f9ba20e12db84b0e55e2f58784bdca43a854a6ccd3fd4b9f75e6", size = 419735835, upload-time = "2026-03-23T18:07:18.974Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/e7/0b6665f533aa9e337662dc190425abc0af1fe3234088f4454c52393ded61/torch-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:563ed3d25542d7e7bbc5b235ccfacfeb97fb470c7fee257eae599adb8005c8a2", size = 530613405, upload-time = "2026-03-23T18:08:07.014Z" }, + { url = "https://files.pythonhosted.org/packages/cf/bf/c8d12a2c86dbfd7f40fb2f56fbf5a505ccf2d9ce131eb559dfc7c51e1a04/torch-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b2a43985ff5ef6ddd923bbcf99943e5f58059805787c5c9a2622bf05ca2965b0", size = 114792991, upload-time = "2026-03-23T18:08:19.216Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "transformers" +version = "5.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" }, +] + +[[package]] +name = "triton" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/2c/96f92f3c60387e14cc45aed49487f3486f89ea27106c1b1376913c62abe4/triton-3.6.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49df5ef37379c0c2b5c0012286f80174fcf0e073e5ade1ca9a86c36814553651", size = 176081190, upload-time = "2026-01-20T16:16:00.523Z" }, + { url = "https://files.pythonhosted.org/packages/e0/12/b05ba554d2c623bffa59922b94b0775673de251f468a9609bc9e45de95e9/triton-3.6.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e323d608e3a9bfcc2d9efcc90ceefb764a82b99dea12a86d643c72539ad5d3", size = 188214640, upload-time = "2026-01-20T16:00:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" }, + { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, + { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, + { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] diff --git a/vitest.config.ts b/vitest.config.ts index dccf756..16e7a3b 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -65,6 +65,12 @@ export default defineConfig({ functions: 80, lines: 80, }, + "src/embeddings/text.ts": { + statements: 80, + branches: 80, + functions: 80, + lines: 80, + }, // fix/index-md-include-sessions — 5-fix PR stacked on PR #61. // output-cap.ts is new in this PR (fix #5); virtual-table-query.ts was // heavily modified by fix #1 (index.md builder / fallback) and fix #4