Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,88 @@ export namespace ProviderTransform {
return result
}

// GLM / ZhipuAI: filter empty content, scrub tool IDs, fix consecutive roles, handle reasoning
if (model.api.id.toLowerCase().includes("glm") || ["zai", "zhipuai"].includes(model.providerID.toLowerCase())) {
  // Strip tool IDs down to [a-zA-Z0-9_-] and at most 64 chars — presumably the
  // GLM API's constraint on tool_call ids; TODO confirm against ZhipuAI docs.
  // NOTE(review): scrubbing is lossy — two distinct IDs could collide after
  // stripping; verify upstream IDs cannot differ only in stripped characters.
  const scrub = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "").substring(0, 64)
  // Interleaved-reasoning models declare a provider-specific field name that
  // reasoning text must be delivered through instead of message content.
  const isInterleaved = typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field
  const field = isInterleaved ? (model.capabilities.interleaved as { field: string }).field : null

  const result: ModelMessage[] = []
  for (let i = 0; i < msgs.length; i++) {
    const msg = msgs[i]
    const nextMsg = msgs[i + 1]

    // Skip empty string content
    // NOTE(review): this early push also bypasses the consecutive-assistant
    // fix at the bottom of the loop, so two assistant messages with plain
    // string content are not separated by a synthetic user turn — confirm
    // string-content assistant messages cannot occur back to back.
    if (typeof msg.content === "string") {
      if (msg.content === "") continue
      result.push(msg)
      continue
    }

    // Non-array, non-string content passes through untouched.
    if (!Array.isArray(msg.content)) {
      result.push(msg)
      continue
    }

    // Filter empty text/reasoning parts
    let filtered = msg.content.filter((part) => {
      if ((part.type === "text" || part.type === "reasoning") && part.text === "") return false
      return true
    })

    // Remove reasoning parts for non-interleaved models (GLM API doesn't support them)
    if (!isInterleaved) filtered = filtered.filter((part) => part.type !== "reasoning")

    // Drop messages whose content became empty after filtering.
    if (filtered.length === 0) continue

    // Scrub tool call/result IDs
    if (msg.role === "assistant" || msg.role === "tool") {
      filtered = filtered.map((part) => {
        if (part.type === "tool-call" || part.type === "tool-result") {
          return { ...part, toolCallId: scrub(part.toolCallId) }
        }
        return part
      })
    }

    // Extract reasoning to providerOptions for interleaved models
    if (isInterleaved && msg.role === "assistant") {
      const reasoning = filtered.filter((p: any) => p.type === "reasoning")
      const text = reasoning.map((p: any) => p.text).join("")
      const rest = filtered.filter((p: any) => p.type !== "reasoning")

      if (text) {
        // Concatenated reasoning moves into the provider-declared field on
        // providerOptions.openaiCompatible; content keeps only the other parts.
        result.push({
          ...msg,
          content: rest,
          providerOptions: {
            ...msg.providerOptions,
            openaiCompatible: {
              ...(msg.providerOptions as any)?.openaiCompatible,
              ...(field ? { [field]: text } : {}),
            },
          },
        } as ModelMessage)
      } else {
        result.push({ ...msg, content: rest } as ModelMessage)
      }
    } else {
      result.push({ ...msg, content: filtered } as ModelMessage)
    }

    // Fix consecutive assistant messages (GLM requires strict user/assistant alternation)
    // NOTE(review): if the upcoming assistant message is later dropped (all
    // parts empty), the synthetic user turn inserted here may end up adjacent
    // to a real user message, breaking alternation the other way — confirm.
    const last = result[result.length - 1]
    if (last?.role === "assistant" && nextMsg?.role === "assistant") {
      result.push({
        role: "user",
        content: [{ type: "text", text: "Done." }],
      })
    }
  }

  return result
}

if (typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field) {
const field = model.capabilities.interleaved.field
return msgs.map((msg) => {
Expand Down
54 changes: 50 additions & 4 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Session } from "."
import { SessionID, MessageID, PartID } from "./schema"
import { Instance } from "../project/instance"
import { Provider } from "../provider/provider"
import { ProviderTransform } from "../provider/transform"
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util/token"
Expand Down Expand Up @@ -33,15 +34,29 @@ export namespace SessionCompaction {
}

// Minimum estimated token savings required before a prune pass is worthwhile.
export const PRUNE_MINIMUM = 20_000
// Absolute floor of recent tool-output tokens protected from pruning,
// regardless of how small the model's context window is.
export const PRUNE_PROTECT_MIN = 20_000
// Fraction of the context window protected from pruning (most recent outputs).
export const PRUNE_PROTECT_RATIO = 0.15
// Trigger a proactive prune once usage crosses this fraction of usable context.
export const PRUNE_PROACTIVE_RATIO = 0.5
// Tool outputs that are never pruned — presumably their results remain load-
// bearing for later turns (state, diagnostics, background output); confirm.
const PRUNE_PROTECTED_TOOLS = [
  "skill",
  "compress",
  "todowrite",
  "background_output",
  "lsp_diagnostics",
  "lsp_symbols",
]

export interface Interface {
readonly isOverflow: (input: {
tokens: MessageV2.Assistant["tokens"]
model: Provider.Model
}) => Effect.Effect<boolean>
readonly prune: (input: { sessionID: SessionID }) => Effect.Effect<void>
readonly pruneIfNeeded: (input: {
sessionID: SessionID
tokens: MessageV2.Assistant["tokens"]
model: Provider.Model
}) => Effect.Effect<void>
readonly process: (input: {
parentID: MessageID
messages: MessageV2.WithParts[]
Expand Down Expand Up @@ -100,6 +115,16 @@ export namespace SessionCompaction {
.pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
if (!msgs) return

let contextLimit = 128_000
const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
if (lastAssistant && lastAssistant.info.role === "assistant") {
try {
const model = yield* provider.getModel(lastAssistant.info.providerID, lastAssistant.info.modelID)
contextLimit = model.limit.context || 128_000
} catch {}
}
const protect = Math.max(PRUNE_PROTECT_MIN, Math.round(contextLimit * PRUNE_PROTECT_RATIO))

let total = 0
let pruned = 0
const toPrune: MessageV2.ToolPart[] = []
Expand All @@ -115,10 +140,10 @@ export namespace SessionCompaction {
if (part.type === "tool")
if (part.state.status === "completed") {
if (PRUNE_PROTECTED_TOOLS.includes(part.tool)) continue
if (part.state.time.compacted) break loop
if (part.state.time.compacted) continue
const estimate = Token.estimate(part.state.output)
total += estimate
if (total > PRUNE_PROTECT) {
if (total > protect) {
pruned += estimate
toPrune.push(part)
}
Expand Down Expand Up @@ -368,9 +393,30 @@ When constructing the summary, try to stick to this template:
})
})

/**
 * Proactively prune old tool outputs once a session's token usage crosses
 * PRUNE_PROACTIVE_RATIO of the model's usable context window.
 *
 * No-op when `compaction.prune` is disabled in config or the model reports
 * no context limit.
 */
const pruneIfNeeded = Effect.fn("SessionCompaction.pruneIfNeeded")(function* (input: {
  sessionID: SessionID
  tokens: MessageV2.Assistant["tokens"]
  model: Provider.Model
}) {
  const cfg = yield* config.get()
  if (cfg.compaction?.prune === false) return
  const context = input.model.limit.context
  // A context limit of 0 reads as "unknown" — nothing to compare against.
  if (context === 0) return
  const maxOutput = ProviderTransform.maxOutputTokens(input.model)
  // Tokens held back for the model's reply: configurable, else capped at 20k.
  const reserved = cfg.compaction?.reserved ?? Math.min(20_000, maxOutput)
  // NOTE(review): `reserved` is subtracted only in the explicit input-limit
  // branch; the fallback subtracts maxOutput instead — confirm the asymmetry
  // is intentional.
  const usable = input.model.limit.input ? input.model.limit.input - reserved : context - maxOutput
  // Prefer the provider-reported total; otherwise sum the components
  // (cache reads/writes occupy context too).
  const count =
    input.tokens.total ||
    input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
  if (count < usable * PRUNE_PROACTIVE_RATIO) return
  log.info("proactive prune triggered", { count, usable, ratio: count / usable })
  yield* prune({ sessionID: input.sessionID })
})

return Service.of({
isOverflow,
prune,
pruneIfNeeded,
process: processCompaction,
create,
})
Expand Down
21 changes: 21 additions & 0 deletions packages/opencode/src/session/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import { Session } from "."
import { LLM } from "./llm"
import { MessageV2 } from "./message-v2"
import { isOverflow } from "./overflow"
import { SessionCompaction } from "./compaction"
import { Token } from "@/util/token"
import { ProviderTransform } from "@/provider/transform"
import { PartID } from "./schema"
import type { SessionID } from "./schema"
import { SessionRetry } from "./retry"
Expand Down Expand Up @@ -72,6 +75,7 @@ export namespace SessionProcessor {
needsCompaction: boolean
currentText: MessageV2.TextPart | undefined
reasoningMap: Record<string, MessageV2.ReasoningPart>
sentChars: number
}

type StreamEvent = Event
Expand Down Expand Up @@ -119,6 +123,7 @@ export namespace SessionProcessor {
needsCompaction: false,
currentText: undefined,
reasoningMap: {},
sentChars: 0,
}
let aborted = false
const slog = log.with({ sessionID: input.sessionID, messageID: input.assistantMessage.id })
Expand Down Expand Up @@ -360,6 +365,7 @@ export namespace SessionProcessor {
ctx.assistantMessage.finish = value.finishReason
ctx.assistantMessage.cost += usage.cost
ctx.assistantMessage.tokens = usage.tokens
Token.updateRatio({ chars: ctx.sentChars, tokens: usage.tokens.input })
yield* session.updatePart({
id: PartID.ascending(),
reason: value.finishReason,
Expand Down Expand Up @@ -395,6 +401,20 @@ export namespace SessionProcessor {
) {
ctx.needsCompaction = true
}
if (!ctx.assistantMessage.summary) {
const context = ctx.model.limit.context
if (context > 0) {
const maxOutput = ProviderTransform.maxOutputTokens(ctx.model)
const reserved = (yield* config.get()).compaction?.reserved ?? Math.min(20_000, maxOutput)
const usable = ctx.model.limit.input ? ctx.model.limit.input - reserved : context - maxOutput
const count =
usage.tokens.total ||
usage.tokens.input + usage.tokens.output + usage.tokens.cache.read + usage.tokens.cache.write
if (count >= usable * 0.5) {
SessionCompaction.prune({ sessionID: ctx.sessionID })
}
}
}
return
}

Expand Down Expand Up @@ -539,6 +559,7 @@ export namespace SessionProcessor {
yield* Effect.gen(function* () {
ctx.currentText = undefined
ctx.reasoningMap = {}
ctx.sentChars = JSON.stringify(streamInput.messages).length
const stream = llm.stream(streamInput)

yield* stream.pipe(
Expand Down
1 change: 1 addition & 0 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
}

if (task?.type === "compaction") {
yield* compaction.prune({ sessionID })
const result = yield* compaction.process({
messages: msgs,
parentID: lastUser.id,
Expand Down
13 changes: 11 additions & 2 deletions packages/opencode/src/util/token.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
export namespace Token {
  // Adaptive chars-per-token ratio used by `estimate`. Starts at the common
  // ~4-chars-per-token heuristic and is refined by `updateRatio` as real
  // usage numbers come back from the provider.
  let charsPerToken = 4

  /**
   * Roughly estimate the token count of `input` from its character length.
   * Returns 0 for empty/nullish input (the `|| ""` guards callers that pass
   * null/undefined despite the declared type).
   */
  export function estimate(input: string) {
    return Math.max(0, Math.round((input || "").length / charsPerToken))
  }

  /**
   * Fold an observed chars/tokens sample into the ratio via an exponential
   * moving average (70% old, 30% new) so a single outlier cannot swing
   * estimates. Non-positive or non-finite samples are ignored — without the
   * finiteness guard a NaN sample would permanently poison the ratio.
   */
  export function updateRatio({ chars, tokens }: { chars: number; tokens: number }) {
    if (!Number.isFinite(chars) || !Number.isFinite(tokens)) return
    if (tokens <= 0 || chars <= 0) return
    charsPerToken = charsPerToken * 0.7 + (chars / tokens) * 0.3
  }

  /** Restore the default ratio (e.g. between sessions or in tests). */
  export function resetRatio() {
    charsPerToken = 4
  }
}
Loading
Loading