From 48f5a07fc597ab4600f8882f2c91efb93aea5156 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 13:44:49 +0200
Subject: [PATCH 1/5] feat(compaction): run prune before compaction to reduce context size

---
 packages/opencode/src/session/prompt.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 97a37865dfa2..363ce2ee4309 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -1367,6 +1367,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
       }
 
       if (task?.type === "compaction") {
+        yield* compaction.prune({ sessionID })
         const result = yield* compaction.process({
           messages: msgs,
           parentID: lastUser.id,

From f9f66ed7b655b401513e5661d7e0b6cc58670b18 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 13:44:58 +0200
Subject: [PATCH 2/5] feat(token): model-aware token estimation via EMA from actual usage

---
 packages/opencode/src/session/processor.ts |  5 +++++
 packages/opencode/src/util/token.ts        | 13 +++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index be0977c1ddd2..cf4d49ecc845 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { Session } from "."
 import { LLM } from "./llm"
 import { MessageV2 } from "./message-v2"
 import { isOverflow } from "./overflow"
+import { Token } from "@/util/token"
 import { PartID } from "./schema"
 import type { SessionID } from "./schema"
 import { SessionRetry } from "./retry"
@@ -72,6 +73,7 @@ export namespace SessionProcessor {
     needsCompaction: boolean
     currentText: MessageV2.TextPart | undefined
     reasoningMap: Record
+    sentChars: number
   }
 
   type StreamEvent = Event
@@ -119,6 +121,7 @@ export namespace SessionProcessor {
       needsCompaction: false,
       currentText: undefined,
       reasoningMap: {},
+      sentChars: 0,
     }
     let aborted = false
    const slog = log.with({ sessionID: input.sessionID, messageID: input.assistantMessage.id })
@@ -360,6 +363,7 @@ export namespace SessionProcessor {
            ctx.assistantMessage.finish = value.finishReason
            ctx.assistantMessage.cost += usage.cost
            ctx.assistantMessage.tokens = usage.tokens
+           Token.updateRatio({ chars: ctx.sentChars, tokens: usage.tokens.input })
            yield* session.updatePart({
              id: PartID.ascending(),
              reason: value.finishReason,
@@ -539,6 +543,7 @@ export namespace SessionProcessor {
      yield* Effect.gen(function* () {
        ctx.currentText = undefined
        ctx.reasoningMap = {}
+       ctx.sentChars = JSON.stringify(streamInput.messages).length
 
        const stream = llm.stream(streamInput)
        yield* stream.pipe(
diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index cee5adc37713..6a120994ecae 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -1,7 +1,16 @@
 export namespace Token {
-  const CHARS_PER_TOKEN = 4
+  let charsPerToken = 4
 
   export function estimate(input: string) {
-    return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN))
+    return Math.max(0, Math.round((input || "").length / charsPerToken))
+  }
+
+  export function updateRatio({ chars, tokens }: { chars: number; tokens: number }) {
+    if (tokens <= 0 || chars <= 0) return
+    charsPerToken = charsPerToken * 0.7 + (chars / tokens) * 0.3
+  }
+
+  export function resetRatio() {
+    charsPerToken = 4
   }
 }
From 9dbb2373c0a09eb6faa07bb1cd4f9b777e086bfa Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 14:01:15 +0200
Subject: [PATCH 3/5] feat(compaction): smarter pruning with context-relative budget and better tool protection

---
 packages/opencode/src/session/compaction.ts | 54 +++++++++++++++++++--
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index b280971c76de..a7d0daca89d4 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -4,6 +4,7 @@ import { Session } from "."
 import { SessionID, MessageID, PartID } from "./schema"
 import { Instance } from "../project/instance"
 import { Provider } from "../provider/provider"
+import { ProviderTransform } from "../provider/transform"
 import { MessageV2 } from "./message-v2"
 import z from "zod"
 import { Token } from "../util/token"
@@ -33,8 +34,17 @@ export namespace SessionCompaction {
   }
 
   export const PRUNE_MINIMUM = 20_000
-  export const PRUNE_PROTECT = 40_000
-  const PRUNE_PROTECTED_TOOLS = ["skill"]
+  export const PRUNE_PROTECT_MIN = 20_000
+  export const PRUNE_PROTECT_RATIO = 0.15
+  export const PRUNE_PROACTIVE_RATIO = 0.5
+  const PRUNE_PROTECTED_TOOLS = [
+    "skill",
+    "compress",
+    "todowrite",
+    "background_output",
+    "lsp_diagnostics",
+    "lsp_symbols",
+  ]
 
   export interface Interface {
     readonly isOverflow: (input: {
       model: Provider.Model
     }) => Effect.Effect
     readonly prune: (input: { sessionID: SessionID }) => Effect.Effect
+    readonly pruneIfNeeded: (input: {
+      sessionID: SessionID
+      tokens: MessageV2.Assistant["tokens"]
+      model: Provider.Model
+    }) => Effect.Effect
     readonly process: (input: {
       parentID: MessageID
       messages: MessageV2.WithParts[]
@@ -100,6 +115,16 @@
       .pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
     if (!msgs) return
 
+    let contextLimit = 128_000
+    const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
+    if (lastAssistant && lastAssistant.info.role === "assistant") {
+      const model = yield* provider
+        .getModel(lastAssistant.info.providerID, lastAssistant.info.modelID)
+        .pipe(Effect.catchAll(() => Effect.succeed(undefined)))
+      if (model) contextLimit = model.limit.context || 128_000
+    }
+    const protect = Math.max(PRUNE_PROTECT_MIN, Math.round(contextLimit * PRUNE_PROTECT_RATIO))
+
     let total = 0
     let pruned = 0
     const toPrune: MessageV2.ToolPart[] = []
@@ -115,10 +140,10 @@
        if (part.type === "tool")
          if (part.state.status === "completed") {
            if (PRUNE_PROTECTED_TOOLS.includes(part.tool)) continue
-           if (part.state.time.compacted) break loop
+           if (part.state.time.compacted) continue
            const estimate = Token.estimate(part.state.output)
            total += estimate
-           if (total > PRUNE_PROTECT) {
+           if (total > protect) {
              pruned += estimate
              toPrune.push(part)
            }
@@ -368,9 +393,30 @@
      })
    })
 
+    const pruneIfNeeded = Effect.fn("SessionCompaction.pruneIfNeeded")(function* (input: {
+      sessionID: SessionID
+      tokens: MessageV2.Assistant["tokens"]
+      model: Provider.Model
+    }) {
+      const cfg = yield* config.get()
+      if (cfg.compaction?.prune === false) return
+      const context = input.model.limit.context
+      if (context === 0) return
+      const maxOutput = ProviderTransform.maxOutputTokens(input.model)
+      const reserved = cfg.compaction?.reserved ?? Math.min(20_000, maxOutput)
+      const usable = input.model.limit.input ? input.model.limit.input - reserved : context - maxOutput
+      const count =
+        input.tokens.total ||
+        input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
+      if (count < usable * PRUNE_PROACTIVE_RATIO) return
+      log.info("proactive prune triggered", { count, usable, ratio: count / usable })
+      yield* prune({ sessionID: input.sessionID })
+    })
+
     return Service.of({
       isOverflow,
       prune,
+      pruneIfNeeded,
       process: processCompaction,
       create,
     })

From 88721454536d60d39b393cd19b9774a984035a4b Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 14:01:26 +0200
Subject: [PATCH 4/5] feat(compaction): proactive pruning when context exceeds 50% threshold

---
 packages/opencode/src/session/processor.ts | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index cf4d49ecc845..cac0bf1175fa 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,7 +11,9 @@ import { Session } from "."
 import { LLM } from "./llm"
 import { MessageV2 } from "./message-v2"
 import { isOverflow } from "./overflow"
+import { SessionCompaction } from "./compaction"
 import { Token } from "@/util/token"
+import { ProviderTransform } from "@/provider/transform"
 import { PartID } from "./schema"
 import type { SessionID } from "./schema"
 import { SessionRetry } from "./retry"
@@ -399,6 +401,20 @@
            ) {
              ctx.needsCompaction = true
            }
+           if (!ctx.assistantMessage.summary) {
+             const context = ctx.model.limit.context
+             if (context > 0) {
+               const maxOutput = ProviderTransform.maxOutputTokens(ctx.model)
+               const reserved = (yield* config.get()).compaction?.reserved ?? Math.min(20_000, maxOutput)
+               const usable = ctx.model.limit.input ? ctx.model.limit.input - reserved : context - maxOutput
+               const count =
+                 usage.tokens.total ||
+                 usage.tokens.input + usage.tokens.output + usage.tokens.cache.read + usage.tokens.cache.write
+               if (count >= usable * 0.5) {
+                 yield* SessionCompaction.prune({ sessionID: ctx.sessionID })
+               }
+             }
+           }
            return
          }

From d7605c8227358c2dced07ec29e35d53c4b892bc2 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sun, 12 Apr 2026 00:17:23 +0200
Subject: [PATCH 5/5] core: fix GLM/ZhipuAI message format rejected by API

GLM models (glm-5-turbo, glm-4.7, etc.) accessed via a litellm proxy
returned "The messages parameter is illegal" because messages contained
empty content, consecutive assistant roles, unscrubbed tool IDs, and raw
reasoning parts that the GLM API does not support.
---
 packages/opencode/src/provider/transform.ts        |  82 ++++++++++
 .../opencode/test/provider/transform.test.ts       | 153 ++++++++++++++++++
 2 files changed, 235 insertions(+)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index dea8cf936af4..973e9b2dd253 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -151,6 +151,88 @@ export namespace ProviderTransform {
       return result
     }
 
+    // GLM / ZhipuAI: filter empty content, scrub tool IDs, fix consecutive roles, handle reasoning
+    if (model.api.id.toLowerCase().includes("glm") || ["zai", "zhipuai"].includes(model.providerID.toLowerCase())) {
+      const scrub = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "").substring(0, 64)
+      const isInterleaved = typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field
+      const field = isInterleaved ? (model.capabilities.interleaved as { field: string }).field : null
+
+      const result: ModelMessage[] = []
+      for (let i = 0; i < msgs.length; i++) {
+        const msg = msgs[i]
+        const nextMsg = msgs[i + 1]
+
+        // Skip empty string content
+        if (typeof msg.content === "string") {
+          if (msg.content === "") continue
+          result.push(msg)
+          continue
+        }
+
+        if (!Array.isArray(msg.content)) {
+          result.push(msg)
+          continue
+        }
+
+        // Filter empty text/reasoning parts
+        let filtered = msg.content.filter((part) => {
+          if ((part.type === "text" || part.type === "reasoning") && part.text === "") return false
+          return true
+        })
+
+        // Remove reasoning parts for non-interleaved models (GLM API doesn't support them)
+        if (!isInterleaved) filtered = filtered.filter((part) => part.type !== "reasoning")
+
+        if (filtered.length === 0) continue
+
+        // Scrub tool call/result IDs
+        if (msg.role === "assistant" || msg.role === "tool") {
+          filtered = filtered.map((part) => {
+            if (part.type === "tool-call" || part.type === "tool-result") {
+              return { ...part, toolCallId: scrub(part.toolCallId) }
+            }
+            return part
+          })
+        }
+
+        // Extract reasoning to providerOptions for interleaved models
+        if (isInterleaved && msg.role === "assistant") {
+          const reasoning = filtered.filter((p: any) => p.type === "reasoning")
+          const text = reasoning.map((p: any) => p.text).join("")
+          const rest = filtered.filter((p: any) => p.type !== "reasoning")
+
+          if (text) {
+            result.push({
+              ...msg,
+              content: rest,
+              providerOptions: {
+                ...msg.providerOptions,
+                openaiCompatible: {
+                  ...(msg.providerOptions as any)?.openaiCompatible,
+                  ...(field ? { [field]: text } : {}),
+                },
+              },
+            } as ModelMessage)
+          } else {
+            result.push({ ...msg, content: rest } as ModelMessage)
+          }
+        } else {
+          result.push({ ...msg, content: filtered } as ModelMessage)
+        }
+
+        // Fix consecutive assistant messages (GLM requires strict user/assistant alternation)
+        const last = result[result.length - 1]
+        if (last?.role === "assistant" && nextMsg?.role === "assistant") {
+          result.push({
+            role: "user",
+            content: [{ type: "text", text: "Done." }],
+          })
+        }
+      }
+
+      return result
+    }
+
     if (typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field) {
       const field = model.capabilities.interleaved.field
       return msgs.map((msg) => {
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 3a001e2756af..fc2c1fbf563f 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2889,3 +2889,156 @@ describe("ProviderTransform.variants", () => {
     })
   })
 })
+
+describe("ProviderTransform.message - GLM/ZhipuAI normalization", () => {
+  const glmModel = {
+    id: "zai/glm-5-turbo",
+    providerID: "zai",
+    api: {
+      id: "glm-5-turbo",
+      url: "https://open.bigmodel.cn/api/paas/v4",
+      npm: "@ai-sdk/openai-compatible",
+    },
+    name: "GLM-5 Turbo",
+    capabilities: {
+      temperature: true,
+      reasoning: false,
+      attachment: true,
+      toolcall: true,
+      input: { text: true, audio: false, image: true, video: false, pdf: false },
+      output: { text: true, audio: false, image: false, video: false, pdf: false },
+      interleaved: false,
+    },
+    cost: {
+      input: 0.001,
+      output: 0.002,
+      cache: { read: 0.0001, write: 0.0002 },
+    },
+    limit: {
+      context: 128000,
+      output: 8192,
+    },
+    status: "active",
+    options: {},
+    headers: {},
+  } as any
+
+  test("filters out messages with empty string content", () => {
+    const msgs = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "" },
+      { role: "user", content: "World" },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(2)
+    expect(result[0].content).toBe("Hello")
+    expect(result[1].content).toBe("World")
+  })
+
+  test("filters out empty text parts from array content", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "" },
+          { type: "text", text: "Hello" },
+          { type: "text", text: "" },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(1)
+    expect(result[0].content).toHaveLength(1)
+    expect(result[0].content[0]).toEqual({ type: "text", text: "Hello" })
+  })
+
+  test("removes reasoning parts for non-interleaved models", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          { type: "reasoning", text: "Thinking..." },
+          { type: "text", text: "Answer" },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(1)
+    expect(result[0].content).toHaveLength(1)
+    expect(result[0].content[0]).toEqual({ type: "text", text: "Answer" })
+  })
+
+  test("scrubs tool call IDs to alphanumeric", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: "call_abc-123!@#",
+            toolName: "bash",
+            input: { command: "ls" },
+          },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect((result[0].content as any[])[0].toolCallId).toBe("call_abc-123")
+  })
+
+  test("fixes consecutive assistant messages by inserting bridge user message", () => {
+    const msgs = [
+      { role: "assistant", content: [{ type: "text", text: "First reply" }] },
+      { role: "assistant", content: [{ type: "text", text: "Second reply" }] },
+      { role: "user", content: "Thanks" },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(4)
+    expect(result[0].role).toBe("assistant")
+    expect(result[0].content).toEqual([{ type: "text", text: "First reply" }])
+    expect(result[1].role).toBe("user")
+    expect(result[1].content).toEqual([{ type: "text", text: "Done." }])
+    expect(result[2].role).toBe("assistant")
+    expect(result[2].content).toEqual([{ type: "text", text: "Second reply" }])
+    expect(result[3].role).toBe("user")
+  })
+
+  test("does not normalize non-GLM providers", () => {
+    const openaiModel = {
+      ...glmModel,
+      providerID: "openai",
+      api: {
+        id: "gpt-4",
+        url: "https://api.openai.com",
+        npm: "@ai-sdk/openai",
+      },
+    }
+
+    const msgs = [
+      { role: "assistant", content: "" },
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "" },
+          { type: "tool-call", toolCallId: "call_abc!@#", toolName: "bash", input: {} },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, openaiModel, {})
+
+    expect(result).toHaveLength(2)
+    expect(result[0].content).toBe("")
+    expect((result[1].content as any[])[1].toolCallId).toBe("call_abc!@#")
+  })
+})