From 48f5a07fc597ab4600f8882f2c91efb93aea5156 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 13:44:49 +0200
Subject: [PATCH 1/5] feat(compaction): run prune before compaction to reduce context size

---
 packages/opencode/src/session/prompt.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 97a37865dfa2..363ce2ee4309 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -1367,6 +1367,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
       }
 
       if (task?.type === "compaction") {
+        yield* compaction.prune({ sessionID })
         const result = yield* compaction.process({
           messages: msgs,
           parentID: lastUser.id,

From f9f66ed7b655b401513e5661d7e0b6cc58670b18 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 13:44:58 +0200
Subject: [PATCH 2/5] feat(token): model-aware token estimation via EMA from actual usage

---
 packages/opencode/src/session/processor.ts |  5 +++++
 packages/opencode/src/util/token.ts        | 13 +++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index be0977c1ddd2..cf4d49ecc845 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { Session } from "."
 import { LLM } from "./llm"
 import { MessageV2 } from "./message-v2"
 import { isOverflow } from "./overflow"
+import { Token } from "@/util/token"
 import { PartID } from "./schema"
 import type { SessionID } from "./schema"
 import { SessionRetry } from "./retry"
@@ -72,6 +73,7 @@ export namespace SessionProcessor {
     needsCompaction: boolean
     currentText: MessageV2.TextPart | undefined
     reasoningMap: Record
+    sentChars: number
   }
 
   type StreamEvent = Event
@@ -119,6 +121,7 @@ export namespace SessionProcessor {
       needsCompaction: false,
       currentText: undefined,
       reasoningMap: {},
+      sentChars: 0,
     }
     let aborted = false
    const slog = log.with({ sessionID: input.sessionID, messageID: input.assistantMessage.id })
@@ -360,6 +363,7 @@ export namespace SessionProcessor {
            ctx.assistantMessage.finish = value.finishReason
            ctx.assistantMessage.cost += usage.cost
            ctx.assistantMessage.tokens = usage.tokens
+           Token.updateRatio({ chars: ctx.sentChars, tokens: usage.tokens.input })
            yield* session.updatePart({
              id: PartID.ascending(),
              reason: value.finishReason,
@@ -539,6 +543,7 @@ export namespace SessionProcessor {
      yield* Effect.gen(function* () {
        ctx.currentText = undefined
        ctx.reasoningMap = {}
+       ctx.sentChars = JSON.stringify(streamInput.messages).length
 
        const stream = llm.stream(streamInput)
        yield* stream.pipe(
diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index cee5adc37713..6a120994ecae 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -1,7 +1,16 @@
 export namespace Token {
-  const CHARS_PER_TOKEN = 4
+  let charsPerToken = 4
 
   export function estimate(input: string) {
-    return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN))
+    return Math.max(0, Math.round((input || "").length / charsPerToken))
+  }
+
+  export function updateRatio({ chars, tokens }: { chars: number; tokens: number }) {
+    if (tokens <= 0 || chars <= 0) return
+    charsPerToken = charsPerToken * 0.7 + (chars / tokens) * 0.3
+  }
+
+  export function resetRatio() {
+    charsPerToken = 4
   }
 }
From 9dbb2373c0a09eb6faa07bb1cd4f9b777e086bfa Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 14:01:15 +0200
Subject: [PATCH 3/5] feat(compaction): smarter pruning with context-relative budget and better tool protection

---
 packages/opencode/src/session/compaction.ts | 54 +++++++++++++++++++--
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index b280971c76de..a7d0daca89d4 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -4,6 +4,7 @@ import { Session } from "."
 import { SessionID, MessageID, PartID } from "./schema"
 import { Instance } from "../project/instance"
 import { Provider } from "../provider/provider"
+import { ProviderTransform } from "../provider/transform"
 import { MessageV2 } from "./message-v2"
 import z from "zod"
 import { Token } from "../util/token"
@@ -33,8 +34,17 @@ export namespace SessionCompaction {
   }
 
   export const PRUNE_MINIMUM = 20_000
-  export const PRUNE_PROTECT = 40_000
-  const PRUNE_PROTECTED_TOOLS = ["skill"]
+  export const PRUNE_PROTECT_MIN = 20_000
+  export const PRUNE_PROTECT_RATIO = 0.15
+  export const PRUNE_PROACTIVE_RATIO = 0.5
+  const PRUNE_PROTECTED_TOOLS = [
+    "skill",
+    "compress",
+    "todowrite",
+    "background_output",
+    "lsp_diagnostics",
+    "lsp_symbols",
+  ]
 
   export interface Interface {
     readonly isOverflow: (input: {
       model: Provider.Model
     }) => Effect.Effect
     readonly prune: (input: { sessionID: SessionID }) => Effect.Effect
+    readonly pruneIfNeeded: (input: {
+      sessionID: SessionID
+      tokens: MessageV2.Assistant["tokens"]
+      model: Provider.Model
+    }) => Effect.Effect
     readonly process: (input: {
       parentID: MessageID
       messages: MessageV2.WithParts[]
@@ -100,6 +115,16 @@
       .pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
     if (!msgs) return
 
+    let contextLimit = 128_000
+    const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
+    if (lastAssistant && lastAssistant.info.role === "assistant") {
+      const model = yield* provider
+        .getModel(lastAssistant.info.providerID, lastAssistant.info.modelID)
+        .pipe(Effect.catchAll(() => Effect.succeed(undefined)))
+      if (model) contextLimit = model.limit.context || 128_000
+    }
+    const protect = Math.max(PRUNE_PROTECT_MIN, Math.round(contextLimit * PRUNE_PROTECT_RATIO))
+
     let total = 0
     let pruned = 0
     const toPrune: MessageV2.ToolPart[] = []
@@ -115,10 +140,10 @@
        if (part.type === "tool")
          if (part.state.status === "completed") {
            if (PRUNE_PROTECTED_TOOLS.includes(part.tool)) continue
-           if (part.state.time.compacted) break loop
+           if (part.state.time.compacted) continue
            const estimate = Token.estimate(part.state.output)
            total += estimate
-           if (total > PRUNE_PROTECT) {
+           if (total > protect) {
              pruned += estimate
              toPrune.push(part)
            }
@@ -368,9 +393,30 @@
      })
    })
 
+    const pruneIfNeeded = Effect.fn("SessionCompaction.pruneIfNeeded")(function* (input: {
+      sessionID: SessionID
+      tokens: MessageV2.Assistant["tokens"]
+      model: Provider.Model
+    }) {
+      const cfg = yield* config.get()
+      if (cfg.compaction?.prune === false) return
+      const context = input.model.limit.context
+      if (context === 0) return
+      const maxOutput = ProviderTransform.maxOutputTokens(input.model)
+      const reserved = cfg.compaction?.reserved ?? Math.min(20_000, maxOutput)
+      const usable = input.model.limit.input ? input.model.limit.input - reserved : context - maxOutput
+      const count =
+        input.tokens.total ||
+        input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
+      if (count < usable * PRUNE_PROACTIVE_RATIO) return
+      log.info("proactive prune triggered", { count, usable, ratio: count / usable })
+      yield* prune({ sessionID: input.sessionID })
+    })
+
     return Service.of({
       isOverflow,
       prune,
+      pruneIfNeeded,
       process: processCompaction,
       create,
     })

From 88721454536d60d39b393cd19b9774a984035a4b Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sat, 11 Apr 2026 14:01:26 +0200
Subject: [PATCH 4/5] feat(compaction): proactive pruning when context exceeds 50% threshold

---
 packages/opencode/src/session/processor.ts | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index cf4d49ecc845..cac0bf1175fa 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,7 +11,9 @@ import { Session } from "."
 import { LLM } from "./llm"
 import { MessageV2 } from "./message-v2"
 import { isOverflow } from "./overflow"
+import { SessionCompaction } from "./compaction"
 import { Token } from "@/util/token"
+import { ProviderTransform } from "@/provider/transform"
 import { PartID } from "./schema"
 import type { SessionID } from "./schema"
 import { SessionRetry } from "./retry"
@@ -399,6 +401,20 @@
            ) {
              ctx.needsCompaction = true
            }
+           if (!ctx.assistantMessage.summary) {
+             const context = ctx.model.limit.context
+             if (context > 0) {
+               const maxOutput = ProviderTransform.maxOutputTokens(ctx.model)
+               const reserved = (yield* config.get()).compaction?.reserved ?? Math.min(20_000, maxOutput)
+               const usable = ctx.model.limit.input ? ctx.model.limit.input - reserved : context - maxOutput
+               const count =
+                 usage.tokens.total ||
+                 usage.tokens.input + usage.tokens.output + usage.tokens.cache.read + usage.tokens.cache.write
+               if (count >= usable * 0.5) {
+                 yield* SessionCompaction.prune({ sessionID: ctx.sessionID })
+               }
+             }
+           }
            return
          }

From d7605c8227358c2dced07ec29e35d53c4b892bc2 Mon Sep 17 00:00:00 2001
From: "tobias@tobias-weiss.org"
Date: Sun, 12 Apr 2026 00:17:23 +0200
Subject: [PATCH 5/5] core: fix GLM/ZhipuAI message format rejected by API

GLM models (glm-5-turbo, glm-4.7, etc.) accessed via a litellm proxy
returned "The messages parameter is illegal" because messages contained
empty content, consecutive assistant roles, unscrubbed tool IDs, and raw
reasoning parts that the GLM API does not support.
---
 packages/opencode/src/provider/transform.ts        |  82 ++++++++++
 .../opencode/test/provider/transform.test.ts       | 153 ++++++++++++++++++
 2 files changed, 235 insertions(+)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index dea8cf936af4..973e9b2dd253 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -151,6 +151,88 @@ export namespace ProviderTransform {
       return result
     }
 
+    // GLM / ZhipuAI: filter empty content, scrub tool IDs, fix consecutive roles, handle reasoning
+    if (model.api.id.toLowerCase().includes("glm") || ["zai", "zhipuai"].includes(model.providerID.toLowerCase())) {
+      const scrub = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "").substring(0, 64)
+      const isInterleaved = typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field
+      const field = isInterleaved ? (model.capabilities.interleaved as { field: string }).field : null
+
+      const result: ModelMessage[] = []
+      for (let i = 0; i < msgs.length; i++) {
+        const msg = msgs[i]
+        const nextMsg = msgs[i + 1]
+
+        // Skip empty string content
+        if (typeof msg.content === "string") {
+          if (msg.content === "") continue
+          result.push(msg)
+          continue
+        }
+
+        if (!Array.isArray(msg.content)) {
+          result.push(msg)
+          continue
+        }
+
+        // Filter empty text/reasoning parts
+        let filtered = msg.content.filter((part) => {
+          if ((part.type === "text" || part.type === "reasoning") && part.text === "") return false
+          return true
+        })
+
+        // Remove reasoning parts for non-interleaved models (GLM API doesn't support them)
+        if (!isInterleaved) filtered = filtered.filter((part) => part.type !== "reasoning")
+
+        if (filtered.length === 0) continue
+
+        // Scrub tool call/result IDs
+        if (msg.role === "assistant" || msg.role === "tool") {
+          filtered = filtered.map((part) => {
+            if (part.type === "tool-call" || part.type === "tool-result") {
+              return { ...part, toolCallId: scrub(part.toolCallId) }
+            }
+            return part
+          })
+        }
+
+        // Extract reasoning to providerOptions for interleaved models
+        if (isInterleaved && msg.role === "assistant") {
+          const reasoning = filtered.filter((p: any) => p.type === "reasoning")
+          const text = reasoning.map((p: any) => p.text).join("")
+          const rest = filtered.filter((p: any) => p.type !== "reasoning")
+
+          if (text) {
+            result.push({
+              ...msg,
+              content: rest,
+              providerOptions: {
+                ...msg.providerOptions,
+                openaiCompatible: {
+                  ...(msg.providerOptions as any)?.openaiCompatible,
+                  ...(field ? { [field]: text } : {}),
+                },
+              },
+            } as ModelMessage)
+          } else {
+            result.push({ ...msg, content: rest } as ModelMessage)
+          }
+        } else {
+          result.push({ ...msg, content: filtered } as ModelMessage)
+        }
+
+        // Fix consecutive assistant messages (GLM requires strict user/assistant alternation)
+        const last = result[result.length - 1]
+        if (last?.role === "assistant" && nextMsg?.role === "assistant") {
+          result.push({
+            role: "user",
+            content: [{ type: "text", text: "Done." }],
+          })
+        }
+      }
+
+      return result
+    }
+
     if (typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field) {
       const field = model.capabilities.interleaved.field
       return msgs.map((msg) => {
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 3a001e2756af..fc2c1fbf563f 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2889,3 +2889,156 @@ describe("ProviderTransform.variants", () => {
     })
   })
 })
+
+describe("ProviderTransform.message - GLM/ZhipuAI normalization", () => {
+  const glmModel = {
+    id: "zai/glm-5-turbo",
+    providerID: "zai",
+    api: {
+      id: "glm-5-turbo",
+      url: "https://open.bigmodel.cn/api/paas/v4",
+      npm: "@ai-sdk/openai-compatible",
+    },
+    name: "GLM-5 Turbo",
+    capabilities: {
+      temperature: true,
+      reasoning: false,
+      attachment: true,
+      toolcall: true,
+      input: { text: true, audio: false, image: true, video: false, pdf: false },
+      output: { text: true, audio: false, image: false, video: false, pdf: false },
+      interleaved: false,
+    },
+    cost: {
+      input: 0.001,
+      output: 0.002,
+      cache: { read: 0.0001, write: 0.0002 },
+    },
+    limit: {
+      context: 128000,
+      output: 8192,
+    },
+    status: "active",
+    options: {},
+    headers: {},
+  } as any
+
+  test("filters out messages with empty string content", () => {
+    const msgs = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "" },
+      { role: "user", content: "World" },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(2)
+    expect(result[0].content).toBe("Hello")
+    expect(result[1].content).toBe("World")
+  })
+
+  test("filters out empty text parts from array content", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "" },
+          { type: "text", text: "Hello" },
+          { type: "text", text: "" },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(1)
+    expect(result[0].content).toHaveLength(1)
+    expect(result[0].content[0]).toEqual({ type: "text", text: "Hello" })
+  })
+
+  test("removes reasoning parts for non-interleaved models", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          { type: "reasoning", text: "Thinking..." },
+          { type: "text", text: "Answer" },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(1)
+    expect(result[0].content).toHaveLength(1)
+    expect(result[0].content[0]).toEqual({ type: "text", text: "Answer" })
+  })
+
+  test("scrubs tool call IDs to alphanumeric", () => {
+    const msgs = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: "call_abc-123!@#",
+            toolName: "bash",
+            input: { command: "ls" },
+          },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect((result[0].content as any[])[0].toolCallId).toBe("call_abc-123")
+  })
+
+  test("fixes consecutive assistant messages by inserting bridge user message", () => {
+    const msgs = [
+      { role: "assistant", content: [{ type: "text", text: "First reply" }] },
+      { role: "assistant", content: [{ type: "text", text: "Second reply" }] },
+      { role: "user", content: "Thanks" },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, glmModel, {})
+
+    expect(result).toHaveLength(4)
+    expect(result[0].role).toBe("assistant")
+    expect(result[0].content).toEqual([{ type: "text", text: "First reply" }])
+    expect(result[1].role).toBe("user")
+    expect(result[1].content).toEqual([{ type: "text", text: "Done." }])
+    expect(result[2].role).toBe("assistant")
+    expect(result[2].content).toEqual([{ type: "text", text: "Second reply" }])
+    expect(result[3].role).toBe("user")
+  })
+
+  test("does not normalize non-GLM providers", () => {
+    const openaiModel = {
+      ...glmModel,
+      providerID: "openai",
+      api: {
+        id: "gpt-4",
+        url: "https://api.openai.com",
+        npm: "@ai-sdk/openai",
+      },
+    }
+
+    const msgs = [
+      { role: "assistant", content: "" },
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "" },
+          { type: "tool-call", toolCallId: "call_abc!@#", toolName: "bash", input: {} },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, openaiModel, {})
+
+    expect(result).toHaveLength(2)
+    expect(result[0].content).toBe("")
+    expect((result[1].content as any[])[1].toolCallId).toBe("call_abc!@#")
+  })
+})