Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,88 @@ export namespace ProviderTransform {
return result
}

// GLM / ZhipuAI: filter empty content, scrub tool IDs, fix consecutive roles, handle reasoning
if (model.api.id.toLowerCase().includes("glm") || ["zai", "zhipuai"].includes(model.providerID.toLowerCase())) {
  // Strip tool IDs down to [a-zA-Z0-9_-] and at most 64 chars — presumably the
  // GLM API's constraint on tool_call ids; TODO confirm against ZhipuAI docs.
  // NOTE(review): scrubbing is lossy — two distinct IDs could collide after
  // stripping; verify upstream IDs cannot differ only in stripped characters.
  const scrub = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "").substring(0, 64)
  // Interleaved-reasoning models declare a provider-specific field name that
  // reasoning text must be delivered through instead of message content.
  const isInterleaved = typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field
  const field = isInterleaved ? (model.capabilities.interleaved as { field: string }).field : null

  const result: ModelMessage[] = []
  for (let i = 0; i < msgs.length; i++) {
    const msg = msgs[i]
    const nextMsg = msgs[i + 1]

    // Skip empty string content
    // NOTE(review): this early push also bypasses the consecutive-assistant
    // fix at the bottom of the loop, so two assistant messages with plain
    // string content are not separated by a synthetic user turn — confirm
    // string-content assistant messages cannot occur back to back.
    if (typeof msg.content === "string") {
      if (msg.content === "") continue
      result.push(msg)
      continue
    }

    // Non-array, non-string content passes through untouched.
    if (!Array.isArray(msg.content)) {
      result.push(msg)
      continue
    }

    // Filter empty text/reasoning parts
    let filtered = msg.content.filter((part) => {
      if ((part.type === "text" || part.type === "reasoning") && part.text === "") return false
      return true
    })

    // Remove reasoning parts for non-interleaved models (GLM API doesn't support them)
    if (!isInterleaved) filtered = filtered.filter((part) => part.type !== "reasoning")

    // Drop messages whose content became empty after filtering.
    if (filtered.length === 0) continue

    // Scrub tool call/result IDs
    if (msg.role === "assistant" || msg.role === "tool") {
      filtered = filtered.map((part) => {
        if (part.type === "tool-call" || part.type === "tool-result") {
          return { ...part, toolCallId: scrub(part.toolCallId) }
        }
        return part
      })
    }

    // Extract reasoning to providerOptions for interleaved models
    if (isInterleaved && msg.role === "assistant") {
      const reasoning = filtered.filter((p: any) => p.type === "reasoning")
      const text = reasoning.map((p: any) => p.text).join("")
      const rest = filtered.filter((p: any) => p.type !== "reasoning")

      if (text) {
        // Concatenated reasoning moves into the provider-declared field on
        // providerOptions.openaiCompatible; content keeps only the other parts.
        result.push({
          ...msg,
          content: rest,
          providerOptions: {
            ...msg.providerOptions,
            openaiCompatible: {
              ...(msg.providerOptions as any)?.openaiCompatible,
              ...(field ? { [field]: text } : {}),
            },
          },
        } as ModelMessage)
      } else {
        result.push({ ...msg, content: rest } as ModelMessage)
      }
    } else {
      result.push({ ...msg, content: filtered } as ModelMessage)
    }

    // Fix consecutive assistant messages (GLM requires strict user/assistant alternation)
    // NOTE(review): if the upcoming assistant message is later dropped (all
    // parts empty), the synthetic user turn inserted here may end up adjacent
    // to a real user message, breaking alternation the other way — confirm.
    const last = result[result.length - 1]
    if (last?.role === "assistant" && nextMsg?.role === "assistant") {
      result.push({
        role: "user",
        content: [{ type: "text", text: "Done." }],
      })
    }
  }

  return result
}

if (typeof model.capabilities.interleaved === "object" && model.capabilities.interleaved.field) {
const field = model.capabilities.interleaved.field
return msgs.map((msg) => {
Expand Down
54 changes: 50 additions & 4 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Session } from "."
import { SessionID, MessageID, PartID } from "./schema"
import { Instance } from "../project/instance"
import { Provider } from "../provider/provider"
import { ProviderTransform } from "../provider/transform"
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util/token"
Expand Down Expand Up @@ -33,15 +34,29 @@ export namespace SessionCompaction {
}

// Minimum estimated token savings required before a prune pass is worthwhile.
export const PRUNE_MINIMUM = 20_000
// Absolute floor of recent tool-output tokens protected from pruning,
// regardless of how small the model's context window is.
export const PRUNE_PROTECT_MIN = 20_000
// Fraction of the context window protected from pruning (most recent outputs).
export const PRUNE_PROTECT_RATIO = 0.15
// Trigger a proactive prune once usage crosses this fraction of usable context.
export const PRUNE_PROACTIVE_RATIO = 0.5
// Tool outputs that are never pruned — presumably their results remain load-
// bearing for later turns (state, diagnostics, background output); confirm.
const PRUNE_PROTECTED_TOOLS = [
  "skill",
  "compress",
  "todowrite",
  "background_output",
  "lsp_diagnostics",
  "lsp_symbols",
]

export interface Interface {
readonly isOverflow: (input: {
tokens: MessageV2.Assistant["tokens"]
model: Provider.Model
}) => Effect.Effect<boolean>
readonly prune: (input: { sessionID: SessionID }) => Effect.Effect<void>
readonly pruneIfNeeded: (input: {
sessionID: SessionID
tokens: MessageV2.Assistant["tokens"]
model: Provider.Model
}) => Effect.Effect<void>
readonly process: (input: {
parentID: MessageID
messages: MessageV2.WithParts[]
Expand Down Expand Up @@ -100,6 +115,16 @@ export namespace SessionCompaction {
.pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
if (!msgs) return

let contextLimit = 128_000
const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
if (lastAssistant && lastAssistant.info.role === "assistant") {
try {
const model = yield* provider.getModel(lastAssistant.info.providerID, lastAssistant.info.modelID)
contextLimit = model.limit.context || 128_000
} catch {}
}
const protect = Math.max(PRUNE_PROTECT_MIN, Math.round(contextLimit * PRUNE_PROTECT_RATIO))

let total = 0
let pruned = 0
const toPrune: MessageV2.ToolPart[] = []
Expand All @@ -115,10 +140,10 @@ export namespace SessionCompaction {
if (part.type === "tool")
if (part.state.status === "completed") {
if (PRUNE_PROTECTED_TOOLS.includes(part.tool)) continue
if (part.state.time.compacted) break loop
if (part.state.time.compacted) continue
const estimate = Token.estimate(part.state.output)
total += estimate
if (total > PRUNE_PROTECT) {
if (total > protect) {
pruned += estimate
toPrune.push(part)
}
Expand Down Expand Up @@ -368,9 +393,30 @@ When constructing the summary, try to stick to this template:
})
})

/**
 * Proactively prune old tool outputs once a session's token usage crosses
 * PRUNE_PROACTIVE_RATIO of the model's usable context window.
 *
 * No-op when `compaction.prune` is disabled in config or the model reports
 * no context limit.
 */
const pruneIfNeeded = Effect.fn("SessionCompaction.pruneIfNeeded")(function* (input: {
  sessionID: SessionID
  tokens: MessageV2.Assistant["tokens"]
  model: Provider.Model
}) {
  const cfg = yield* config.get()
  if (cfg.compaction?.prune === false) return
  const context = input.model.limit.context
  // A context limit of 0 reads as "unknown" — nothing to compare against.
  if (context === 0) return
  const maxOutput = ProviderTransform.maxOutputTokens(input.model)
  // Tokens held back for the model's reply: configurable, else capped at 20k.
  const reserved = cfg.compaction?.reserved ?? Math.min(20_000, maxOutput)
  // NOTE(review): `reserved` is subtracted only in the explicit input-limit
  // branch; the fallback subtracts maxOutput instead — confirm the asymmetry
  // is intentional.
  const usable = input.model.limit.input ? input.model.limit.input - reserved : context - maxOutput
  // Prefer the provider-reported total; otherwise sum the components
  // (cache reads/writes occupy context too).
  const count =
    input.tokens.total ||
    input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
  if (count < usable * PRUNE_PROACTIVE_RATIO) return
  log.info("proactive prune triggered", { count, usable, ratio: count / usable })
  yield* prune({ sessionID: input.sessionID })
})

return Service.of({
isOverflow,
prune,
pruneIfNeeded,
process: processCompaction,
create,
})
Expand Down
21 changes: 21 additions & 0 deletions packages/opencode/src/session/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import { Session } from "."
import { LLM } from "./llm"
import { MessageV2 } from "./message-v2"
import { isOverflow } from "./overflow"
import { SessionCompaction } from "./compaction"
import { Token } from "@/util/token"
import { ProviderTransform } from "@/provider/transform"
import { PartID } from "./schema"
import type { SessionID } from "./schema"
import { SessionRetry } from "./retry"
Expand Down Expand Up @@ -72,6 +75,7 @@ export namespace SessionProcessor {
needsCompaction: boolean
currentText: MessageV2.TextPart | undefined
reasoningMap: Record<string, MessageV2.ReasoningPart>
sentChars: number
}

type StreamEvent = Event
Expand Down Expand Up @@ -119,6 +123,7 @@ export namespace SessionProcessor {
needsCompaction: false,
currentText: undefined,
reasoningMap: {},
sentChars: 0,
}
let aborted = false
const slog = log.with({ sessionID: input.sessionID, messageID: input.assistantMessage.id })
Expand Down Expand Up @@ -360,6 +365,7 @@ export namespace SessionProcessor {
ctx.assistantMessage.finish = value.finishReason
ctx.assistantMessage.cost += usage.cost
ctx.assistantMessage.tokens = usage.tokens
Token.updateRatio({ chars: ctx.sentChars, tokens: usage.tokens.input })
yield* session.updatePart({
id: PartID.ascending(),
reason: value.finishReason,
Expand Down Expand Up @@ -395,6 +401,20 @@ export namespace SessionProcessor {
) {
ctx.needsCompaction = true
}
if (!ctx.assistantMessage.summary) {
const context = ctx.model.limit.context
if (context > 0) {
const maxOutput = ProviderTransform.maxOutputTokens(ctx.model)
const reserved = (yield* config.get()).compaction?.reserved ?? Math.min(20_000, maxOutput)
const usable = ctx.model.limit.input ? ctx.model.limit.input - reserved : context - maxOutput
const count =
usage.tokens.total ||
usage.tokens.input + usage.tokens.output + usage.tokens.cache.read + usage.tokens.cache.write
if (count >= usable * 0.5) {
SessionCompaction.prune({ sessionID: ctx.sessionID })
}
}
}
return
}

Expand Down Expand Up @@ -539,6 +559,7 @@ export namespace SessionProcessor {
yield* Effect.gen(function* () {
ctx.currentText = undefined
ctx.reasoningMap = {}
ctx.sentChars = JSON.stringify(streamInput.messages).length
const stream = llm.stream(streamInput)

yield* stream.pipe(
Expand Down
1 change: 1 addition & 0 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
}

if (task?.type === "compaction") {
yield* compaction.prune({ sessionID })
const result = yield* compaction.process({
messages: msgs,
parentID: lastUser.id,
Expand Down
13 changes: 11 additions & 2 deletions packages/opencode/src/util/token.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
export namespace Token {
  // Adaptive chars-per-token ratio used by `estimate`. Starts at the common
  // ~4-chars-per-token heuristic and is refined by `updateRatio` as real
  // usage numbers come back from the provider.
  let charsPerToken = 4

  /**
   * Roughly estimate the token count of `input` from its character length.
   * Returns 0 for empty/nullish input (the `|| ""` guards callers that pass
   * null/undefined despite the declared type).
   */
  export function estimate(input: string) {
    return Math.max(0, Math.round((input || "").length / charsPerToken))
  }

  /**
   * Fold an observed chars/tokens sample into the ratio via an exponential
   * moving average (70% old, 30% new) so a single outlier cannot swing
   * estimates. Non-positive or non-finite samples are ignored — without the
   * finiteness guard a NaN sample would permanently poison the ratio.
   */
  export function updateRatio({ chars, tokens }: { chars: number; tokens: number }) {
    if (!Number.isFinite(chars) || !Number.isFinite(tokens)) return
    if (tokens <= 0 || chars <= 0) return
    charsPerToken = charsPerToken * 0.7 + (chars / tokens) * 0.3
  }

  /** Restore the default ratio (e.g. between sessions or in tests). */
  export function resetRatio() {
    charsPerToken = 4
  }
}
Loading
Loading