From 7a746a86e08377479620bfaeccb18bff20629e19 Mon Sep 17 00:00:00 2001 From: charles wu Date: Tue, 2 Jun 2026 15:13:32 +0800 Subject: [PATCH] fix: use max output size for completion budget --- .../max-output-size-completion-budget.md | 6 ++++++ .../agent-core/src/agent/compaction/full.ts | 1 + packages/agent-core/src/agent/config/index.ts | 4 ++++ packages/agent-core/src/agent/index.ts | 1 + .../agent-core/src/session/provider-manager.ts | 2 ++ .../agent-core/src/utils/completion-budget.ts | 4 ++++ .../test/harness/runtime-provider.test.ts | 2 ++ .../test/utils/completion-budget.test.ts | 18 ++++++++++++++++++ 8 files changed, 38 insertions(+) create mode 100644 .changeset/max-output-size-completion-budget.md diff --git a/.changeset/max-output-size-completion-budget.md b/.changeset/max-output-size-completion-budget.md new file mode 100644 index 00000000..801a01d2 --- /dev/null +++ b/.changeset/max-output-size-completion-budget.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Use configured model output limits as completion token caps. diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index eac1b65b..c7d54a73 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -243,6 +243,7 @@ export class FullCompaction { const provider = applyCompletionBudget({ provider: this.agent.config.provider, budget: resolveCompletionBudget({ + maxOutputSize: this.agent.config.maxOutputSize, reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize, }), capability: this.agent.config.modelCapabilities, diff --git a/packages/agent-core/src/agent/config/index.ts b/packages/agent-core/src/agent/config/index.ts index 45aca213..3f1aafaf 100644 --- a/packages/agent-core/src/agent/config/index.ts +++ b/packages/agent-core/src/agent/config/index.ts @@ -127,6 +127,10 @@ export class ConfigState { return this.tryResolvedProviderConfig()?.modelCapabilities ?? UNKNOWN_CAPABILITY; } + get maxOutputSize(): number | undefined { + return this.tryResolvedProviderConfig()?.maxOutputSize; + } + private get resolvedProviderConfig(): ResolvedRuntimeProvider | undefined { if (this._modelAlias === undefined) return undefined; return this.agent.modelProvider?.resolveProviderConfig(this._modelAlias); diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts index cad1e5b4..a1dc443d 100644 --- a/packages/agent-core/src/agent/index.ts +++ b/packages/agent-core/src/agent/index.ts @@ -201,6 +201,7 @@ export class Agent { const provider = this.config.provider.withThinking(this.config.thinkingLevel); const loopControl = this.kimiConfig?.loopControl; const completionBudgetConfig = resolveCompletionBudget({ + maxOutputSize: this.config.maxOutputSize, reservedContextSize: loopControl?.reservedContextSize, }); return new KosongLLM({ diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts index f675c126..7d940eee 100644 --- a/packages/agent-core/src/session/provider-manager.ts +++ b/packages/agent-core/src/session/provider-manager.ts @@ -17,6 +17,7 @@ export interface ResolvedRuntimeProvider { readonly providerName: string; readonly provider: KosongProviderConfig; readonly modelCapabilities: ModelCapability; + readonly maxOutputSize?: number | undefined; } interface ProviderManagerOptions { @@ -115,6 +116,7 @@ export class ProviderManager implements ModelProvider { providerName, provider, modelCapabilities: resolveModelCapabilities(alias, provider), + maxOutputSize: alias.maxOutputSize, }; } diff --git a/packages/agent-core/src/utils/completion-budget.ts b/packages/agent-core/src/utils/completion-budget.ts index 5136ec60..aea8010e 100644 --- a/packages/agent-core/src/utils/completion-budget.ts +++ b/packages/agent-core/src/utils/completion-budget.ts @@ -16,6 +16,7 @@ const DEFAULT_UNKNOWN_CONTEXT_FALLBACK = 32000; * non-positive env values disable clamping. */ export function resolveCompletionBudget(args: { + readonly maxOutputSize?: number | undefined; readonly reservedContextSize?: number; readonly env?: NodeJS.ProcessEnv; }): CompletionBudgetConfig | undefined { @@ -28,6 +29,9 @@ export function resolveCompletionBudget(args: { if (fromLegacy !== 'absent') { return fromLegacy === 'disabled' ? undefined : { hardCap: fromLegacy }; } + if (args.maxOutputSize !== undefined) { + return { hardCap: args.maxOutputSize }; + } if (args.reservedContextSize !== undefined && args.reservedContextSize > 0) { return { fallback: args.reservedContextSize }; } diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts index 6ef2c3c7..29ea213c 100644 --- a/packages/agent-core/test/harness/runtime-provider.test.ts +++ b/packages/agent-core/test/harness/runtime-provider.test.ts @@ -88,6 +88,7 @@ describe('resolveRuntimeProvider model metadata', () => { provider: 'openai', model: 'gpt-runtime', maxContextSize: 200000, + maxOutputSize: 24000, capabilities: ['tool_use'], }, }, @@ -106,6 +107,7 @@ describe('resolveRuntimeProvider model metadata', () => { tool_use: true, max_context_tokens: 200000, }); + expect(resolved.maxOutputSize).toBe(24000); }); it('uses config Kimi capabilities without requiring an api key during OAuth setup', () => { diff --git a/packages/agent-core/test/utils/completion-budget.test.ts b/packages/agent-core/test/utils/completion-budget.test.ts index 9b75f5f1..39621b3f 100644 --- a/packages/agent-core/test/utils/completion-budget.test.ts +++ b/packages/agent-core/test/utils/completion-budget.test.ts @@ -172,6 +172,24 @@ describe('resolveCompletionBudget', () => { expect(budget?.hardCap).toBe(2048); }); + it('uses maxOutputSize as the hard cap when env vars are unset', () => { + const budget = resolveCompletionBudget({ + maxOutputSize: 2048, + reservedContextSize: 1000, + env: {}, + }); + expect(budget?.hardCap).toBe(2048); + expect(budget?.fallback).toBeUndefined(); + }); + + it('lets env vars override maxOutputSize', () => { + const budget = resolveCompletionBudget({ + maxOutputSize: 2048, + env: { KIMI_MODEL_MAX_COMPLETION_TOKENS: '4096' }, + }); + expect(budget?.hardCap).toBe(4096); + }); + it('uses reservedContextSize as the unknown-context fallback when no env var is set', () => { const budget = resolveCompletionBudget({ reservedContextSize: 12345,