From aa687e0db9e0acf3c8641050bd4859278f18e787 Mon Sep 17 00:00:00 2001 From: iceteaSA <171169159+iceteaSA@users.noreply.github.com> Date: Thu, 21 May 2026 20:08:51 +0200 Subject: [PATCH 1/2] fix(pi): strip trailing assistant messages, improve sanitize and error guards - Strip trailing assistant-role messages from converted requests to prevent Anthropic prefill 400 errors on some models - Add Unicode flag to surrogate regex for spec compliance - Simplify sanitizeToolId with cleaner length check - Improve error content guard to catch empty string case - Add comprehensive test coverage for all pi conversion logic --- packages/pi/package.json | 1 + packages/pi/src/convert.ts | 53 ++-- packages/pi/src/tests/convert.test.ts | 335 ++++++++++++++++++++++++++ 3 files changed, 371 insertions(+), 18 deletions(-) create mode 100644 packages/pi/src/tests/convert.test.ts diff --git a/packages/pi/package.json b/packages/pi/package.json index 5d71a00..9d42fcd 100644 --- a/packages/pi/package.json +++ b/packages/pi/package.json @@ -32,6 +32,7 @@ "build": "rm -rf dist && tsc -p tsconfig.build.json", "typecheck": "tsc", "types": "tsc", + "test": "bun test src/tests", "prepublishOnly": "bun run build" }, "dependencies": { diff --git a/packages/pi/src/convert.ts b/packages/pi/src/convert.ts index 8d99642..9fa3413 100644 --- a/packages/pi/src/convert.ts +++ b/packages/pi/src/convert.ts @@ -48,14 +48,18 @@ export type AnthropicRequestBody = { } function sanitize(text: string): string { - return text.replace(/[\uD800-\uDFFF]/g, '\uFFFD') + return text.replace(/[\uD800-\uDFFF]/gu, '\uFFFD') } +/** + * Sanitize a tool-call ID to match Anthropic's `^[a-zA-Z0-9_-]+$` pattern. + * Cross-provider IDs (e.g. OpenAI Codex `call_xxx|fc_xxx`) contain characters + * Anthropic rejects. Deterministic — same input always yields the same output. + */ function sanitizeToolId(id: string): string { if (!id) return 'tool_call_unknown' - const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, '_') - if (!sanitized) return 'tool_call_unknown' - return sanitized.slice(0, 256) + const cleaned = id.replace(/[^a-zA-Z0-9_-]/g, '_') + return cleaned.length > 256 ? cleaned.slice(0, 256) : cleaned } function toClaudeCodeToolName(name: string): string { @@ -157,17 +161,20 @@ function convertMessages( if (message.role === 'toolResult') { const toolResult = message as ToolResultMessage const toolResults: Array> = [] - const firstContent = convertTextAndImages(toolResult.content) - const firstContentArr = Array.isArray(firstContent) - ? firstContent - : [{ type: 'text', text: firstContent }] - if (toolResult.isError && firstContentArr.length === 0) { - firstContentArr.push({ type: 'text', text: 'Error' }) + let content = convertTextAndImages(toolResult.content) + // Anthropic rejects tool_result with is_error=true but empty content + if ( + toolResult.isError && + (!content || + (Array.isArray(content) && content.length === 0) || + content === '') + ) { + content = [{ type: 'text', text: 'Error' }] } toolResults.push({ type: 'tool_result', tool_use_id: sanitizeToolId(toolResult.toolCallId), - content: firstContentArr, + content: content, is_error: toolResult.isError, }) @@ -177,17 +184,20 @@ function convertMessages( messages[nextIndex]?.role === 'toolResult' ) { const next = messages[nextIndex] as ToolResultMessage - const nextContent = convertTextAndImages(next.content) - const nextContentArr = Array.isArray(nextContent) - ? nextContent - : [{ type: 'text', text: nextContent }] - if (next.isError && nextContentArr.length === 0) { - nextContentArr.push({ type: 'text', text: 'Error' }) + let nextContent = convertTextAndImages(next.content) + // Anthropic rejects tool_result with is_error=true but empty content + if ( + next.isError && + (!nextContent || + (Array.isArray(nextContent) && nextContent.length === 0) || + nextContent === '') + ) { + nextContent = [{ type: 'text', text: 'Error' }] } toolResults.push({ type: 'tool_result', tool_use_id: sanitizeToolId(next.toolCallId), - content: nextContentArr, + content: nextContent, is_error: next.isError, }) nextIndex += 1 @@ -275,6 +285,13 @@ export async function buildAnthropicRequest( identity?: ClaudeCodeIdentity, ): Promise<{ body: AnthropicRequestBody; bodyText: string }> { const messages = convertMessages(context.messages) + // Strip trailing assistant messages — Anthropic rejects prefill on some models + while ( + messages.length && + messages[messages.length - 1]?.role === 'assistant' + ) { + messages.pop() + } const system = [ { type: 'text', diff --git a/packages/pi/src/tests/convert.test.ts b/packages/pi/src/tests/convert.test.ts new file mode 100644 index 0000000..71aa881 --- /dev/null +++ b/packages/pi/src/tests/convert.test.ts @@ -0,0 +1,335 @@ +import { describe, expect, test } from 'bun:test' +import type { Message } from '@earendil-works/pi-ai' +import { buildAnthropicRequest } from '../convert' + +function userMsg(text: string): Message { + return { role: 'user', content: text, timestamp: 0 } +} + +function assistantMsg(text: string): Message { + return { + role: 'assistant', + content: [{ type: 'text', text }], + timestamp: 0, + } as Message +} + +function toolCallMsg(id: string, name: string): Message { + return { + role: 'assistant', + content: [{ type: 'toolCall', id, name, arguments: {} }], + timestamp: 0, + } as Message +} + +function toolResultMsg(toolCallId: string, text: string): Message { + return { + role: 'toolResult', + toolCallId, + content: [{ type: 'text', text }], + timestamp: 0, + } as Message +} + +const defaultCache = { enabled: false, mode: 'hybrid' as const } + +async function buildMessages(messages: Message[]) { + const context = { + messages, + systemPrompt: 'test', + tools: [], + } + const { body } = await buildAnthropicRequest( + 'claude-sonnet-4-20250514', + context as any, + undefined, + defaultCache, + ) + return body.messages +} + +describe('buildAnthropicRequest — prefill stripping', () => { + test('strips single trailing assistant message', async () => { + const messages = await buildMessages([ + userMsg('hello'), + assistantMsg('I will help'), + ]) + expect(messages.length).toBe(1) + expect(messages[0]?.role).toBe('user') + }) + + test('strips multiple trailing assistant messages', async () => { + const messages = await buildMessages([ + userMsg('hello'), + assistantMsg('first'), + assistantMsg('second'), + ]) + expect(messages.length).toBe(1) + expect(messages[0]?.role).toBe('user') + }) + + test('preserves assistant message followed by user message', async () => { + const messages = await buildMessages([ + userMsg('hello'), + assistantMsg('response'), + userMsg('follow up'), + ]) + expect(messages.length).toBe(3) + expect(messages[0]?.role).toBe('user') + expect(messages[1]?.role).toBe('assistant') + expect(messages[2]?.role).toBe('user') + }) + + test('preserves assistant with tool_use followed by tool_result', async () => { + const messages = await buildMessages([ + userMsg('do something'), + toolCallMsg('tool_1', 'Bash'), + toolResultMsg('tool_1', 'output'), + ]) + expect(messages.length).toBe(3) + expect(messages[0]?.role).toBe('user') + expect(messages[1]?.role).toBe('assistant') + expect(messages[2]?.role).toBe('user') // tool_result maps to user role + }) + + test('strips trailing assistant after tool_result + assistant', async () => { + const messages = await buildMessages([ + userMsg('do something'), + toolCallMsg('tool_1', 'Bash'), + toolResultMsg('tool_1', 'output'), + assistantMsg('based on that output...'), + ]) + expect(messages.length).toBe(3) + expect(messages[2]?.role).toBe('user') + }) + + test('handles user-only conversation', async () => { + const messages = await buildMessages([userMsg('hello')]) + expect(messages.length).toBe(1) + expect(messages[0]?.role).toBe('user') + }) + + test('handles empty messages array', async () => { + const messages = await buildMessages([]) + expect(messages.length).toBe(0) + }) + + test('strips all-assistant conversation to empty', async () => { + const messages = await buildMessages([ + assistantMsg('first'), + assistantMsg('second'), + ]) + expect(messages.length).toBe(0) + }) +}) + +describe('convertMessages — basic transforms', () => { + test('converts user text message', async () => { + const messages = await buildMessages([userMsg('hello world')]) + expect(messages.length).toBe(1) + expect(messages[0]).toEqual({ role: 'user', content: 'hello world' }) + }) + + test('skips empty user messages', async () => { + const messages = await buildMessages([userMsg(''), userMsg('real message')]) + expect(messages.length).toBe(1) + expect(messages[0]).toEqual({ role: 'user', content: 'real message' }) + }) + + test('converts assistant text blocks', async () => { + const messages = await buildMessages([ + userMsg('hi'), + assistantMsg('hello back'), + userMsg('thanks'), + ]) + expect(messages[1]?.role).toBe('assistant') + const content = messages[1]?.content as Array> + expect(content[0]).toEqual({ type: 'text', text: 'hello back' }) + }) + + test('converts tool_use and tool_result round-trip', async () => { + const messages = await buildMessages([ + userMsg('run ls'), + toolCallMsg('call_1', 'Bash'), + toolResultMsg('call_1', 'file1.txt'), + userMsg('thanks'), + ]) + expect(messages.length).toBe(4) + + // assistant with tool_use + const assistantContent = messages[1]?.content as Array< + Record + > + expect(assistantContent[0]?.type).toBe('tool_use') + expect(assistantContent[0]?.name).toBe('Bash') + + // tool_result mapped to user role + const toolContent = messages[2]?.content as Array> + expect(toolContent[0]?.type).toBe('tool_result') + expect(toolContent[0]?.tool_use_id).toBe('call_1') + }) + + test('sanitizes surrogate pairs in text', async () => { + const messages = await buildMessages([userMsg('hello \uD800 world')]) + expect(messages[0]).toEqual({ + role: 'user', + content: 'hello \uFFFD world', + }) + }) + + test('sanitizes tool IDs with invalid characters', async () => { + const messages = await buildMessages([ + userMsg('run it'), + toolCallMsg('call_xxx|fc_yyy', 'Bash'), + toolResultMsg('call_xxx|fc_yyy', 'output'), + userMsg('done'), + ]) + const assistantContent = messages[1]?.content as Array< + Record + > + expect(assistantContent[0]?.id).toBe('call_xxx_fc_yyy') + const toolContent = messages[2]?.content as Array> + expect(toolContent[0]?.tool_use_id).toBe('call_xxx_fc_yyy') + }) +}) + +describe('convertMessages — empty base64 image guard', () => { + test('filters out image with empty data from user message', async () => { + const messages = await buildMessages([ + { + role: 'user', + content: [ + { type: 'text', text: 'see image' }, + { type: 'image', mimeType: 'image/png', data: '' }, + ], + timestamp: 0, + } as Message, + ]) + const content = messages[0]?.content as Array> + expect(content.length).toBe(1) + expect(content[0]?.type).toBe('text') + }) + + test('filters out image with null/undefined data', async () => { + const messages = await buildMessages([ + { + role: 'user', + content: [ + { type: 'text', text: 'screenshot' }, + { type: 'image', mimeType: 'image/png', data: null }, + ], + timestamp: 0, + } as Message, + ]) + const content = messages[0]?.content as Array> + expect(content.length).toBe(1) + expect(content[0]?.type).toBe('text') + }) + + test('adds placeholder text when all images filtered and no text remains', async () => { + const messages = await buildMessages([ + { + role: 'user', + content: [{ type: 'image', mimeType: 'image/png', data: '' }], + timestamp: 0, + } as Message, + ]) + const content = messages[0]?.content as Array> + expect(content[0]?.type).toBe('text') + expect(content[0]?.text).toBe('(see attached image)') + }) + + test('keeps valid image with actual data', async () => { + const messages = await buildMessages([ + { + role: 'user', + content: [ + { type: 'text', text: 'look' }, + { type: 'image', mimeType: 'image/png', data: 'aGVsbG8=' }, + ], + timestamp: 0, + } as Message, + ]) + const content = messages[0]?.content as Array> + expect(content.length).toBe(2) + expect(content[1]?.type).toBe('image') + }) +}) + +describe('convertMessages — empty error tool_result guard', () => { + test('injects Error placeholder when is_error=true and content is empty', async () => { + const messages = await buildMessages([ + userMsg('run it'), + toolCallMsg('tool_1', 'Bash'), + { + role: 'toolResult', + toolCallId: 'tool_1', + content: [], + isError: true, + timestamp: 0, + } as unknown as Message, + userMsg('ok'), + ]) + const toolContent = messages[2]?.content as Array> + expect(toolContent[0]?.is_error).toBe(true) + const innerContent = toolContent[0]?.content as Array< + Record + > + expect(innerContent).toEqual([{ type: 'text', text: 'Error' }]) + }) + + test('keeps content when is_error=true but content is non-empty', async () => { + const messages = await buildMessages([ + userMsg('run it'), + toolCallMsg('tool_1', 'Bash'), + { + role: 'toolResult', + toolCallId: 'tool_1', + content: [{ type: 'text', text: 'command failed: exit 1' }], + isError: true, + timestamp: 0, + } as unknown as Message, + userMsg('ok'), + ]) + const toolContent = messages[2]?.content as Array> + // Non-empty content should be preserved even with is_error=true + expect(toolContent[0]?.is_error).toBe(true) + expect(toolContent[0]?.content).toBe('command failed: exit 1') + }) + + test('injects Error placeholder for consecutive error tool_results', async () => { + const messages = await buildMessages([ + userMsg('run both'), + { + role: 'assistant', + content: [ + { type: 'toolCall', id: 'tool_1', name: 'Bash', arguments: {} }, + { type: 'toolCall', id: 'tool_2', name: 'Bash', arguments: {} }, + ], + timestamp: 0, + } as Message, + { + role: 'toolResult', + toolCallId: 'tool_1', + content: [], + isError: true, + timestamp: 0, + } as unknown as Message, + { + role: 'toolResult', + toolCallId: 'tool_2', + content: [], + isError: true, + timestamp: 0, + } as unknown as Message, + userMsg('ok'), + ]) + // Both tool_results should be in same user message (batched) + const toolContent = messages[2]?.content as Array> + expect(toolContent.length).toBe(2) + for (const tr of toolContent) { + expect(tr.is_error).toBe(true) + expect(tr.content).toEqual([{ type: 'text', text: 'Error' }]) + } + }) +}) From ae0626e18ef532c54f25ca4fb5e1b249578a5bbf Mon Sep 17 00:00:00 2001 From: iceteaSA <171169159+iceteaSA@users.noreply.github.com> Date: Fri, 29 May 2026 20:39:00 +0200 Subject: [PATCH 2/2] fix(pi): preserve signed thinking verbatim, guard lone surrogates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed thinking blocks must be sent back byte-identical — the signature is computed over the original text, so sanitizing would alter it and Anthropic rejects the block as modified. Send signed thinking verbatim instead of sanitizing it. The one exception is a signed block containing a lone (unpaired) UTF-16 surrogate: the signature cannot survive sanitization and the raw surrogate is invalid UTF-8 (400). Detect lone surrogates with hasLoneSurrogate() and, only in that concrete case, drop the signature and downgrade to sanitized text. No broad historical-thinking healing — valid signed/redacted thinking is preserved. (Signed-thinking reordering is handled OpenCode-side in #30182.) --- packages/pi/src/convert.ts | 25 +++++++- packages/pi/src/tests/convert.test.ts | 82 +++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/packages/pi/src/convert.ts b/packages/pi/src/convert.ts index 9fa3413..3219a08 100644 --- a/packages/pi/src/convert.ts +++ b/packages/pi/src/convert.ts @@ -51,6 +51,16 @@ function sanitize(text: string): string { return text.replace(/[\uD800-\uDFFF]/gu, '\uFFFD') } +/** + * Detect lone (unpaired) UTF-16 surrogates. With the `u` flag the character + * class only matches surrogates that are NOT part of a valid pair, since valid + * pairs are folded into a single astral code point. Anthropic rejects payloads + * containing lone surrogates (invalid UTF-8). + */ +function hasLoneSurrogate(text: string): boolean { + return /[\uD800-\uDFFF]/u.test(text) +} + /** * Sanitize a tool-call ID to match Anthropic's `^[a-zA-Z0-9_-]+$` pattern. * Cross-provider IDs (e.g. OpenAI Codex `call_xxx|fc_xxx`) contain characters @@ -136,13 +146,24 @@ function convertMessages( blocks.push({ type: 'text', text: sanitize(block.text) }) } else if (block.type === 'thinking' && block.thinking.trim()) { const thinking = block as ThinkingContent - if (thinking.thinkingSignature) { + if ( + thinking.thinkingSignature && + !hasLoneSurrogate(thinking.thinking) + ) { + // Signed thinking blocks must be sent back verbatim — the signature + // is computed over the original text. Sanitizing would alter it and + // Anthropic rejects the block as "modified". Anthropic-origin + // thinking is valid UTF-8, so this is the normal path. blocks.push({ type: 'thinking', - thinking: sanitize(thinking.thinking), + thinking: thinking.thinking, signature: thinking.thinkingSignature, }) } else { + // Either unsigned, or signed-but-contains a lone surrogate. In the + // latter case we cannot keep the signature: sanitizing breaks it and + // sending the raw lone surrogate is an invalid-UTF8 400. Drop the + // signature and downgrade to sanitized text. blocks.push({ type: 'text', text: sanitize(thinking.thinking) }) } } else if (block.type === 'toolCall') { diff --git a/packages/pi/src/tests/convert.test.ts b/packages/pi/src/tests/convert.test.ts index 71aa881..c6aa3b5 100644 --- a/packages/pi/src/tests/convert.test.ts +++ b/packages/pi/src/tests/convert.test.ts @@ -333,3 +333,85 @@ describe('convertMessages — empty error tool_result guard', () => { } }) }) + +function thinkingToolMsg( + thinking: string, + signature: string, + toolId: string, +): Message { + return { + role: 'assistant', + content: [ + { type: 'thinking', thinking, thinkingSignature: signature }, + { type: 'toolCall', id: toolId, name: 'Bash', arguments: {} }, + ], + timestamp: 0, + } as Message +} + +describe('convertMessages — signed thinking blocks', () => { + test('preserves signed thinking with signature in the last assistant turn', async () => { + const messages = await buildMessages([ + userMsg('q1'), + thinkingToolMsg('reason A', 'sig-A', 'tool_1'), + toolResultMsg('tool_1', 'out1'), + thinkingToolMsg('reason B', 'sig-B', 'tool_2'), + toolResultMsg('tool_2', 'out2'), + ]) + + // Last assistant turn (index 3): thinking preserved verbatim with signature. + const current = messages[3]?.content as Array> + expect(current[0]).toEqual({ + type: 'thinking', + thinking: 'reason B', + signature: 'sig-B', + }) + expect(current[1]?.type).toBe('tool_use') + }) + + test('downgrades a signed thinking block with a lone surrogate to sanitized text', async () => { + const messages = await buildMessages([ + userMsg('q1'), + thinkingToolMsg('bad\uD800text', 'sig-A', 'tool_1'), + toolResultMsg('tool_1', 'out1'), + thinkingToolMsg('reason B', 'sig-B', 'tool_2'), + toolResultMsg('tool_2', 'out2'), + ]) + + // A signed block with a lone surrogate can't keep its signature: sanitizing + // would break it and sending the raw surrogate is an invalid-UTF8 400. Drop + // the signature and downgrade to sanitized text during conversion. + const block = messages[1]?.content as Array> + expect(block[0]).toEqual({ type: 'text', text: 'bad\uFFFDtext' }) + }) + + test('downgrades signed last-turn thinking to sanitized text if it has a lone surrogate', async () => { + const messages = await buildMessages([ + userMsg('q1'), + thinkingToolMsg('safe reason', 'sig-A', 'tool_1'), + toolResultMsg('tool_1', 'out1'), + thinkingToolMsg('bad\uD800reason', 'sig-B', 'tool_2'), + toolResultMsg('tool_2', 'out2'), + ]) + + // Last assistant turn, but signed thinking contains a lone surrogate: the + // signature cannot survive sanitization, so drop it and emit text. + const current = messages[3]?.content as Array> + expect(current[0]).toEqual({ type: 'text', text: 'bad\uFFFDreason' }) + expect(current[1]?.type).toBe('tool_use') + }) + + test('preserves clean signed last-turn thinking verbatim (no surrogate)', async () => { + const messages = await buildMessages([ + userMsg('q1'), + thinkingToolMsg('clean reason', 'sig-B', 'tool_2'), + toolResultMsg('tool_2', 'out2'), + ]) + const current = messages[1]?.content as Array> + expect(current[0]).toEqual({ + type: 'thinking', + thinking: 'clean reason', + signature: 'sig-B', + }) + }) +})