Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/pi/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"build": "rm -rf dist && tsc -p tsconfig.build.json",
"typecheck": "tsc",
"types": "tsc",
"test": "bun test src/tests",
"prepublishOnly": "bun run build"
},
"dependencies": {
Expand Down
130 changes: 110 additions & 20 deletions packages/pi/src/convert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,28 @@ export type AnthropicRequestBody = {
}

/**
 * Replace every lone (unpaired) UTF-16 surrogate in `text` with U+FFFD.
 *
 * The `u` flag is essential: in Unicode mode a valid high/low surrogate pair
 * is read as a single astral code point and therefore does not match the
 * character class, so well-formed characters such as emoji pass through
 * untouched. Without the flag each half of a valid pair would be replaced.
 *
 * @param text - Arbitrary string that may contain unpaired surrogates.
 * @returns A copy of `text` in which only unpaired surrogates are replaced.
 */
function sanitize(text: string): string {
  return text.replace(/[\uD800-\uDFFF]/gu, '\uFFFD')
}

/**
 * Report whether `text` contains at least one unpaired UTF-16 surrogate.
 *
 * A high surrogate immediately followed by a low surrogate forms a valid
 * astral code point and is not reported; any other surrogate code unit is
 * "lone". Anthropic rejects payloads containing lone surrogates (invalid
 * UTF-8).
 *
 * @param text - String to inspect.
 * @returns `true` if any unpaired surrogate is present, otherwise `false`.
 */
function hasLoneSurrogate(text: string): boolean {
  // String iteration yields whole code points: a valid pair arrives as one
  // two-unit chunk, while an unpaired surrogate arrives as a one-unit chunk.
  for (const symbol of text) {
    if (symbol.length !== 1) continue
    const unit = symbol.charCodeAt(0)
    if (unit >= 0xd800 && unit <= 0xdfff) return true
  }
  return false
}

/**
 * Sanitize a tool-call ID to match Anthropic's `^[a-zA-Z0-9_-]+$` pattern.
 * Cross-provider IDs (e.g. OpenAI Codex `call_xxx|fc_xxx`) contain characters
 * Anthropic rejects. Deterministic — same input always yields the same output.
 *
 * @param id - Tool-call ID as produced by the originating provider.
 * @returns `'tool_call_unknown'` when `id` is empty; otherwise `id` with every
 *   character outside `[a-zA-Z0-9_-]` replaced by `_`, capped at 256
 *   characters.
 */
function sanitizeToolId(id: string): string {
  if (!id) return 'tool_call_unknown'
  // The replacement maps characters one-to-one, so a non-empty id always
  // yields a non-empty result; no second emptiness check is needed.
  const cleaned = id.replace(/[^a-zA-Z0-9_-]/g, '_')
  return cleaned.length > 256 ? cleaned.slice(0, 256) : cleaned
}

function toClaudeCodeToolName(name: string): string {
Expand Down Expand Up @@ -132,13 +146,24 @@ function convertMessages(
blocks.push({ type: 'text', text: sanitize(block.text) })
} else if (block.type === 'thinking' && block.thinking.trim()) {
const thinking = block as ThinkingContent
if (thinking.thinkingSignature) {
if (
thinking.thinkingSignature &&
!hasLoneSurrogate(thinking.thinking)
) {
// Signed thinking blocks must be sent back verbatim — the signature
// is computed over the original text. Sanitizing would alter it and
// Anthropic rejects the block as "modified". Anthropic-origin
// thinking is valid UTF-8, so this is the normal path.
blocks.push({
type: 'thinking',
thinking: sanitize(thinking.thinking),
thinking: thinking.thinking,
signature: thinking.thinkingSignature,
})
} else {
// Either unsigned, or signed-but-contains a lone surrogate. In the
// latter case we cannot keep the signature: sanitizing breaks it and
// sending the raw lone surrogate is an invalid-UTF8 400. Drop the
// signature and downgrade to sanitized text.
blocks.push({ type: 'text', text: sanitize(thinking.thinking) })
}
} else if (block.type === 'toolCall') {
Expand All @@ -157,17 +182,20 @@ function convertMessages(
if (message.role === 'toolResult') {
const toolResult = message as ToolResultMessage
const toolResults: Array<Record<string, unknown>> = []
const firstContent = convertTextAndImages(toolResult.content)
const firstContentArr = Array.isArray(firstContent)
? firstContent
: [{ type: 'text', text: firstContent }]
if (toolResult.isError && firstContentArr.length === 0) {
firstContentArr.push({ type: 'text', text: 'Error' })
let content = convertTextAndImages(toolResult.content)
// Anthropic rejects tool_result with is_error=true but empty content
if (
toolResult.isError &&
(!content ||
(Array.isArray(content) && content.length === 0) ||
content === '')
) {
content = [{ type: 'text', text: 'Error' }]
}
toolResults.push({
type: 'tool_result',
tool_use_id: sanitizeToolId(toolResult.toolCallId),
content: firstContentArr,
content: content,
is_error: toolResult.isError,
})

Expand All @@ -177,17 +205,20 @@ function convertMessages(
messages[nextIndex]?.role === 'toolResult'
) {
const next = messages[nextIndex] as ToolResultMessage
const nextContent = convertTextAndImages(next.content)
const nextContentArr = Array.isArray(nextContent)
? nextContent
: [{ type: 'text', text: nextContent }]
if (next.isError && nextContentArr.length === 0) {
nextContentArr.push({ type: 'text', text: 'Error' })
let nextContent = convertTextAndImages(next.content)
// Anthropic rejects tool_result with is_error=true but empty content
if (
next.isError &&
(!nextContent ||
(Array.isArray(nextContent) && nextContent.length === 0) ||
nextContent === '')
) {
nextContent = [{ type: 'text', text: 'Error' }]
}
toolResults.push({
type: 'tool_result',
tool_use_id: sanitizeToolId(next.toolCallId),
content: nextContentArr,
content: nextContent,
is_error: next.isError,
})
nextIndex += 1
Expand All @@ -200,6 +231,55 @@ function convertMessages(
return result
}

/**
 * Rewrite `thinking` blocks on every assistant message except the last one
 * into plain text blocks of the form `<thinking>…</thinking>`, discarding any
 * signature they carried. The array is modified in place.
 *
 * Rationale: Anthropic signatures are tied to the request context that
 * produced them. Replaying session history in a fresh request (for example
 * after context compaction or provider failover) leaves the signatures on
 * older assistant turns stale, and Anthropic answers with
 * `Invalid signature in thinking block`. The final assistant message is
 * treated as the current turn and is left untouched; earlier ones keep their
 * reasoning text (sanitized) but lose the signature.
 *
 * @param messages - Converted request messages; assistant entries with array
 *   content may have that content replaced.
 */
function sanitizeHistoricalThinkingSignatures(
  messages: AnthropicRequestBody['messages'],
): void {
  // Walk backwards to the final assistant message; ends at -1 if none exists.
  let currentTurn = messages.length - 1
  while (currentTurn >= 0 && messages[currentTurn]?.role !== 'assistant') {
    currentTurn -= 1
  }

  messages.forEach((entry, position) => {
    if (position === currentTurn) return
    if (entry?.role !== 'assistant') return
    const blocks = entry.content
    // String content cannot hold thinking blocks; nothing to rewrite.
    if (!Array.isArray(blocks)) return

    entry.content = blocks.map((item) => {
      const isThinking =
        Boolean(item) &&
        typeof item === 'object' &&
        (item as Record<string, unknown>).type === 'thinking'
      if (!isThinking) return item

      const reasoning = String(
        (item as Record<string, unknown>).thinking ?? '',
      )
      return {
        type: 'text',
        text: `<thinking>${sanitize(reasoning)}</thinking>`,
      }
    })
  })
}

function convertTools(
tools: Tool[] | undefined,
): AnthropicRequestBody['tools'] {
Expand Down Expand Up @@ -275,6 +355,16 @@ export async function buildAnthropicRequest(
identity?: ClaudeCodeIdentity,
): Promise<{ body: AnthropicRequestBody; bodyText: string }> {
const messages = convertMessages(context.messages)
// Strip trailing assistant messages — Anthropic rejects prefill on some models
while (
messages.length &&
messages[messages.length - 1]?.role === 'assistant'
) {
messages.pop()
}
// Strip stale signatures from historical thinking blocks; the current turn
// (last assistant message) keeps its signed thinking verbatim.
sanitizeHistoricalThinkingSignatures(messages)
const system = [
{
type: 'text',
Expand Down
Loading
Loading