Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
"openai": "^4.104.0",
"react": "18.3.1",
"semver": "^7.8.2",
"sharp": "^0.34.5",
"shell-quote": "^1.8.4",
"spawn-rx": "^5.1.2",
"string-width": "^7.2.0",
Expand Down
57 changes: 36 additions & 21 deletions src/services/ai/adapters/chatCompletions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import {
import { Tool, getToolDescription } from '@tool'
import { zodToJsonSchema } from 'zod-to-json-schema'
import { setRequestStatus } from '@utils/session/requestStatus'
import {
extractTextAndImageUrls,
toOpenAIImageUrlParts,
} from '@utils/model/visionContent'

export class ChatCompletionsAdapter extends OpenAIAdapter {
createRequest(params: UnifiedRequestParams): any {
Expand Down Expand Up @@ -120,33 +124,44 @@ export class ChatCompletionsAdapter extends OpenAIAdapter {
return []
}

return messages.map(msg => {
const normalized: any[] = []

for (const msg of messages) {
if (!msg || typeof msg !== 'object') {
return msg
normalized.push(msg)
continue
}

if (msg.role === 'tool') {
if (Array.isArray(msg.content)) {
return {
...msg,
content:
msg.content
.map(c => c?.text || '')
.filter(Boolean)
.join('\n\n') || '(empty content)',
}
} else if (typeof msg.content !== 'string') {
return {
...msg,
content:
msg.content === null || msg.content === undefined
? '(empty content)'
: JSON.stringify(msg.content),
}
const { text, imageUrls } = extractTextAndImageUrls(msg.content)
normalized.push({
...msg,
content:
text ||
(imageUrls.length > 0
? '(image output attached in following message)'
: '(empty content)'),
})

if (imageUrls.length > 0) {
normalized.push({
role: 'user',
content: [
{
type: 'text',
text: `Image output from tool ${msg.tool_call_id || msg.id || 'unknown'}:`,
},
...toOpenAIImageUrlParts(imageUrls),
],
})
}
continue
}
return msg
})

normalized.push(msg)
}

return normalized
}

protected async *processStreamingChunk(
Expand Down
63 changes: 38 additions & 25 deletions src/services/ai/adapters/responsesAPI.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ import { zodToJsonSchema } from 'zod-to-json-schema'
import { processResponsesStream } from './responsesStreaming'
import { debug as debugLogger } from '@utils/log/debugLogger'
import { logError } from '@utils/log'
import {
extractTextAndImageUrls,
getImageUrlFromPart,
toResponsesImageParts,
} from '@utils/model/visionContent'

export class ResponsesAPIAdapter extends OpenAIAdapter {
createRequest(params: UnifiedRequestParams): any {
Expand Down Expand Up @@ -387,26 +392,12 @@ ${reasoningContent}
if (role === 'tool') {
const callId = message.tool_call_id || message.id
if (typeof callId === 'string' && callId) {
let content = message.content || ''
if (Array.isArray(content)) {
const texts = []
for (const part of content) {
if (typeof part === 'object' && part !== null) {
const t = part.text || part.content
if (typeof t === 'string' && t) {
texts.push(t)
}
}
}
content = texts.join('\n')
}
if (typeof content === 'string') {
inputItems.push({
type: 'function_call_output',
call_id: callId,
output: content,
})
}
const output = this.convertToolOutput(message.content)
inputItems.push({
type: 'function_call_output',
call_id: callId,
output,
})
}
continue
}
Expand Down Expand Up @@ -456,11 +447,19 @@ ${reasoningContent}
contentItems.push({ type: kind, text: text })
}
} else if (ptype === 'image_url') {
const image = part.image_url
const url =
typeof image === 'object' && image !== null ? image.url : image
if (typeof url === 'string' && url) {
contentItems.push({ type: 'input_image', image_url: url })
const imageUrl = getImageUrlFromPart(part)
if (imageUrl) {
contentItems.push({ type: 'input_image', image_url: imageUrl })
}
} else if (ptype === 'image') {
const imageUrl = getImageUrlFromPart(part)
if (imageUrl) {
contentItems.push({ type: 'input_image', image_url: imageUrl })
}
} else if (ptype === 'input_image') {
const imageUrl = getImageUrlFromPart(part)
if (imageUrl) {
contentItems.push({ type: 'input_image', image_url: imageUrl })
}
}
}
Expand All @@ -482,6 +481,20 @@ ${reasoningContent}
return inputItems
}

/**
 * Builds the `output` value placed on a `function_call_output` input item
 * from a tool message's raw content.
 *
 * The content is split into text and image URLs by `extractTextAndImageUrls`.
 * - No image URLs: the extracted text is returned as-is (a plain string).
 * - One or more image URLs: an array is returned, consisting of an optional
 *   leading `{ type: 'input_text', text }` part (only when the text is
 *   non-empty) followed by the parts produced by `toResponsesImageParts`.
 *
 * @param content Raw tool message content; its shape is interpreted entirely
 *   by `extractTextAndImageUrls`.
 * @returns Either the extracted text, or a list of content parts when images
 *   are present.
 */
private convertToolOutput(content: unknown): string | any[] {
  const extracted = extractTextAndImageUrls(content)
  const hasImages = extracted.imageUrls.length !== 0

  // Text-only tool results keep the simple string form.
  if (!hasImages) {
    return extracted.text
  }

  // Text (when present) always precedes the image parts.
  const parts: any[] = extracted.text
    ? [{ type: 'input_text', text: extracted.text }]
    : []
  for (const imagePart of toResponsesImageParts(extracted.imageUrls)) {
    parts.push(imagePart)
  }
  return parts
}

private buildInstructions(systemPrompt: string[]): string {
const systemContent = systemPrompt
.filter(content => content.trim())
Expand Down
101 changes: 56 additions & 45 deletions src/tools/filesystem/FileReadTool/FileReadTool.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,27 @@ import {
import { DESCRIPTION, PROMPT } from './prompt'
import { hasReadPermission } from '@utils/permissions/filesystem'
import { secureFileService } from '@utils/fs/secureFile'
import { readFileBun, fileExistsBun, getFileSizeBun } from '@utils/bun/file'
import { readFileBun } from '@utils/bun/file'
import {
type AnthropicImageMediaType,
normalizeImageMediaType,
} from '@utils/ai/anthropic'
detectImageMediaType,
isSvgBuffer,
isSvgExtension,
rasterizeSvgToPng,
type SupportedImageMediaType,
} from '@utils/image/media'

const MAX_LINES_TO_RENDER = 5
const MAX_LINE_LENGTH = 2000
const MAX_OUTPUT_SIZE = 0.25 * 1024 * 1024

const IMAGE_EXTENSIONS = new Set(['.png', '.jpg', '.jpeg', '.gif', '.webp'])
const IMAGE_EXTENSIONS = new Set([
'.png',
'.jpg',
'.jpeg',
'.gif',
'.webp',
'.svg',
])

const MAX_WIDTH = 2000
const MAX_HEIGHT = 2000
Expand Down Expand Up @@ -433,7 +443,7 @@ export const FileReadTool = {
type: 'image'
file: {
base64: string
type: AnthropicImageMediaType
type: SupportedImageMediaType
originalSize: number
}
}
Expand All @@ -449,30 +459,21 @@ const formatFileSizeError = (sizeInBytes: number) =>

function createImageResponse(
buffer: Buffer,
ext: string,
mediaType: SupportedImageMediaType,
originalSize: number,
): {
type: 'image'
file: {
base64: string
type: AnthropicImageMediaType
type: SupportedImageMediaType
originalSize: number
}
} {
const normalized = normalizeImageMediaType(
ext === '.jpg' || ext === '.jpeg'
? 'image/jpeg'
: ext === '.png'
? 'image/png'
: ext === '.gif'
? 'image/gif'
: 'image/webp',
)
return {
type: 'image',
file: {
base64: buffer.toString('base64'),
type: normalized,
type: mediaType,
originalSize,
},
}
Expand All @@ -485,31 +486,46 @@ async function readImage(
type: 'image'
file: {
base64: string
type: AnthropicImageMediaType
type: SupportedImageMediaType
originalSize: number
}
}> {
try {
const stats = statSync(filePath)
const sharpModule = (await import('sharp')) as any
const sharp = sharpModule.default || sharpModule

const fileReadResult = secureFileService.safeReadFile(filePath, {
encoding: 'buffer' as BufferEncoding,
maxFileSize: MAX_IMAGE_SIZE,
checkFileExtension: false,
})

if (!fileReadResult.success) {
throw new Error(`Failed to read image file: ${fileReadResult.error}`)
}

const image = sharp(fileReadResult.content as Buffer)
const inputBuffer = fileReadResult.content as Buffer

if (isSvgExtension(ext) || isSvgBuffer(inputBuffer)) {
const rasterized = await rasterizeSvgToPng(inputBuffer)
return createImageResponse(rasterized, 'image/png', stats.size)
}

const detectedMediaType = detectImageMediaType(inputBuffer)
if (!detectedMediaType) {
throw new Error(
'Unsupported image format. Supported image formats are PNG, JPEG, GIF, WebP, and SVG.',
)
}

const sharpModule = (await import('sharp')) as any
const sharp = sharpModule.default || sharpModule

const image = sharp(inputBuffer)
const metadata = await image.metadata()

if (!metadata.width || !metadata.height) {
if (stats.size > MAX_IMAGE_SIZE) {
const compressedBuffer = await image.jpeg({ quality: 80 }).toBuffer()
return createImageResponse(compressedBuffer, '.jpeg', stats.size)
return createImageResponse(compressedBuffer, 'image/jpeg', stats.size)
}
}

Expand All @@ -521,20 +537,7 @@ async function readImage(
width <= MAX_WIDTH &&
height <= MAX_HEIGHT
) {
const fileReadResult = secureFileService.safeReadFile(filePath, {
encoding: 'buffer' as BufferEncoding,
maxFileSize: MAX_IMAGE_SIZE,
})

if (!fileReadResult.success) {
throw new Error(`Failed to read image file: ${fileReadResult.error}`)
}

return createImageResponse(
fileReadResult.content as Buffer,
ext,
stats.size,
)
return createImageResponse(inputBuffer, detectedMediaType, stats.size)
}

if (width > MAX_WIDTH) {
Expand All @@ -556,10 +559,14 @@ async function readImage(

if (resizedImageBuffer.length > MAX_IMAGE_SIZE) {
const compressedBuffer = await image.jpeg({ quality: 80 }).toBuffer()
return createImageResponse(compressedBuffer, '.jpeg', stats.size)
return createImageResponse(compressedBuffer, 'image/jpeg', stats.size)
}

return createImageResponse(resizedImageBuffer, ext, stats.size)
return createImageResponse(
resizedImageBuffer,
detectedMediaType,
stats.size,
)
} catch (e) {
logError(e)
const stats = statSync(filePath)
Expand All @@ -572,10 +579,14 @@ async function readImage(
throw new Error(`Failed to read image file: ${fileReadResult.error}`)
}

return createImageResponse(
fileReadResult.content as Buffer,
ext,
stats.size,
)
const buffer = fileReadResult.content as Buffer
const detectedMediaType = detectImageMediaType(buffer)
if (!detectedMediaType) {
throw new Error(
'Unsupported image format. Supported image formats are PNG, JPEG, GIF, WebP, and SVG.',
)
}

return createImageResponse(buffer, detectedMediaType, stats.size)
}
}
5 changes: 3 additions & 2 deletions src/ui/components/PromptInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import { CompactModeIndicator } from '@components/ModeIndicator'
import { getPromptInputSpecialKeyAction } from '@utils/terminal/promptInputSpecialKey'
import { logStartupProfile } from '@utils/config/startupProfile'
import { useStatusLine } from '@hooks/useStatusLine'
import type { ClipboardImage } from '@utils/image/media'

async function interpretHashCommand(input: string): Promise<string> {
try {
Expand Down Expand Up @@ -498,13 +499,13 @@ function PromptInput({
}
}

function onImagePaste(image: string): string {
function onImagePaste(image: ClipboardImage): string {
onModeChange('prompt')
const placeholder = `[Image #${pastedImageCounter.current}]`
pastedImageCounter.current += 1
setPastedImages(prev => [
...prev,
{ placeholder, data: image, mediaType: 'image/png' },
{ placeholder, data: image.data, mediaType: image.mediaType },
])
return placeholder
}
Expand Down
Loading
Loading