diff --git a/package.json b/package.json index 6f00f7550..26339b74b 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "@temporalio/envconfig": "workspace:*", "@temporalio/interceptors-opentelemetry": "workspace:*", "@temporalio/nexus": "workspace:*", + "@temporalio/openai-agents": "workspace:*", "@temporalio/nyc-test-coverage": "workspace:*", "@temporalio/plugin": "workspace:*", "@temporalio/proto": "workspace:*", diff --git a/packages/openai-agents/package.json b/packages/openai-agents/package.json new file mode 100644 index 000000000..d91b2f28b --- /dev/null +++ b/packages/openai-agents/package.json @@ -0,0 +1,99 @@ +{ + "name": "@temporalio/openai-agents", + "version": "1.16.0", + "description": "Temporal OpenAI Agents SDK integration package", + "main": "lib/index.js", + "types": "./lib/index.d.ts", + "exports": { + ".": { + "types": "./lib/index.d.ts", + "import": "./lib/index.js", + "require": "./lib/index.js", + "default": "./lib/index.js" + }, + "./workflow": { + "types": "./lib/workflow.d.ts", + "import": "./lib/workflow.js", + "require": "./lib/workflow.js", + "default": "./lib/workflow.js" + }, + "./load-polyfills": { + "types": "./lib/workflow/load-polyfills.d.ts", + "import": "./lib/workflow/load-polyfills.js", + "require": "./lib/workflow/load-polyfills.js", + "default": "./lib/workflow/load-polyfills.js" + }, + "./lib/workflow": { + "types": "./lib/workflow.d.ts", + "import": "./lib/workflow.js", + "require": "./lib/workflow.js", + "default": "./lib/workflow.js" + }, + "./lib/load-polyfills": { + "types": "./lib/workflow/load-polyfills.d.ts", + "import": "./lib/workflow/load-polyfills.js", + "require": "./lib/workflow/load-polyfills.js", + "default": "./lib/workflow/load-polyfills.js" + }, + "./lib/index": { + "types": "./lib/index.d.ts", + "import": "./lib/index.js", + "require": "./lib/index.js", + "default": "./lib/index.js" + }, + "./testing": { + "types": "./lib/worker/testing.d.ts", + "import": "./lib/worker/testing.js", + 
/**
 * Finds a `TemporalFailure` buried inside an arbitrary thrown value.
 *
 * The search follows `cause` links and, for `AggregateError`, every entry of
 * `errors`. Work is kept on a LIFO stack, so the most recently pushed candidate
 * is inspected first: an error's `cause` is examined before its aggregated
 * errors, and aggregated errors are examined last-to-first.
 *
 * @param error - Any thrown value; non-objects are ignored.
 * @returns The first `TemporalFailure` encountered, or `undefined` if none is reachable.
 */
export function unwrapTemporalFailure(error: unknown): TemporalFailure | undefined {
  // Objects already inspected, so cyclic `cause` chains terminate.
  const seen = new Set<object>();
  const pending: unknown[] = [error];

  while (pending.length !== 0) {
    const candidate = pending.pop();

    if (typeof candidate !== 'object' || candidate === null) continue;
    if (seen.has(candidate)) continue;
    seen.add(candidate);

    if (candidate instanceof TemporalFailure) {
      return candidate;
    }
    if (candidate instanceof AggregateError) {
      pending.push(...candidate.errors);
    }
    // Pushed last so the direct cause is popped before any aggregated errors.
    pending.push((candidate as { cause?: unknown }).cause);
  }

  return undefined;
}
*/ + summaryOverride?: string | ModelSummaryProvider; + /** How cancellation propagates from the workflow to the activity. @default ActivityCancellationType.TRY_CANCEL */ + cancellationType?: ActivityCancellationType; + /** Priority for the model activity. Omit to use server defaults. */ + priority?: Priority; +} + +export const DEFAULT_MODEL_ACTIVITY_OPTIONS: ModelActivityOptions = { + startToCloseTimeout: '60s', + useLocalActivity: false, + cancellationType: ActivityCancellationType.TRY_CANCEL, +}; diff --git a/packages/openai-agents/src/common/serialized-model.ts b/packages/openai-agents/src/common/serialized-model.ts new file mode 100644 index 000000000..75f259de0 --- /dev/null +++ b/packages/openai-agents/src/common/serialized-model.ts @@ -0,0 +1,57 @@ +import type { ModelSettings, SerializedHandoff, SerializedOutputType, SerializedTool } from '@openai/agents-core'; + +/** Current wire protocol version. Activity validates this on every invocation. */ +export const WIRE_VERSION = 1; + +// Note: Some fields (modelSettings, tools, outputType, handoffs) reference upstream types directly. +// We trust these to remain JSON-safe; if upstream adds a non-serializable field, bump WIRE_VERSION +// and either project or exclude. + +/** Recursive JSON-safe type replacing upstream `unknown` fields on the wire. */ +export type JsonValue = null | string | number | boolean | JsonValue[] | { [k: string]: JsonValue }; + +/** JSON-serializable projection of upstream `ModelRequest`, sent workflow → activity. 
*/ +export interface SerializedModelRequest { + __wireVersion: typeof WIRE_VERSION; + systemInstructions?: string; + input: JsonValue; + modelSettings: ModelSettings; + tools: SerializedTool[]; + toolsExplicitlyProvided?: boolean; + outputType: SerializedOutputType; + handoffs: SerializedHandoff[]; + prompt?: JsonValue; + previousResponseId?: string; + conversationId?: string; + // tracing is ModelTracing (boolean | 'enabled_without_data') — an enablement flag, not trace context. + // OTel span context for activity-under-generation nesting is tracked separately (see TemporalTracingProcessor). + tracing: JsonValue; + overridePromptModel?: boolean; + // Excluded by design (must stay in this comment for future contributors): + // signal — AbortSignal; not serializable. Temporal cancellation provides equivalent. +} + +/** JSON-serializable projection of upstream `ModelResponse`, returned activity → workflow. */ +export interface SerializedModelResponse { + __wireVersion: typeof WIRE_VERSION; + usage: JsonValue; + output: JsonValue[]; + responseId?: string; + /** + * Provider-specific metadata. Upstream type is `Record`. + * + * **Coercion warning:** Temporal's JSON codec serializes this field as-is. Non-JSON-primitive + * values (Date, Map, Set, class instances) will be silently coerced — e.g., Date becomes an + * ISO 8601 string. Code consuming this field on the workflow side will receive the coerced + * form, not the original type. If your model provider populates providerData with non-JSON + * types, handle the coerced representation explicitly. + */ + providerData?: Record; + // All upstream ModelResponse fields are present, with types narrowed to JSON-safe equivalents (Usage → JsonValue, etc.). +} + +/** Activity input envelope: model name + serialized request. 
*/ +export interface InvokeModelActivityInput { + modelName: string; + request: SerializedModelRequest; +} diff --git a/packages/openai-agents/src/index.ts b/packages/openai-agents/src/index.ts new file mode 100644 index 000000000..5ca4dfab0 --- /dev/null +++ b/packages/openai-agents/src/index.ts @@ -0,0 +1,39 @@ +/** + * @temporalio/openai-agents — Temporal integration for the OpenAI Agents SDK. + * + * Deferred (not in this package): + * - StatefulMCPServerProvider — persistent MCP server connections across worker lifecycle + * - nexusOperationAsTool — TS SDK lacks executeNexusOperation; add when available + * - testing.AgentEnvironment — richer test harness beyond FakeModel + * - workflowFailureExceptionTypes registration (TS SDK doesn't support) + */ + +// Main entry — all public exports (plugin, activities, workflow utilities, testing namespace, errors) + +export { OpenAIAgentsPlugin } from './worker/plugin'; +export type { OpenAIAgentsPluginOptions } from './worker/plugin'; +export { toSerializedModelResponse } from './worker/activities'; +export { StatelessMCPServerProvider } from './worker/mcp-provider'; +export type { StatelessMCPServerFactory, MCPToolDefinition, MCPCallToolResult } from './worker/mcp-provider'; +export { + WIRE_VERSION, + type SerializedModelRequest, + type SerializedModelResponse, + type InvokeModelActivityInput, + type JsonValue, +} from './common/serialized-model'; +export type { ModelActivityOptions, ModelSummaryProvider, AgentInputItem } from './common/model-activity-options'; +export { DEFAULT_MODEL_ACTIVITY_OPTIONS } from './common/model-activity-options'; +export { ToolSerializationError } from './workflow/tools'; +export type { ActivityToolDefinition, ActivityAsToolOptions, JsonObjectSchema } from './workflow/tools'; +export type { StatelessMcpServerOptions, TemporalMCPServer, MCPPromptDefinition } from './workflow/mcp-client'; +export { + isInWorkflow, + isReplaying, + TemporalTracingProcessor, + 
ensureTracingProcessorRegistered, +} from './workflow/tracing'; +export type { TemporalTracingProcessorOptions } from './workflow/tracing'; +export type { TemporalOpenAIRunnerOptions } from './workflow/runner'; + +export * as testing from './worker/testing'; diff --git a/packages/openai-agents/src/testing.ts b/packages/openai-agents/src/testing.ts new file mode 100644 index 000000000..3f84e0321 --- /dev/null +++ b/packages/openai-agents/src/testing.ts @@ -0,0 +1,9 @@ +export { + FakeModel, + FakeModelProvider, + textResponse, + toolCallResponse, + handoffResponse, + multiToolCallResponse, + ResponseBuilders, +} from './worker/testing'; diff --git a/packages/openai-agents/src/worker/activities.ts b/packages/openai-agents/src/worker/activities.ts new file mode 100644 index 000000000..254823df8 --- /dev/null +++ b/packages/openai-agents/src/worker/activities.ts @@ -0,0 +1,186 @@ +import type { ModelProvider, ModelRequest, ModelResponse } from '@openai/agents-core'; +import { ApplicationFailure } from '@temporalio/common'; +import { heartbeat, activityInfo } from '@temporalio/activity'; +import { + type InvokeModelActivityInput, + type JsonValue, + type SerializedModelRequest, + type SerializedModelResponse, + WIRE_VERSION, +} from '../common/serialized-model'; + +/** Projects an upstream ModelResponse to its JSON-serializable wire form. */ +export function toSerializedModelResponse(response: ModelResponse): SerializedModelResponse { + return { + __wireVersion: WIRE_VERSION, + // Usage is a class with an add() method, but all its data properties are JSON-safe primitives, + // arrays, or records. The double-cast is needed because TypeScript can't narrow a class to JsonValue. + usage: response.usage as unknown as JsonValue, + // AgentOutputItem[] items are Zod-inferred plain objects (no class instances, no methods). + // JSON round-trip preserves them losslessly. Double-cast needed for the same TS reason. 
/**
 * Reads a response header from an error thrown by an HTTP client.
 *
 * Two locations are consulted, in order: `error.headers`, then
 * `error.response.headers`. The first one that yields a string wins.
 *
 * @param error - The thrown value; anything that is not an object yields `undefined`.
 * @param name - Header name to look up.
 * @returns The header value, or `undefined` when it is absent or not a string.
 */
function getHeader(error: unknown, name: string): string | undefined {
  if (!error || typeof error !== 'object') return undefined;
  const source = error as { headers?: unknown; response?: { headers?: unknown } | null };
  return readHeaderValue(source.headers, name) ?? readHeaderValue(source.response?.headers, name);
}

/**
 * Extracts one header from a single container.
 *
 * Containers exposing a `get(name)` function (e.g. a `Map`) are queried through
 * it and never indexed. Plain objects are indexed by the exact name first and
 * then scanned case-insensitively, because HTTP header names are
 * case-insensitive but plain-object keys are not.
 */
function readHeaderValue(headers: unknown, name: string): string | undefined {
  if (!headers) return undefined;

  const bag = headers as { get?: unknown } & Record<string, unknown>;
  if (typeof bag.get === 'function') {
    const value: unknown = (headers as { get(key: string): unknown }).get(name);
    return typeof value === 'string' ? value : undefined;
  }
  if (typeof headers !== 'object') return undefined;

  const exact = bag[name];
  if (typeof exact === 'string') return exact;

  const wanted = name.toLowerCase();
  for (const [key, value] of Object.entries(bag)) {
    if (typeof value === 'string' && key.toLowerCase() === wanted) return value;
  }
  return undefined;
}
/**
 * Derives a server-suggested retry delay, in milliseconds, from an error's headers.
 *
 * `retry-after-ms` (milliseconds) takes precedence. Otherwise `retry-after` is
 * interpreted either as a number of seconds or as an HTTP-date, in which case
 * the delay is the time remaining until that date (never negative).
 *
 * @param error - The thrown value whose headers are inspected via `getHeader`.
 * @returns A finite, non-negative delay in milliseconds, or `undefined` when no
 *   usable hint is present.
 */
function getRetryAfterMs(error: unknown): number | undefined {
  // Negative or infinite values are not meaningful retry delays; discard them.
  const isUsableDelay = (value: number): boolean => Number.isFinite(value) && value >= 0;

  const millisHeader = getHeader(error, 'retry-after-ms');
  if (millisHeader) {
    const millis = parseFloat(millisHeader);
    if (isUsableDelay(millis)) return millis;
  }

  const retryAfter = getHeader(error, 'retry-after');
  if (!retryAfter) return undefined;

  const seconds = parseFloat(retryAfter);
  if (!Number.isNaN(seconds)) {
    return isUsableDelay(seconds) ? seconds * 1000 : undefined;
  }

  // Not numeric: try the HTTP-date form and convert it to a relative delay.
  const retryAt = Date.parse(retryAfter);
  return Number.isNaN(retryAt) ? undefined : Math.max(0, retryAt - Date.now());
}
` + + `Upgrade workers and clients together.`, + 'WireVersionMismatch' + ); + } + // Shape validation beyond version check is intentionally minimal: no Zod or runtime schema + // validation. The wire version literal + structural projection in toSerializedModelRequest + // cover the actual risks (version skew and field leakage). Adding a runtime validator would + // introduce a dependency with no concrete safety gain — upstream types are JSON-safe by design. + + const model = await Promise.resolve(modelProvider.getModel(input.modelName)); + + const info = activityInfo(); + let heartbeatTimer: ReturnType | undefined; + let stopped = false; + if (info.heartbeatTimeoutMs && info.heartbeatTimeoutMs > 0) { + const interval = info.heartbeatTimeoutMs / 2; + const scheduleHeartbeat = () => { + heartbeatTimer = setTimeout(() => { + if (stopped) return; + try { + heartbeat(); + } catch { + // Activity might be cancelled — ignore heartbeat errors + } + scheduleHeartbeat(); + }, interval); + }; + scheduleHeartbeat(); + } + + try { + const response = await model.getResponse(fromSerializedModelRequest(input.request)); + return toSerializedModelResponse(response); + } catch (error) { + const retryable = isRetryableError(error); + const message = error instanceof Error ? error.message : String(error); + const retryAfterMs = getRetryAfterMs(error); + + throw ApplicationFailure.create({ + message: `Model invocation failed: ${message}`, + type: errorTypeFromStatus(getStatus(error)), + nonRetryable: !retryable, + cause: error instanceof Error ? error : new Error(String(error)), + ...(retryAfterMs !== undefined ? 
/**
 * Exposes a {@link StatelessMCPServerFactory} as a set of named activity functions.
 *
 * Activity names are derived from the provider `name`, so each provider
 * registered on the same worker needs a distinct name.
 */
export class StatelessMCPServerProvider {
  constructor(
    public readonly name: string,
    private readonly factory: StatelessMCPServerFactory
  ) {}

  /**
   * Builds the activity map for this provider.
   *
   * Keys are `<name>-list-tools`, `<name>-call-tool-v2`, `<name>-list-prompts`
   * and `<name>-get-prompt-v2`, plus the deprecated aliases `<name>-call-tool`
   * and `<name>-get-prompt`, which point at the same functions as their `-v2`
   * counterparts.
   *
   * @returns A record mapping activity name to a function forwarding to the factory.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- activity maps are heterogeneous by design
  _getActivities(): Record<string, (...args: any[]) => Promise<any>> {
    const { name, factory } = this;

    const callTool = (input: Parameters<StatelessMCPServerFactory['callTool']>[0]) => factory.callTool(input);
    const getPrompt = (input: Parameters<StatelessMCPServerFactory['getPrompt']>[0]) => factory.getPrompt(input);

    return {
      [`${name}-list-tools`]: (input: unknown) => factory.listTools(input),
      [`${name}-call-tool-v2`]: callTool,
      [`${name}-list-prompts`]: (input: unknown) => factory.listPrompts(input),
      [`${name}-get-prompt-v2`]: getPrompt,
      // Deprecated: use call-tool-v2 instead (JSDoc on computed keys doesn't render in IDEs)
      [`${name}-call-tool`]: callTool,
      // Deprecated: use get-prompt-v2 instead
      [`${name}-get-prompt`]: getPrompt,
    };
  }
}
/**
 * A Temporal plugin that integrates the OpenAI Agents SDK for use in workflows.
 * Registers model invocation activities so that workflow-side ActivityBackedModel
 * can delegate LLM calls to the activity worker.
 */
export class OpenAIAgentsPlugin extends SimplePlugin {
  /**
   * Collects the model activity and every MCP provider's activities into one
   * map and hands it to `SimplePlugin`.
   *
   * @param options - Plugin configuration; `modelProvider` is required.
   * @throws Error if two entries of `options.mcpServerProviders` share a `name`,
   *   since their activity keys would collide.
   */
  constructor(options: OpenAIAgentsPluginOptions) {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- activity maps are heterogeneous by design
    const activities: Record<string, (...args: any[]) => Promise<any>> = {
      ...createModelActivity(options.modelProvider),
    };

    const seenNames = new Set<string>();
    for (const provider of options.mcpServerProviders ?? []) {
      if (seenNames.has(provider.name)) {
        throw new Error(
          `Duplicate MCP server provider name: '${provider.name}'. Each provider must have a unique name — activity keys collide.`
        );
      }
      seenNames.add(provider.name);
      Object.assign(activities, provider._getActivities());
    }

    super({
      name: 'OpenAIAgentsPlugin',
      activities,
    });
  }
}
/**
 * Builds a fake model response containing several function calls.
 *
 * Each call becomes a completed `function_call` output item whose `arguments`
 * is the JSON encoding of `args`. Call ids are `call_fake_<name>`; when that id
 * is already taken within this response (e.g. the same tool is called twice),
 * a numeric suffix (`_2`, `_3`, …) is appended so every id in the response is
 * unique.
 *
 * @param calls - Tool calls to emit, in order.
 * @returns A response with one output item per call and a fixed fake usage.
 */
export function multiToolCallResponse(
  calls: Array<{ name: string; args: Record<string, unknown> }>
): ModelResponse {
  const usedIds = new Set<string>();
  const nextCallId = (toolName: string): string => {
    const base = `call_fake_${toolName}`;
    let id = base;
    for (let n = 2; usedIds.has(id); n++) {
      id = `${base}_${n}`;
    }
    usedIds.add(id);
    return id;
  };

  const output: AgentOutputItem[] = calls.map((c) => ({
    type: 'function_call' as const,
    name: c.name,
    arguments: JSON.stringify(c.args),
    callId: nextCallId(c.name),
    status: 'completed' as const,
  }));
  return { output, usage: fakeUsage(20) };
}
sandbox. + +export { TemporalOpenAIRunner } from './workflow/runner'; +export type { TemporalRunOptions, TemporalOpenAIRunnerOptions } from './workflow/runner'; +export { activityAsTool, ToolSerializationError } from './workflow/tools'; +export type { ActivityToolDefinition, ActivityAsToolOptions, JsonObjectSchema } from './workflow/tools'; +export { statelessMcpServer } from './workflow/mcp-client'; +export type { StatelessMcpServerOptions, TemporalMCPServer, MCPPromptDefinition } from './workflow/mcp-client'; +export { + isInWorkflow, + isReplaying, + TemporalTracingProcessor, + ensureTracingProcessorRegistered, +} from './workflow/tracing'; +export type { TemporalTracingProcessorOptions } from './workflow/tracing'; +export { + WIRE_VERSION, + type SerializedModelRequest, + type SerializedModelResponse, + type InvokeModelActivityInput, + type JsonValue, +} from './common/serialized-model'; +export { toSerializedModelRequest } from './workflow/activity-backed-model'; diff --git a/packages/openai-agents/src/workflow/activity-backed-model.ts b/packages/openai-agents/src/workflow/activity-backed-model.ts new file mode 100644 index 000000000..0be6836ff --- /dev/null +++ b/packages/openai-agents/src/workflow/activity-backed-model.ts @@ -0,0 +1,146 @@ +import { + Usage, + withGenerationSpan, + type Agent, + type Model, + type ModelRequest, + type ModelResponse, + type StreamEvent, +} from '@openai/agents-core'; +import { proxyActivities, proxyLocalActivities } from '@temporalio/workflow'; +import type { ActivityOptions, LocalActivityOptions } from '@temporalio/common'; +import type { ModelActivityOptions, ModelSummaryProvider } from '../common/model-activity-options'; +import { + type InvokeModelActivityInput, + type JsonValue, + type SerializedModelRequest, + type SerializedModelResponse, + WIRE_VERSION, +} from '../common/serialized-model'; + +export function toSerializedModelRequest(request: ModelRequest): SerializedModelRequest { + return { + __wireVersion: 
/**
 * A Model implementation that delegates to a Temporal activity.
 * Replaces the agent's real model in workflow context, ensuring all LLM calls
 * go through the activity worker where real ModelProviders live.
 */
export class ActivityBackedModel implements Model {
  /** Activity proxy (regular or local, per `useLocalActivity`) used for every model call. */
  private readonly activities: ModelActivities;
  private readonly modelParams: ModelActivityOptions;
  /** Agent passed to a `ModelSummaryProvider`, if one was set via {@link setAgent}. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- accepts agents of any context/output type
  private agent?: Agent<any, any>;

  /**
   * @param modelName - Name forwarded to the activity and recorded on the generation span.
   * @param modelParams - Activity options; `startToCloseTimeout` falls back to `'60s'`.
   */
  constructor(
    private readonly modelName: string,
    modelParams: ModelActivityOptions
  ) {
    this.modelParams = modelParams;

    const { summaryOverride } = modelParams;
    // Options that apply to both regular and local activities.
    const shared: Pick<
      ActivityOptions,
      'startToCloseTimeout' | 'scheduleToCloseTimeout' | 'retry' | 'cancellationType' | 'summary'
    > = {
      startToCloseTimeout: modelParams.startToCloseTimeout ?? '60s',
      scheduleToCloseTimeout: modelParams.scheduleToCloseTimeout,
      retry: modelParams.retryPolicy,
      cancellationType: modelParams.cancellationType,
      // A provider-style override is resolved per call in getResponse().
      summary: typeof summaryOverride === 'string' ? summaryOverride : undefined,
    };

    if (modelParams.useLocalActivity) {
      const localOptions: LocalActivityOptions = shared;
      this.activities = proxyLocalActivities<ModelActivities>(localOptions);
    } else {
      const options: ActivityOptions = {
        ...shared,
        heartbeatTimeout: modelParams.heartbeatTimeout,
        taskQueue: modelParams.taskQueue,
        scheduleToStartTimeout: modelParams.scheduleToStartTimeout,
        priority: modelParams.priority,
      };
      this.activities = proxyActivities<ModelActivities>(options);
    }
  }

  /** Records the agent that is handed to a `ModelSummaryProvider` on each call. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- accepts agents of any context/output type
  setAgent(agent: Agent<any, any>): void {
    this.agent = agent;
  }

  /**
   * Serializes the request, runs it through the model activity and rebuilds the response.
   *
   * When `summaryOverride` is a `ModelSummaryProvider`, its summary is computed
   * for this request and passed through the proxy's `executeWithOptions`.
   *
   * @throws Error if a `ModelSummaryProvider` is configured but the activity
   *   proxy does not expose `executeWithOptions`.
   */
  async getResponse(request: ModelRequest): Promise<ModelResponse> {
    // Upstream model adapters emit a generation span inside getResponse().
    // We mirror that here so the trace tree stays: agent → generation → activity.
    return withGenerationSpan(async (span) => {
      span.spanData.model = this.modelName;

      const input: InvokeModelActivityInput = {
        modelName: this.modelName,
        request: toSerializedModelRequest(request),
      };

      const summaryOverride = this.modelParams.summaryOverride;
      if (summaryOverride && typeof summaryOverride !== 'string') {
        const summary = summaryOverride.provide(this.agent, request.systemInstructions, request.input);

        // Per-call options are only reachable through executeWithOptions, which
        // the ModelActivities interface does not declare; probe for it at runtime.
        const invoke = this.activities.invokeModelActivity as
          | (ModelActivities['invokeModelActivity'] & {
              executeWithOptions?: (
                options: { summary: string },
                args: [InvokeModelActivityInput]
              ) => Promise<SerializedModelResponse>;
            })
          | undefined;
        if (typeof invoke?.executeWithOptions !== 'function') {
          throw new Error(
            'ModelSummaryProvider requires executeWithOptions on the activity proxy, ' +
              'but it is not available. Use a string summaryOverride instead, or ensure ' +
              'the activity proxy supports per-call options.'
          );
        }
        return fromSerializedModelResponse(await invoke.executeWithOptions({ summary }, [input]));
      }

      return fromSerializedModelResponse(await this.activities.invokeModelActivity(input));
    });
  }

  /** Streaming is unsupported: iterating the result throws. */
  // eslint-disable-next-line require-yield
  async *getStreamedResponse(_request: ModelRequest): AsyncIterable<StreamEvent> {
    throw new Error(
      'Streaming is not supported in Temporal workflows. Use non-streaming mode with TemporalOpenAIRunner.'
    );
  }
}
import type { Agent } from '@openai/agents-core';

/**
 * Single place where an Agent is re-typed so its `model`, `handoffs` and `tools` fields can
 * be read as loosely-typed values. The agent itself is returned unchanged — this is a
 * compile-time view only, so callers do not need inline casts.
 *
 * @param agent - Agent to inspect.
 * @returns The same object, typed as a bag of optional `model` / `handoffs` / `tools`.
 */
export function getAgentInternals(agent: Agent<any, any>): {
  model?: unknown;
  handoffs?: unknown[];
  tools?: unknown[];
} {
  const untyped: unknown = agent;
  return untyped as { model?: unknown; handoffs?: unknown[]; tools?: unknown[] };
}
/**
 * Upstream version contracts — @openai/agents-core ~0.3.0
 *
 * This module depends on three implicit contracts from the upstream library:
 *
 * 1. Agent.clone({ model }) — accepts a Model override and returns a new Agent
 *    with the same configuration except the model field.
 * 2. Handoff.onInvokeHandoff(ctx, args): Promise — the next-agent callback
 *    invoked by the runner when a handoff is triggered.
 * 3. Agent.handoffs — iterable as (Agent | Handoff)[]. Each entry is either a bare
 *    Agent (auto-wrapped by the runner) or a Handoff instance.
 *
 * When upgrading @openai/agents-core, re-verify these contracts against the new
 * version's source/types before merging.
 */
import { Agent, Handoff } from '@openai/agents-core';
import { ApplicationFailure } from '@temporalio/common';
import type { ModelActivityOptions } from '../common/model-activity-options';
import { ActivityBackedModel } from './activity-backed-model';
import { getAgentInternals } from './agent-internals';

/** Builds the non-retryable `AgentsWorkflowError` failure used for every authoring mistake below. */
function agentsWorkflowError(message: string): ApplicationFailure {
  return ApplicationFailure.create({ message, type: 'AgentsWorkflowError', nonRetryable: true });
}

/**
 * Recursively convert an agent graph: validate tools, replace each agent's model
 * with an ActivityBackedModel, and clone handoffs. Single-pass traversal using
 * a seen map to handle circular handoff references.
 *
 * @param agent - Root of the agent graph to convert.
 * @param modelParams - Activity options handed to every ActivityBackedModel created.
 * @param seen - Original → converted map shared across the recursion; omit on the first call.
 * @param modelNameOverride - Model name that replaces whatever each agent declares
 *   (the runner passes `runConfig.model` here).
 * @returns The converted clone of `agent`.
 * @throws ApplicationFailure (`AgentsWorkflowError`, non-retryable) when a tool is a raw
 *   function, when an agent carries a Model object and no override is given, or when no
 *   model name can be determined.
 */
export function convertAgent(
  agent: Agent<any, any>,
  modelParams: ModelActivityOptions,
  seen?: Map<Agent<any, any>, Agent<any, any>>,
  modelNameOverride?: string
): Agent<any, any> {
  const visited = seen ?? new Map<Agent<any, any>, Agent<any, any>>();
  const alreadyConverted = visited.get(agent);
  if (alreadyConverted) return alreadyConverted;

  const internals = getAgentInternals(agent);

  // --- Tool validation ---
  // Accepted tool types (alphabetical):
  //   activityAsTool() products (type: 'function', TEMPORAL_ACTIVITY_TOOL_MARKER) — runs as Temporal activity
  //   ApplyPatch (local I/O) — passes validation but will fail in sandbox
  //   Code interpreter (hosted) — runs on OpenAI servers
  //   Computer (local I/O) — passes validation but will fail in sandbox
  //   File search (hosted) — runs on OpenAI servers
  //   Image generation (hosted) — runs on OpenAI servers
  //   MCP tools (hosted) — runs on OpenAI servers
  //   Shell (local I/O) — passes validation but will fail in sandbox
  //   tool() factory products (type: 'function') — runs inline in workflow; user must ensure determinism
  //   Web search (hosted) — runs on OpenAI servers
  //
  // Rejected:
  //   Raw functions (typeof === 'function') — authoring mistake; use tool() or activityAsTool()
  for (const candidate of internals.tools ?? []) {
    if (typeof candidate === 'function') {
      throw agentsWorkflowError(
        `Agent '${agent.name}': Provided tool is a raw function, not a tool object. ` +
          'Did you mean to use tool() or activityAsTool()?'
      );
    }
  }

  // --- Convert model ---
  const rawModel = internals.model;
  const declaredModelName = typeof rawModel === 'string' ? rawModel : undefined;
  const declaresModelObject = rawModel !== undefined && rawModel !== null && declaredModelName === undefined;
  // A Model object is only fatal when nothing overrides it: the error text itself tells the
  // user to pass runConfig.model, so that override must be honored instead of rejected.
  if (declaresModelObject && modelNameOverride === undefined) {
    throw agentsWorkflowError(
      `Agent '${agent.name}' has a Model object instead of a string model name. ` +
        'In Temporal workflows, all models must be specified as strings — use ' +
        'runConfig.model to override, or declare a string model on the agent.'
    );
  }
  const modelName = modelNameOverride ?? declaredModelName;
  if (modelName === undefined) {
    throw agentsWorkflowError(
      `Agent '${agent.name}' has no model declared and no runConfig.model override given. ` +
        'Declare a model on the agent or pass runConfig.model to runner.run().'
    );
  }

  const activityBackedModel = new ActivityBackedModel(modelName, modelParams);
  // Pass the ORIGINAL agent (pre-clone) so the summary provider sees the
  // user-declared `name` and `instructions`, not the wrapper. The cloned
  // agent has the same field values today, but binding to the original
  // makes that invariance explicit and survives any future clone-side
  // mutation.
  activityBackedModel.setAgent(agent);

  const converted = agent.clone({ model: activityBackedModel });
  // Register before descending so a handoff cycle back to `agent` resolves to `converted`.
  visited.set(agent, converted);

  converted.handoffs = (internals.handoffs ?? []).map((entry: unknown) => {
    if (!(entry instanceof Handoff)) {
      return convertAgent(entry as Agent<any, any>, modelParams, visited, modelNameOverride);
    }

    const targetAgent = convertAgent(entry.agent, modelParams, visited, modelNameOverride);
    const originalOnInvoke = entry.onInvokeHandoff;
    // Copy the handoff (prototype + own property descriptors) so the caller's object is not mutated.
    const handoffCopy = Object.create(Object.getPrototypeOf(entry), Object.getOwnPropertyDescriptors(entry)) as Handoff<
      any,
      any
    >;
    handoffCopy.agent = targetAgent;
    handoffCopy.onInvokeHandoff = async (ctx: any, args: string) => {
      // Still run the user's callback for its side effects (with the original handoff as
      // receiver), but always route to the converted agent.
      await originalOnInvoke.call(entry, ctx, args);
      return targetAgent;
    };
    return handoffCopy;
  });

  return converted;
}
import type { Model, ModelProvider, ModelRequest, ModelResponse, StreamEvent } from '@openai/agents-core';

// Message for the case that should be unreachable: a model call that bypassed ActivityBackedModel.
const UNEXPECTED_CALL_MESSAGE =
  'DummyModel.getResponse should never be called. ' +
  'All model calls should go through ActivityBackedModel via activities. ' +
  'If you see this error, an agent has a model that was not replaced by convertAgent().';

/**
 * Placeholder Model whose methods only throw. It exists as a safety net: if it is ever
 * invoked, an agent reached the runner without its model having been swapped by convertAgent().
 */
class DummyModel implements Model {
  /** Always rejects with an explanatory error. */
  async getResponse(_request: ModelRequest): Promise<ModelResponse> {
    throw new Error(UNEXPECTED_CALL_MESSAGE);
  }

  /** Always throws on first iteration: streaming is unavailable in workflows. */
  // eslint-disable-next-line require-yield
  async *getStreamedResponse(_request: ModelRequest): AsyncIterable<StreamEvent> {
    throw new Error('Streaming is not supported in Temporal workflows.');
  }
}

/**
 * ModelProvider handed to the internal Runner in workflow context. It returns a fresh
 * DummyModel for any requested name, so no real provider (e.g. OpenAIProvider) has to be
 * imported into the workflow sandbox.
 */
export class DummyModelProvider implements ModelProvider {
  getModel(_modelName?: string): Model {
    const placeholder: Model = new DummyModel();
    return placeholder;
  }
}
import { Headers } from 'headers-polyfill';
import { inWorkflowContext, uuid4 } from '@temporalio/workflow';

// Side-effect module. When evaluated inside a workflow context it installs the globals below
// (each one only if it is missing, except the web-streams polyfill which is always required).
// Outside a workflow context nothing is touched.
if (inWorkflowContext()) {
  // Headers polyfill (shared with ai-sdk)
  if (typeof globalThis.Headers === 'undefined') {
    (globalThis as any).Headers = Headers;
  }

  // ReadableStream polyfill (shared with ai-sdk)
  // eslint-disable-next-line @typescript-eslint/no-require-imports,import/no-unassigned-import
  require('web-streams-polyfill/polyfill');

  // structuredClone polyfill (shared with ai-sdk)
  if (!('structuredClone' in globalThis)) {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const sc = require('@ungap/structured-clone');
    (globalThis as any).structuredClone = sc.default;
  }

  // crypto.randomUUID polyfill — agents-core calls this internally even with tracing disabled.
  // Uses Temporal's deterministic uuid4() which is backed by a per-workflow seeded PRNG,
  // ensuring replay safety and per-workflow isolation.
  if (typeof (globalThis as any).crypto === 'undefined') {
    (globalThis as any).crypto = {};
  }
  if (!(globalThis as any).crypto.randomUUID) {
    // Counter used only when uuid4() throws; it yields UUID-shaped strings whose last
    // group is the zero-padded hex counter, so successive fallback values stay distinct.
    let fallbackCounter = 0;
    (globalThis as any).crypto.randomUUID = (): string => {
      try {
        return uuid4();
      } catch {
        const c = (fallbackCounter++).toString(16).padStart(12, '0');
        return `00000000-0000-4000-8000-${c}`;
      }
    };
  }

  // EventTarget polyfill — agents-core uses EventTarget internally for event handling.
  // Minimal implementation: listeners are plain functions keyed by event type; listener
  // options (capture/once/passive) and object-style listeners are not handled.
  if (typeof (globalThis as any).EventTarget === 'undefined') {
    (globalThis as any).EventTarget = class EventTargetPolyfill {
      _listeners: Record<string, Array<(event: any) => void>> = {};

      addEventListener(type: string, listener: (event: any) => void): void {
        if (!this._listeners[type]) this._listeners[type] = [];
        this._listeners[type].push(listener);
      }

      removeEventListener(type: string, listener: (event: any) => void): void {
        const arr = this._listeners[type];
        // filter() builds a new array, so a dispatch that is iterating the old one is unaffected.
        if (arr) this._listeners[type] = arr.filter((l) => l !== listener);
      }

      dispatchEvent(event: any): boolean {
        (event as any).target = this;
        (event as any).currentTarget = this;
        const arr = this._listeners[event.type];
        if (arr) {
          arr.forEach((l) => {
            try {
              l(event);
            } catch {
              // Isolate listener errors — one bad listener shouldn't break dispatch
            }
          });
        }
        // Always reports "not cancelled"; preventDefault is not modelled.
        return true;
      }
    };
  }

  // Event polyfill — carries only type / bubbles / cancelable.
  if (typeof (globalThis as any).Event === 'undefined') {
    (globalThis as any).Event = class EventPolyfill {
      type: string;
      bubbles: boolean;
      cancelable: boolean;

      constructor(type: string, opts?: { bubbles?: boolean; cancelable?: boolean }) {
        this.type = type;
        this.bubbles = opts?.bubbles ?? false;
        this.cancelable = opts?.cancelable ?? false;
      }
    };
  }

  // CustomEvent polyfill — must come after the Event polyfill because it extends whatever
  // `globalThis.Event` is at this point.
  if (typeof (globalThis as any).CustomEvent === 'undefined') {
    const EventClass = (globalThis as any).Event;
    (globalThis as any).CustomEvent = class CustomEventPolyfill extends EventClass {
      detail: any;

      constructor(type: string, opts?: { bubbles?: boolean; cancelable?: boolean; detail?: any }) {
        super(type, opts);
        this.detail = opts?.detail ?? null;
      }
    };
  }
}
import type { MCPServer } from '@openai/agents-core';
import type { Duration, RetryPolicy } from '@temporalio/common';
import { proxyActivities } from '@temporalio/workflow';

/** Shape of one prompt entry as returned by `listPrompts`. */
export interface MCPPromptDefinition {
  name: string;
  description?: string;
  arguments?: Array<{
    name: string;
    description?: string;
    required?: boolean;
  }>;
}

/** MCPServer extended with the prompt operations that are also routed through activities. */
export interface TemporalMCPServer extends MCPServer {
  listPrompts(factoryArgument?: unknown): Promise<MCPPromptDefinition[]>;
  // NOTE(review): the resolved type of getPrompt was lost in extraction — confirm against the package source.
  getPrompt(promptName: string, args?: Record<string, unknown> | null, factoryArgument?: unknown): Promise<any>;
}

/** Options for {@link statelessMcpServer}. */
export interface StatelessMcpServerOptions {
  /** Cache the first `listTools` result in the returned object. Defaults to `true`. */
  cacheToolsList?: boolean;
  /** Defaults to `'1 minute'`. */
  startToCloseTimeout?: Duration;
  heartbeatTimeout?: Duration;
  taskQueue?: string;
  retryPolicy?: RetryPolicy;
  /** Opaque value forwarded to the activities as `factoryArgument` when defined. */
  factoryArgument?: unknown;
}

/**
 * Builds a workflow-side MCP server facade whose operations each run a Temporal activity
 * named after the server: `<name>-list-tools`, `<name>-call-tool-v2`, `<name>-list-prompts`
 * and `<name>-get-prompt-v2`. `connect` and `close` do nothing here.
 *
 * @param name - Server name; also the prefix of the activity names.
 * @param options - Activity scheduling options, caching switch and default factory argument.
 */
export function statelessMcpServer(name: string, options?: StatelessMcpServerOptions): TemporalMCPServer {
  const activities = proxyActivities<Record<string, (...args: any[]) => Promise<any>>>({
    startToCloseTimeout: options?.startToCloseTimeout ?? '1 minute',
    heartbeatTimeout: options?.heartbeatTimeout,
    taskQueue: options?.taskQueue,
    retry: options?.retryPolicy,
  });

  const activityNames = {
    listTools: `${name}-list-tools`,
    callTool: `${name}-call-tool-v2`,
    listPrompts: `${name}-list-prompts`,
    getPrompt: `${name}-get-prompt-v2`,
  };

  const cachingEnabled = options?.cacheToolsList ?? true;
  const defaultFactoryArg = options?.factoryArgument;
  let toolsCache: any[] | undefined;

  // Looks up an activity on the proxy, failing loudly if the proxy yields nothing for it.
  const resolveActivity = (activityName: string): ((...args: any[]) => Promise<any>) => {
    const fn = activities[activityName];
    if (!fn) throw new Error(`Activity '${activityName}' not found`);
    return fn;
  };

  // `{ factoryArgument }` when a value is defined, otherwise `undefined` (spreads to nothing).
  const factoryPayload = (value: unknown): { factoryArgument: unknown } | undefined =>
    value !== undefined ? { factoryArgument: value } : undefined;

  return {
    cacheToolsList: cachingEnabled,
    get name() {
      return name;
    },
    async connect() {
      // No-op — connections are managed in activities
    },
    async close() {
      // No-op
    },
    async listTools() {
      if (cachingEnabled && toolsCache) {
        return toolsCache;
      }
      const fetched = await resolveActivity(activityNames.listTools)(factoryPayload(defaultFactoryArg));
      if (cachingEnabled) {
        toolsCache = fetched;
      }
      return fetched;
    },
    async callTool(toolName: string, args: Record<string, unknown> | null) {
      const callToolFn = resolveActivity(activityNames.callTool);
      return callToolFn({ toolName, args, ...factoryPayload(defaultFactoryArg) });
    },
    async listPrompts(overrideFactoryArg?: unknown) {
      const listPromptsFn = resolveActivity(activityNames.listPrompts);
      // A per-call argument wins over the one configured in options.
      return listPromptsFn(factoryPayload(overrideFactoryArg ?? defaultFactoryArg));
    },
    async getPrompt(promptName: string, args?: Record<string, unknown> | null, overrideFactoryArg?: unknown) {
      const getPromptFn = resolveActivity(activityNames.getPrompt);
      return getPromptFn({
        promptName,
        promptArguments: args ?? null,
        ...factoryPayload(overrideFactoryArg ?? defaultFactoryArg),
      });
    },
    async invalidateToolsCache() {
      toolsCache = undefined;
    },
  };
}
import {
  Agent,
  Runner,
  type AgentOutputType,
  type CallModelInputFilter,
  type HandoffInputData,
  type InputGuardrail,
  type ModelSettings,
  type OutputGuardrail,
  type RunResult,
  type Session,
  type SessionInputCallback,
  type TracingConfig,
} from '@openai/agents-core';
import { ApplicationFailure } from '@temporalio/common';
import { DEFAULT_MODEL_ACTIVITY_OPTIONS, type ModelActivityOptions } from '../common/model-activity-options';
import { unwrapTemporalFailure } from '../common/errors';
import { DummyModelProvider } from './dummy-model-provider';
import { convertAgent } from './convert-agent';
import { ensureTracingProcessorRegistered } from './tracing';

/** Per-call options accepted by {@link TemporalOpenAIRunner.run}. */
export interface TemporalRunOptions<TContext = undefined> {
  /** Run context passed to agents and tools */
  context?: TContext;
  /** Maximum agent loop turns before aborting */
  maxTurns?: number;
  /** Previous OpenAI response ID for conversation continuity */
  previousResponseId?: string;
  /** OpenAI conversation ID for multi-turn persistence */
  conversationId?: string;
  /** Session state for conversation memory */
  session?: Session;
  /** Customize how session history merges with current turn input */
  sessionInputCallback?: SessionInputCallback;
  /** Edit system instructions or input items just before calling the model */
  callModelInputFilter?: CallModelInputFilter;
  /** Per-run tracing config override */
  tracing?: TracingConfig;
  // signal intentionally omitted — use Temporal CancellationScope for workflow cancellation

  /** Runner-level config overrides */
  runConfig?: {
    /** Model name override (string only — Model objects can't cross the workflow/activity boundary) */
    model?: string;
    /** Global model settings (temperature, topP, etc.). Non-null values override agent-specific settings. */
    modelSettings?: ModelSettings;
    /** Global handoff input filter. Agent-level inputFilter takes precedence. */
    handoffInputFilter?: (input: HandoffInputData) => HandoffInputData;
    /** Input guardrails run inline in the workflow — callbacks must be deterministic */
    inputGuardrails?: InputGuardrail[];
    /** Output guardrails run inline in the workflow — callbacks must be deterministic */
    outputGuardrails?: OutputGuardrail<AgentOutputType<unknown>>[];
    /** Disable tracing for this run */
    tracingDisabled?: boolean;
    /** Include sensitive data (tool I/O, LLM outputs) in trace spans */
    traceIncludeSensitiveData?: boolean;
    /** Logical name for the run, used in tracing */
    workflowName?: string;
    /** Custom trace ID */
    traceId?: string;
    /** Grouping ID for linking traces (e.g., chat thread ID) */
    groupId?: string;
    /** Additional metadata attached to the trace */
    traceMetadata?: Record<string, string>;
  };
}

/** Constructor options: model-activity scheduling options plus a tracing switch. */
export interface TemporalOpenAIRunnerOptions extends ModelActivityOptions {
  /**
   * When `true`, emit OTel spans even during workflow replay. Defaults to `false`.
   * Useful for debugging replay-divergence issues where trace output helps identify
   * which spans differ between original execution and replay.
   */
  startSpansInReplay?: boolean;
}

/**
 * Agent runner for workflow code: the agent loop runs in the workflow while every model
 * call is routed to an activity.
 *
 * There is no streaming entry point — use run() for all agent invocations.
 */
export class TemporalOpenAIRunner {
  private readonly modelParams: ModelActivityOptions;

  /**
   * @param options - Overrides merged over `DEFAULT_MODEL_ACTIVITY_OPTIONS`;
   *   `startSpansInReplay` is split off and forwarded to tracing registration.
   */
  constructor(options?: TemporalOpenAIRunnerOptions) {
    const { startSpansInReplay, ...activityOverrides } = options ?? {};
    this.modelParams = { ...DEFAULT_MODEL_ACTIVITY_OPTIONS, ...activityOverrides };
    ensureTracingProcessorRegistered({ startSpansInReplay });
  }

  /**
   * Converts the agent graph with convertAgent() and runs it on an internal Runner
   * configured with a DummyModelProvider.
   *
   * @param agent - Root agent; it is converted, not mutated in place.
   * @param input - User input text for the run.
   * @param options - Per-run options; `runConfig.model` becomes the model-name override.
   * @throws The TemporalFailure found by unwrapTemporalFailure(), if any; otherwise a
   *   non-retryable `AgentsWorkflowError` ApplicationFailure wrapping any other Error.
   *   Thrown non-Error values propagate unchanged.
   */
  async run<TAgent extends Agent<any, any>, TContext = undefined>(
    agent: TAgent,
    input: string,
    options?: TemporalRunOptions<TContext>
  ): Promise<RunResult<TContext, TAgent>> {
    const { model: modelOverride, ...runnerConfig } = options?.runConfig ?? {};
    const workflowAgent = convertAgent(agent, this.modelParams, undefined, modelOverride);
    const runner = new Runner({
      modelProvider: new DummyModelProvider(),
      ...runnerConfig,
    });

    try {
      const result = await runner.run(workflowAgent, input, {
        maxTurns: options?.maxTurns,
        context: options?.context,
        previousResponseId: options?.previousResponseId,
        conversationId: options?.conversationId,
        session: options?.session,
        sessionInputCallback: options?.sessionInputCallback,
        callModelInputFilter: options?.callModelInputFilter,
        tracing: options?.tracing,
      });
      return result as RunResult<TContext, TAgent>;
    } catch (error) {
      // Surface an embedded Temporal failure as-is rather than re-wrapping it.
      const temporalFailure = unwrapTemporalFailure(error);
      if (temporalFailure) throw temporalFailure;
      if (!(error instanceof Error)) throw error;
      throw ApplicationFailure.create({
        message: `Agent workflow failed: ${error.message}`,
        type: 'AgentsWorkflowError',
        nonRetryable: true,
        cause: error,
      });
    }
  }
}
import type { FunctionTool, RunContext } from '@openai/agents-core';
import type { Duration, RetryPolicy } from '@temporalio/common';
import { proxyActivities } from '@temporalio/workflow';

/** Raised by an activity-backed tool when its JSON input cannot be parsed or its activity cannot be resolved. */
export class ToolSerializationError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'ToolSerializationError';
  }
}

// JsonObjectSchema is not publicly exported from @openai/agents-core — local equivalent
export interface JsonObjectSchema<_T = unknown> {
  type: 'object';
  // NOTE(review): the value type of `properties` was lost in extraction — confirm against the package source.
  properties: Record<string, any>;
  required?: string[];
  additionalProperties?: boolean;
}

/** Well-known symbol set to `true` on every tool produced by {@link activityAsTool}. */
export const TEMPORAL_ACTIVITY_TOOL_MARKER = Symbol.for('temporal.activityAsTool');

/**
 * Definition for wrapping a Temporal activity as an agent tool.
 * The `activityFn` is used for TypeScript type inference only — it is never called directly.
 */
export interface ActivityToolDefinition<TInput = any, TOutput = any> {
  /** Activity name — must match the registered activity on the worker */
  name: string;
  /** Tool description shown to the model */
  description: string;
  /** Explicit JSON schema for the tool parameters (not Zod) */
  parameters: JsonObjectSchema<TInput>;
  /** Activity function reference — used for type inference only, never called */
  activityFn: (input: TInput) => Promise<TOutput>;
}

/**
 * Options for controlling how the tool's activity is scheduled.
 */
export interface ActivityAsToolOptions {
  /** Defaults to `'1 minute'`. */
  startToCloseTimeout?: Duration;
  heartbeatTimeout?: Duration;
  taskQueue?: string;
  retryPolicy?: RetryPolicy;
  /** Value of the tool's `strict` flag. Defaults to `true`. */
  strict?: boolean;
}

/**
 * Wrap a Temporal activity as an OpenAI Agents FunctionTool.
 * When the agent invokes this tool, it schedules the named activity
 * via `proxyActivities` and returns the stringified result.
 *
 * String results are returned unchanged; everything else is JSON-encoded. A result with no
 * JSON representation (e.g. `undefined` from a void activity) is reported as `'null'`, so
 * the tool always resolves to a string.
 *
 * @param definition - Activity tool definition (name, description, JSON schema, type reference)
 * @param options - Activity scheduling options (timeouts, retry, task queue)
 * @returns A FunctionTool tagged with {@link TEMPORAL_ACTIVITY_TOOL_MARKER}.
 */
export function activityAsTool<TInput = any, TOutput = any>(
  definition: ActivityToolDefinition<TInput, TOutput>,
  options?: ActivityAsToolOptions
): FunctionTool {
  const activities = proxyActivities<Record<string, (input: TInput) => Promise<TOutput>>>({
    startToCloseTimeout: options?.startToCloseTimeout ?? '1 minute',
    heartbeatTimeout: options?.heartbeatTimeout,
    taskQueue: options?.taskQueue,
    retry: options?.retryPolicy,
  });

  const t = {
    type: 'function',
    name: definition.name,
    description: definition.description,
    parameters: definition.parameters as any,
    strict: options?.strict ?? true,
    invoke: async (_runContext: RunContext, input: string): Promise<string> => {
      let parsedInput: TInput;
      try {
        parsedInput = JSON.parse(input);
      } catch (e) {
        throw new ToolSerializationError(`Failed to parse tool input for '${definition.name}': ${e}`);
      }
      const activityFn = activities[definition.name];
      if (!activityFn) {
        throw new ToolSerializationError(`Activity '${definition.name}' not found`);
      }
      const result: unknown = await activityFn(parsedInput);
      if (typeof result === 'string') return result;
      // JSON.stringify yields undefined (not a string) for undefined, functions and symbols;
      // fall back to JSON null so the declared Promise<string> contract holds.
      const encoded: string | undefined = JSON.stringify(result);
      return encoded ?? 'null';
    },
    needsApproval: async () => false,
    isEnabled: async () => true,
  } as FunctionTool;

  (t as any)[TEMPORAL_ACTIVITY_TOOL_MARKER] = true;

  return t;
}
import * as otel from '@opentelemetry/api';
import {
  type TracingProcessor,
  type Span,
  type Trace,
  type SpanData,
  addTraceProcessor,
  setTracingDisabled,
} from '@openai/agents-core';
import { inWorkflowContext, workflowInfo } from '@temporalio/workflow';

// --- Existing public helpers (preserved) ---

/** `true` when called from workflow context. */
export function isInWorkflow(): boolean {
  return inWorkflowContext();
}

/** `true` only inside a workflow that is currently replaying; `false` outside workflow context. */
export function isReplaying(): boolean {
  return inWorkflowContext() && workflowInfo().unsafe.isReplaying;
}

// --- OTel bridge: maps OpenAI Agents SDK trace events to OTel spans ---

const TRACER_NAME = '@temporalio/openai-agents';
const REGISTERED_KEY = Symbol.for('temporal-openai-agents-processor-registered');

/**
 * OTel span name for an Agents SDK span: `openai.agents.<type>`, with `:<name>` appended
 * for the span kinds that carry a name. Unrecognized kinds map to `openai.agents.unknown`.
 */
function spanNameFromData(data: SpanData): string {
  switch (data.type) {
    case 'agent':
    case 'function':
    case 'guardrail':
    case 'custom':
      return `openai.agents.${data.type}:${data.name}`;
    case 'generation':
    case 'response':
    case 'handoff':
    case 'transcription':
    case 'speech':
    case 'speech_group':
    case 'mcp_tools':
      return `openai.agents.${data.type}`;
    default:
      return 'openai.agents.unknown';
  }
}

/** Attributes set when the OTel span is started (read from the span data at start time). */
function staticAttributesFromSpanData(data: SpanData): otel.Attributes {
  const attributes: otel.Attributes = { 'openai.agents.span_type': data.type };

  if (data.type === 'agent') {
    attributes['openai.agents.agent.name'] = data.name;
    if (data.handoffs) attributes['openai.agents.agent.handoffs'] = data.handoffs.join(',');
    if (data.output_type) attributes['openai.agents.agent.output_type'] = data.output_type;
  } else if (data.type === 'function') {
    attributes['openai.agents.function.name'] = data.name;
  } else if (data.type === 'handoff') {
    if (data.from_agent) attributes['openai.agents.handoff.from_agent'] = data.from_agent;
    if (data.to_agent) attributes['openai.agents.handoff.to_agent'] = data.to_agent;
  } else if (data.type === 'guardrail') {
    attributes['openai.agents.guardrail.name'] = data.name;
  } else if (data.type === 'custom') {
    attributes['openai.agents.custom.name'] = data.name;
  } else if (data.type === 'mcp_tools') {
    if (data.server) attributes['openai.agents.mcp_tools.server'] = data.server;
  }

  return attributes;
}

/** Attributes applied when the OTel span is ended (read from the span data at end time). */
function dynamicAttributesFromSpanData(data: SpanData): otel.Attributes {
  const attributes: otel.Attributes = {};

  if (data.type === 'agent') {
    if (data.tools) attributes['openai.agents.agent.tools'] = data.tools.join(',');
  } else if (data.type === 'generation') {
    if (data.model) attributes['openai.agents.generation.model'] = data.model;
  } else if (data.type === 'guardrail') {
    attributes['openai.agents.guardrail.triggered'] = data.triggered;
  } else if (data.type === 'mcp_tools') {
    if (data.result) attributes['openai.agents.mcp_tools.result'] = data.result.join(',');
  }

  return attributes;
}

/** An open OTel span together with the context that has it set as the active span. */
interface SpanEntry {
  span: otel.Span;
  context: otel.Context;
}

export interface TemporalTracingProcessorOptions {
  /**
   * When `true`, emit OTel spans even during workflow replay. Defaults to `false`.
   * Useful for debugging replay-divergence issues where trace output helps identify
   * which spans differ between original execution and replay.
   */
  startSpansInReplay?: boolean;
}
/**
 * Bridges OpenAI Agents SDK trace events to OpenTelemetry spans.
 *
 * Requires @temporalio/interceptors-opentelemetry (or equivalent) to set up an OTel
 * tracer provider in the workflow sandbox. Without a registered provider,
 * otel.trace.getTracer() returns a no-op tracer and spans are silently discarded.
 *
 * Deterministic trace/span IDs are provided by the `crypto.randomUUID` polyfill in
 * `load-polyfills.ts`, which delegates to Temporal's `uuid4()` (per-workflow seeded
 * PRNG). Upstream `@openai/agents-core` calls `crypto.randomUUID()` internally for
 * ID generation, so IDs are automatically replay-safe without a custom TraceProvider.
 *
 * Activity spans nest correctly under generation spans when
 * `@temporalio/interceptors-opentelemetry` is configured (recommended). The OTel
 * outbound interceptor injects the active span context into activity headers, and the
 * inbound interceptor extracts it so the activity span becomes a child of the
 * generation span. Without `interceptors-opentelemetry`, activity spans appear at
 * the workflow level rather than nested.
 */
export class TemporalTracingProcessor implements TracingProcessor {
  private readonly tracer: otel.Tracer;
  private readonly startSpansInReplay: boolean;
  // Outer key: workflowId, inner key: spanId or traceId.
  // Scoped per-workflow so concurrent workflows on the same worker don't share state.
  private readonly spans = new Map<string, Map<string, SpanEntry>>();

  constructor(options?: TemporalTracingProcessorOptions) {
    this.tracer = otel.trace.getTracer(TRACER_NAME);
    this.startSpansInReplay = options?.startSpansInReplay ?? false;
  }

  /** Returns the current workflow's span table, creating it on first use. */
  private getWorkflowSpans(): Map<string, SpanEntry> {
    const workflowId = workflowInfo().workflowId;
    const existing = this.spans.get(workflowId);
    if (existing) return existing;
    const created = new Map<string, SpanEntry>();
    this.spans.set(workflowId, created);
    return created;
  }

  /** Looks up an entry for the current workflow without creating a table. */
  private getSpanEntry(id: string): SpanEntry | undefined {
    const table = this.spans.get(workflowInfo().workflowId);
    return table?.get(id);
  }

  /** Removes an entry and drops the workflow's table once it is empty. */
  private deleteSpanEntry(id: string): void {
    const workflowId = workflowInfo().workflowId;
    const table = this.spans.get(workflowId);
    if (table === undefined) return;
    table.delete(id);
    if (table.size === 0) this.spans.delete(workflowId);
  }

  /** Events are ignored while replaying unless `startSpansInReplay` was requested. */
  private shouldSkip(): boolean {
    if (this.startSpansInReplay) return false;
    return isReplaying();
  }

  /** Opens the root `openai.agents.run` span for a trace and stores it under the trace ID. */
  async onTraceStart(trace: Trace): Promise<void> {
    if (this.shouldSkip()) return;

    const activeContext = otel.context.active();
    const attributes: otel.Attributes = { 'openai.agents.trace_id': trace.traceId };
    if (trace.name) attributes['openai.agents.trace.name'] = trace.name;
    if (trace.groupId) attributes['openai.agents.trace.group_id'] = trace.groupId;

    const rootSpan = this.tracer.startSpan('openai.agents.run', { attributes }, activeContext);
    this.getWorkflowSpans().set(trace.traceId, {
      span: rootSpan,
      context: otel.trace.setSpan(activeContext, rootSpan),
    });
  }

  /** Closes the root span with OK status, if one was opened for this trace. */
  async onTraceEnd(trace: Trace): Promise<void> {
    if (this.shouldSkip()) return;

    const root = this.getSpanEntry(trace.traceId);
    if (root === undefined) return;
    root.span.setStatus({ code: otel.SpanStatusCode.OK });
    root.span.end();
    this.deleteSpanEntry(trace.traceId);
  }

  /** Opens an OTel span under the parent span (or the trace root when there is no parentId). */
  async onSpanStart(span: Span<SpanData>): Promise<void> {
    if (this.shouldSkip()) return;

    const spanName = spanNameFromData(span.spanData);
    const attributes = staticAttributesFromSpanData(span.spanData);

    // Fall back to the ambient context when the parent was never recorded.
    const parentKey = span.parentId ? span.parentId : span.traceId;
    const parentContext = this.getSpanEntry(parentKey)?.context ?? otel.context.active();

    const otelSpan = this.tracer.startSpan(spanName, { attributes }, parentContext);
    this.getWorkflowSpans().set(span.spanId, {
      span: otelSpan,
      context: otel.trace.setSpan(parentContext, otelSpan),
    });
  }

  /** Applies end-of-span attributes and status, then closes and forgets the OTel span. */
  async onSpanEnd(span: Span<SpanData>): Promise<void> {
    if (this.shouldSkip()) return;

    const tracked = this.getSpanEntry(span.spanId);
    if (tracked === undefined) return;
    const otelSpan = tracked.span;

    const lateAttributes = dynamicAttributesFromSpanData(span.spanData);
    for (const key of Object.keys(lateAttributes)) {
      const value = lateAttributes[key];
      if (value !== undefined) otelSpan.setAttribute(key, value);
    }

    if (span.error) {
      otelSpan.setStatus({ code: otel.SpanStatusCode.ERROR, message: span.error.message });
      otelSpan.recordException(new Error(span.error.message));
    } else {
      otelSpan.setStatus({ code: otel.SpanStatusCode.OK });
    }

    otelSpan.end();
    this.deleteSpanEntry(span.spanId);
  }

  /** Ends every span still open, across all workflows, and clears the tables. */
  async shutdown(): Promise<void> {
    for (const table of this.spans.values()) {
      for (const tracked of table.values()) {
        tracked.span.end();
      }
    }
    this.spans.clear();
  }

  async forceFlush(): Promise<void> {
    // No buffering — spans are forwarded to OTel immediately
  }
}
+ * + * Uses `addTraceProcessor` rather than `setTraceProcessors` so that any + * processors registered by user code before runner construction are preserved. + * The upstream `TraceProvider` starts with an empty processor list — no default + * exporter is auto-registered — so there is no network-I/O risk from keeping + * pre-existing processors. + * + * Idempotent — safe to call multiple times per isolate. Options from the first + * call win; subsequent calls are no-ops regardless of options passed. + */ +export function ensureTracingProcessorRegistered(options?: TemporalTracingProcessorOptions): void { + if ((globalThis as any)[REGISTERED_KEY]) return; + (globalThis as any)[REGISTERED_KEY] = true; + setTracingDisabled(false); + addTraceProcessor(new TemporalTracingProcessor(options)); +} diff --git a/packages/openai-agents/tsconfig.json b/packages/openai-agents/tsconfig.json new file mode 100644 index 000000000..ee7f9f20b --- /dev/null +++ b/packages/openai-agents/tsconfig.json @@ -0,0 +1,14 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./lib", + "rootDir": "./src" + }, + "references": [ + { "path": "../plugin" }, + { "path": "../workflow" }, + { "path": "../activity" }, + { "path": "../common" } + ], + "include": ["./src/**/*.ts"] +} diff --git a/packages/test/package.json b/packages/test/package.json index 8fab02120..3204b13ed 100644 --- a/packages/test/package.json +++ b/packages/test/package.json @@ -46,6 +46,9 @@ "@temporalio/envconfig": "workspace:*", "@temporalio/interceptors-opentelemetry": "workspace:*", "@temporalio/nexus": "workspace:*", + "@temporalio/openai-agents": "workspace:*", + "@openai/agents-core": "~0.3.0", + "@openai/agents-openai": "~0.3.0", "@temporalio/nyc-test-coverage": "workspace:*", "@temporalio/plugin": "workspace:*", "@temporalio/proto": "workspace:*", diff --git a/packages/test/src/activities/openai-agents.ts b/packages/test/src/activities/openai-agents.ts new file mode 100644 index 
000000000..1054b704a --- /dev/null +++ b/packages/test/src/activities/openai-agents.ts @@ -0,0 +1,12 @@ +export async function getWeather(input: { location: string }): Promise { + const weatherData: Record = { + tokyo: 'Sunny, 22°C', + london: 'Cloudy, 15°C', + }; + const weather = weatherData[input.location.toLowerCase()] ?? 'Unknown'; + return JSON.stringify({ location: input.location, weather }); +} + +export async function calculateSum(input: { a: number; b: number }): Promise { + return JSON.stringify({ result: input.a + input.b }); +} diff --git a/packages/test/src/stubs/openai-agents.ts b/packages/test/src/stubs/openai-agents.ts new file mode 100644 index 000000000..91b6e83cb --- /dev/null +++ b/packages/test/src/stubs/openai-agents.ts @@ -0,0 +1,146 @@ +import { + Usage, + type AgentOutputItem, + type Model, + type ModelProvider, + type ModelRequest, + type ModelResponse, + type StreamEvent, +} from '@openai/agents-core'; +import { textResponse as _textResponse } from '@temporalio/openai-agents/lib/testing'; + +// Re-export public testing utilities from the package +export { + FakeModel, + FakeModelProvider, + textResponse, + toolCallResponse, + handoffResponse, + multiToolCallResponse, +} from '@temporalio/openai-agents/lib/testing'; + +/** + * Helper to create a ModelResponse with a Date field for testing serialization (D7/F16). + */ +export function responseWithDate(text: string): ModelResponse { + const base = _textResponse(text); + (base as any).createdAt = new Date('2025-01-01T00:00:00Z'); + return base; +} + +/** + * A model that always throws the given error. Used for testing error handling. 
+ */ +export class ErrorModel implements Model { + private error: Error; + + constructor(error: Error) { + this.error = error; + } + + async getResponse(_request: ModelRequest): Promise { + throw this.error; + } + + // eslint-disable-next-line require-yield + async *getStreamedResponse(_request: ModelRequest): AsyncIterable { + throw this.error; + } +} + +/** + * A model provider that returns an ErrorModel. Used for testing model error handling. + */ +export class ErrorModelProvider implements ModelProvider { + private model: ErrorModel; + + constructor(error: Error) { + this.model = new ErrorModel(error); + } + + getModel(_name?: string): Model { + return this.model; + } +} + +/** + * A model that captures the last ModelRequest it received. + * Used for testing that request fields survive serialization through ActivityBackedModel. + */ +export class RequestCapturingModel implements Model { + public lastRequest: ModelRequest | undefined; + + async getResponse(request: ModelRequest): Promise { + this.lastRequest = request; + const output: AgentOutputItem[] = [ + { + type: 'message', + id: 'msg_capture', + role: 'assistant', + content: [{ type: 'output_text', text: 'captured' }], + status: 'completed', + }, + ]; + return { + output, + usage: new Usage({ requests: 1, inputTokens: 10, outputTokens: 8, totalTokens: 18 }), + }; + } + + // eslint-disable-next-line require-yield + async *getStreamedResponse(_request: ModelRequest): AsyncIterable { + throw new Error('Streaming not supported'); + } +} + +export class RequestCapturingModelProvider implements ModelProvider { + public model = new RequestCapturingModel(); + getModel(_name?: string): Model { + return this.model; + } + get lastRequest(): ModelRequest | undefined { + return this.model.lastRequest; + } +} + +/** + * A model provider that captures the model name passed to getModel(). + * Used for testing that runConfig.model override reaches the activity (H1). 
+ */ +export class ModelNameCapturingModelProvider implements ModelProvider { + public capturedModelNames: string[] = []; + + getModel(name?: string): Model { + this.capturedModelNames.push(name ?? '(default)'); + return new RequestCapturingModel(); + } +} + +/** + * A model that throws an arbitrary value (not necessarily an Error). + * Used for testing error handling with non-Error throws (D1/F9, D6/F15). + */ +export class ThrowAnythingModel implements Model { + constructor(private value: unknown) {} + + async getResponse(_request: ModelRequest): Promise { + throw this.value; + } + + // eslint-disable-next-line require-yield + async *getStreamedResponse(_request: ModelRequest): AsyncIterable { + throw this.value; + } +} + +export class ThrowAnythingModelProvider implements ModelProvider { + private model: ThrowAnythingModel; + + constructor(value: unknown) { + this.model = new ThrowAnythingModel(value); + } + + getModel(_name?: string): Model { + return this.model; + } +} diff --git a/packages/test/src/test-openai-agents.ts b/packages/test/src/test-openai-agents.ts new file mode 100644 index 000000000..afbf63e7e --- /dev/null +++ b/packages/test/src/test-openai-agents.ts @@ -0,0 +1,2635 @@ +/** + * Test OpenAI Agents SDK integration with Temporal workflows + */ +import { OpenAIAgentsPlugin, StatelessMCPServerProvider, toSerializedModelResponse } from '@temporalio/openai-agents'; +import { WorkflowFailedError } from '@temporalio/client'; +import { temporal } from '@temporalio/proto'; +import { + basicAgentWorkflow, + toolAgentWorkflow, + handoffAgentWorkflow, + maxTurnsAgentWorkflow, + multiToolAgentWorkflow, + contextAgentWorkflow, + rawFunctionToolWorkflow, + runConfigStringModelWorkflow, + localActivityAgentWorkflow, + retryableModelWorkflow, + agentsWorkflowErrorWorkflow, + mcpAgentWorkflow, + builtInToolAgentWorkflow, + handoffInstanceWorkflow, + cyclicHandoffWorkflow, + promptFieldWorkflow, + nonStringModelWorkflow, + wrappedTemporalFailureWorkflow, + 
runStreamedWorkflow, + agentsWorkflowErrorClassCheckWorkflow, + eventTargetListenerErrorWorkflow, + eventTargetTargetFieldWorkflow, + dateInResponseWorkflow, + directToolFactoryWorkflow, + mcpPromptsWorkflow, + mcpFactoryArgWorkflow, + mcpProviderWorkflow, + summaryOverrideStringWorkflow, + tracingUtilitiesWorkflow, + extendedModelParamsWorkflow, + runConfigModelOverrideCheckWorkflow, + handoffWithRawToolWorkflow, + handoffInstanceWithRawToolWorkflow, + handoffMutationCheckWorkflow, + handoffOnHandoffCallbackWorkflow, + handoffIsEnabledFalseWorkflow, + handoffWithCustomSchemaWorkflow, + timeoutErrorWorkflow, + xShouldRetryWorkflow, + plainErrorWorkflow, + wireRoundTripWorkflow, + wireStrippingCheckWorkflow, + wireVersionMismatchWorkflow, + wireRequestSnapshotWorkflow, + tracingSpanCaptureWorkflow, + replaySafetyWorkflow, + handoffCloneSnapshotWorkflow, + concurrentTracingIsolationWorkflow, +} from './workflows/openai-agents'; +import { helpers, makeTestFunction } from './helpers-integration'; +import { + FakeModelProvider, + ErrorModelProvider, + RequestCapturingModelProvider, + ModelNameCapturingModelProvider, + ThrowAnythingModelProvider, + textResponse, + toolCallResponse, + handoffResponse, + responseWithDate, + multiToolCallResponse, +} from './stubs/openai-agents'; +import { getWeather, calculateSum } from './activities/openai-agents'; +import EventType = temporal.api.enums.v1.EventType; + +const test = makeTestFunction({ + workflowsPath: require.resolve('./workflows/openai-agents'), +}); + +test('Basic agent responds to prompt', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Hello from agent!')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result, 'Hello 
from agent!'); + }); +}); + +function* toolWorkflowGenerator() { + yield toolCallResponse('getWeather', { location: 'Tokyo' }); + yield textResponse('The weather in Tokyo is sunny, 14-20C.'); +} + +test('Agent can use tools backed by Temporal activities', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => toolWorkflowGenerator()), + }), + ], + activities: { + getWeather, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(toolAgentWorkflow, { + args: ['What is the weather in Tokyo?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'The weather in Tokyo is sunny, 14-20C.'); + + // Verify both invokeModelActivity and getWeather appear in the workflow history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('invokeModelActivity'), + `invokeModelActivity should be in history, got: ${activityTypes.join(', ')}` + ); + t.true(activityTypes.includes('getWeather'), `getWeather should be in history, got: ${activityTypes.join(', ')}`); + + // Should have at least 3 activities: 2x invokeModelActivity (tool call + final response) + 1x getWeather + t.true( + activityScheduledEvents.length >= 3, + `Expected at least 3 activity events, got ${activityScheduledEvents.length}` + ); + }); +}); + +function* handoffWorkflowGenerator() { + // Turn 1: TriageAgent decides to hand off to WeatherSpecialist + yield handoffResponse('transfer_to_WeatherSpecialist'); + // Turn 2: WeatherSpecialist responds with text + yield textResponse('Sunny day!'); +} + +test('Agent can hand off to other agents', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => handoffWorkflowGenerator()), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(handoffAgentWorkflow, { + args: ['What is the weather in Tokyo?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.true(result.includes('Sunny'), `Expected output to contain 'Sunny', got: ${result}`); + + // Verify the handoff happened by checking activity history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + // Should have 2 invokeModelActivity calls: one for triage agent, one for weather specialist + const modelCalls = activityTypes.filter((name) => name === 'invokeModelActivity'); + t.true( + modelCalls.length >= 2, + `Expected at least 2 invokeModelActivity calls for handoff, got ${modelCalls.length}` + ); + }); +}); + +test('Agent respects max turns limit', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Single turn response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(maxTurnsAgentWorkflow, { + args: ['Hello', 1], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result.output, 'Single turn response'); + t.true(result.turnCount <= 1, `Expected turnCount <= 1, got ${result.turnCount}`); + }); +}); + +test('Model invocations are scheduled as activities', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Activity check')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + await handle.result(); + + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('invokeModelActivity'), + `invokeModelActivity should be scheduled as an activity, got: ${activityTypes.join(', ')}` + ); + }); +}); + +test('Handles model errors gracefully', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + // Create an error with a 400 status so isRetryableError returns false (non-retryable) + const modelError = new Error('Model API error'); + Object.assign(modelError, { + response: { status: 400, headers: { get: () => undefined } }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(modelError), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + // Verify the error chain contains our error message + t.truthy(err, 'Expected WorkflowFailedError'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? err); + t.true( + fullMessage.includes('Model API error'), + `Expected error chain to contain 'Model API error', got: ${fullMessage}` + ); + + // Verify error chain preserves classification + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.is( + failure?.applicationFailureInfo?.type, + 'ModelInvocationError.BadRequest', + `Expected error type 'ModelInvocationError.BadRequest' for 400, got: ${failure?.applicationFailureInfo?.type}` + ); + t.true( + failure?.applicationFailureInfo?.nonRetryable === true, + `Expected nonRetryable=true for 400, got nonRetryable=${failure?.applicationFailureInfo?.nonRetryable}` + ); + t.true( + failure?.message?.includes('Model API error') === true, + `Expected original message preserved in failure, got: ${failure?.message}` + ); + }); +}); + +function* multiToolGenerator() { + yield toolCallResponse('getWeather', { location: 'Tokyo' }); + yield toolCallResponse('calculateSum', { a: 5, b: 3 }); + yield textResponse('Weather in Tokyo is sunny and 5+3=8.'); +} + +test('Agent with multiple tools', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => multiToolGenerator()), + }), + ], + activities: { + getWeather, + calculateSum, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(multiToolAgentWorkflow, { + args: ['What is the weather in Tokyo and what is 5+3?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'Weather in Tokyo is sunny and 5+3=8.'); + + // Verify both tool activities appear in history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true(activityTypes.includes('getWeather'), `getWeather should be in history, got: ${activityTypes.join(', ')}`); + t.true( + activityTypes.includes('calculateSum'), + `calculateSum should be in history, got: ${activityTypes.join(', ')}` + ); + }); +}); + +test('Agent workflow with typed context', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Hello user-123!')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(contextAgentWorkflow, { + args: ['Hello', 'user-123'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result, 'Hello user-123!'); + }); +}); + +test('Raw function tool is rejected with clear error', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach here')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(rawFunctionToolWorkflow, { + args: ['What is the weather?'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? 
err); + t.true(fullMessage.includes('activityAsTool'), `Expected error to mention activityAsTool, got: ${fullMessage}`); + }); +}); + +test('RunConfig.model string override works', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Model override response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(runConfigStringModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result, 'Model override response'); + }); +}); + +test('Local activity mode uses local activities for model calls', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Local activity response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(localActivityAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'Local activity response'); + + const { events } = await handle.fetchHistory(); + + // Local activities appear as MarkerRecorded events (marker name "core_local_activity"), + // not as ActivityTaskScheduled events + const markerEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_MARKER_RECORDED) ?? []; + t.true( + markerEvents.length > 0, + `Expected MarkerRecorded events for local activities in history, got ${markerEvents.length}` + ); + + // Should NOT have regular activity scheduled events for model invocation + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + const modelActivities = activityScheduledEvents.filter( + (e) => e.activityTaskScheduledEventAttributes?.activityType?.name === 'invokeModelActivity' + ); + t.is(modelActivities.length, 0, `Expected no regular invokeModelActivity, got ${modelActivities.length}`); + }); +}); + +test('Retryable 429 error is classified as retryable (nonRetryable=false)', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error429 = new Error('Rate limit exceeded'); + Object.assign(error429, { + response: { status: 429, headers: { get: () => undefined } }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error429), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(retryableModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + // Workflow should fail after all retry attempts are exhausted + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? err); + t.true( + fullMessage.includes('Rate limit exceeded'), + `Expected error chain to contain 'Rate limit exceeded', got: ${fullMessage}` + ); + + // Verify the failure is classified as retryable + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + const nonRetryable = failure?.applicationFailureInfo?.nonRetryable; + t.falsy( + nonRetryable, + `Expected 429 to be classified as retryable (nonRetryable=false), got nonRetryable=${nonRetryable}` + ); + const failureType = failure?.applicationFailureInfo?.type; + t.is(failureType, 'ModelInvocationError.RateLimit'); + }); +}); + +test('Non-retryable 400 error fails without retry', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error400 = new Error('Bad request: invalid prompt'); + Object.assign(error400, { + response: { status: 400, headers: { get: () => undefined } }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error400), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? err); + t.true( + fullMessage.includes('Bad request: invalid prompt'), + `Expected error chain to contain 'Bad request: invalid prompt', got: ${fullMessage}` + ); + + // Verify only 1 activity attempt — non-retryable errors should not be retried + const { events } = await handle.fetchHistory(); + const activityStartedEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_STARTED) ?? 
[]; + t.is( + activityStartedEvents.length, + 1, + `Expected exactly 1 activity attempt (no retry), got ${activityStartedEvents.length}` + ); + + // Verify the failure is classified as non-retryable + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.true(failure?.applicationFailureInfo?.nonRetryable, 'Expected 400 to be classified as non-retryable'); + }); +}); + +test('AgentsWorkflowError wraps non-Temporal errors', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach here')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(agentsWorkflowErrorWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError'); + // Error chain: err.cause = ApplicationFailure(type='AgentsWorkflowError'), + // err.cause.cause = original Error. No intermediate AgentsWorkflowError wrapper. 
+ const wrappedMessage = String(err!.cause); + t.true( + wrappedMessage.includes('Agent workflow failed'), + `Expected wrapper message to contain 'Agent workflow failed', got: ${wrappedMessage}` + ); + t.true( + wrappedMessage.includes('Instructions evaluation failed'), + `Expected wrapper to contain original error message, got: ${wrappedMessage}` + ); + }); +}); + +// --- Stateless MCP --- + +function* mcpToolWorkflowGenerator() { + // Turn 1: model calls the MCP tool "get_time" + yield toolCallResponse('get_time', {}); + // Turn 2: model returns a text response incorporating the tool result + yield textResponse('The current time is 2026-01-01T00:00:00Z.'); +} + +test('Stateless MCP server delegates listTools and callTool to activities', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => mcpToolWorkflowGenerator()), + }), + ], + activities: { + 'testMcp-list-tools': async () => { + return [ + { + name: 'get_time', + description: 'Returns current time', + inputSchema: { + type: 'object' as const, + properties: {}, + required: [] as string[], + additionalProperties: false, + }, + }, + ]; + }, + 'testMcp-call-tool-v2': async (_input: { toolName: string; args: Record | null }) => { + return [{ type: 'text', text: '2026-01-01T00:00:00Z' }]; + }, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(mcpAgentWorkflow, { + args: ['What time is it?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'The current time is 2026-01-01T00:00:00Z.'); + + // Verify MCP activities appear in the workflow history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('testMcp-list-tools'), + `testMcp-list-tools should be in history, got: ${activityTypes.join(', ')}` + ); + t.true( + activityTypes.includes('testMcp-call-tool-v2'), + `testMcp-call-tool-v2 should be in history, got: ${activityTypes.join(', ')}` + ); + t.true( + activityTypes.includes('invokeModelActivity'), + `invokeModelActivity should be in history, got: ${activityTypes.join(', ')}` + ); + }); +}); + +// --- Built-in tools pass-through --- + +test('Built-in tools pass through without serialization error', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse("I could search but won't")]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(builtInToolAgentWorkflow, { + args: ['Search for something'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, "I could search but won't"); + + // Verify the model activity fired (the built-in tool survived serialization) + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('invokeModelActivity'), + `invokeModelActivity should be in history, got: ${activityTypes.join(', ')}` + ); + }); +}); + +// --- Bug exercise tests: handoff, cycle, prompt, model validation --- + +// T1 — F1: Handoff instance (via handoff()) reaches target agent via activity +function* handoffInstanceGenerator() { + yield handoffResponse('transfer_to_WeatherSpecialist'); + yield textResponse('Specialist says: sunny!'); +} + +test('F1: Handoff-instance handoff reaches target agent via model activity', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => handoffInstanceGenerator()), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(handoffInstanceWorkflow, { + args: ['What is the weather in Tokyo?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.true(result.includes('sunny'), `Expected output to contain 'sunny', got: ${result}`); + + // Verify at least 2 model activity calls (triage + specialist after handoff) + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + const modelCalls = activityScheduledEvents.filter( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name === 'invokeModelActivity' + ); + t.true( + modelCalls.length >= 2, + `Expected >= 2 invokeModelActivity calls (triage + specialist), got ${modelCalls.length}` + ); + }); +}); + +// T2 — F2: Cyclic handoff graph terminates without stack overflow +test('F2: Cyclic handoff graph terminates without stack overflow', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('ok')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(cyclicHandoffWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '5 seconds', + }); + t.is(result, 'ok'); + }); +}); + +// T3 — F3: prompt field is forwarded to the activity +test('F3: prompt field is forwarded through ActivityBackedModel to the activity', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const provider = new RequestCapturingModelProvider(); + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: provider, + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(promptFieldWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + t.is(result, 'captured'); + }); + + // After workflow completes, verify the model received the prompt field + t.truthy(provider.lastRequest, 'Expected model to have received a request'); + const receivedPrompt = (provider.lastRequest as any)?.prompt; + t.truthy(receivedPrompt, 'Expected prompt field to be present in model request'); + t.is(receivedPrompt?.promptId, 'pt_test', `Expected promptId 'pt_test', got: ${receivedPrompt?.promptId}`); +}); + +// T4 — F4: Non-string agent.model throws AgentsWorkflowError +test('F4: Non-string agent.model throws 
AgentsWorkflowError', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(nonStringModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError'); + const cause = err!.cause as any; + const failureType = + cause?.failure?.applicationFailureInfo?.type ?? cause?.applicationFailureInfo?.type ?? cause?.type; + t.is(failureType, 'AgentsWorkflowError', `Expected type 'AgentsWorkflowError', got: ${failureType}`); + + const fullMessage = String(cause); + t.true(fullMessage.includes('string'), `Expected error message to mention 'string', got: ${fullMessage}`); + }); +}); + +// T5a — F5: SDK-shape 429 (status on error directly) classified as retryable +test('F5: SDK-shape 429 (error.status) classified as retryable', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const sdkError429 = new Error('Rate limit exceeded'); + Object.assign(sdkError429, { status: 429, headers: {} }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(sdkError429), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(retryableModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.falsy( + failure?.applicationFailureInfo?.nonRetryable, + 'Expected SDK-shape 429 to be classified as retryable (nonRetryable=false)' + ); + }); +}); + +// T5b — F5: SDK-shape 400 (status on error directly) classified as non-retryable +test('F5: SDK-shape 400 (error.status) classified as non-retryable', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const sdkError400 = new Error('Bad request: invalid parameters'); + Object.assign(sdkError400, { status: 400, headers: {} }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(sdkError400), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(retryableModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.true(failure?.applicationFailureInfo?.nonRetryable, 'Expected SDK-shape 400 to be classified as non-retryable'); + + // Non-retryable means only 1 attempt + const startedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_STARTED) ?? 
[]; + t.is(startedEvents.length, 1, `Expected 1 attempt for non-retryable, got ${startedEvents.length}`); + }); +}); + +// T6 — F6: retry-after-ms header is honored as nextRetryDelay +test('F6: retry-after-ms header sets nextRetryDelay on activity failure', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error429 = new Error('Rate limited'); + Object.assign(error429, { + status: 429, + headers: { get: (k: string) => (k === 'retry-after-ms' ? '5000' : undefined) }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error429), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(retryableModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + const nextRetryDelay = failure?.applicationFailureInfo?.nextRetryDelay; + t.truthy(nextRetryDelay, 'Expected nextRetryDelay to be set from retry-after-ms header'); + const delaySec = Number(nextRetryDelay?.seconds ?? 
0); + t.is(delaySec, 5, `Expected nextRetryDelay of 5 seconds (from retry-after-ms: 5000), got: ${delaySec}s`); + }); +}); + +// T7 — F13: TemporalFailure in Error.cause is unwrapped, not re-wrapped as AgentsWorkflowError +test('F13: TemporalFailure in Error.cause is unwrapped and re-thrown', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(wrappedTemporalFailureWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + t.truthy(err); + + // If F13 is fixed: inner ApplicationFailure (type 'InnerFailureType') is re-thrown directly + // If F13 is buggy: runner wraps as 'AgentsWorkflowError', losing the original type + const cause = err!.cause as any; + const failureType = + cause?.failure?.applicationFailureInfo?.type ?? cause?.applicationFailureInfo?.type ?? cause?.type; + t.not( + failureType, + 'AgentsWorkflowError', + `Expected inner TemporalFailure to propagate, not be wrapped as AgentsWorkflowError` + ); + t.is(failureType, 'InnerFailureType', `Expected failure type 'InnerFailureType', got: ${failureType}`); + }); +}); + +// --- Error wrapping + streaming --- + +// C1 — F7: AgentsWorkflowError type tag is set on the ApplicationFailure created +// by the runner, and appears on the serialized failure via the cause chain. 
test('C1/F7: AgentsWorkflowError type is preserved in serialized failure', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  // The fake model response is never consumed by the assertions below;
  // the test only inspects how the workflow failure is reported.
  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new FakeModelProvider([textResponse('Should not reach')]),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(agentsWorkflowErrorWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    // The type tag is looked up in three places, first non-nullish wins:
    // nested failure proto, direct failure info, then the error object itself.
    const cause = workflowError!.cause as any;
    const failureType =
      cause?.failure?.applicationFailureInfo?.type ?? cause?.applicationFailureInfo?.type ?? cause?.type;
    t.is(failureType, 'AgentsWorkflowError', `Expected failure type 'AgentsWorkflowError', got: ${failureType}`);
  });
});

// C1/F7: Verify runner wraps errors as ApplicationFailure with original error as cause.
// The workflow catches the runner's error and inspects e.cause.name.
test('C1/F7: Runner wraps error as ApplicationFailure with original Error cause', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new FakeModelProvider([textResponse('Should not reach')]),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(agentsWorkflowErrorClassCheckWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    // The workflow result is a JSON string; only its `causeName` field is checked here.
    const serialized = await run.result();
    const info = JSON.parse(serialized);
    t.is(
      info.causeName,
      'Error',
      `Expected runner to throw with original Error as cause, got causeName=${info.causeName}`
    );
  });
});

// C3 — F27: runStreamed() method was removed — calling it via `as any` fails at runtime.
test('C3/F27: runStreamed call fails (method removed)', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new FakeModelProvider([textResponse('Should not reach')]),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(runStreamedWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError, 'Expected WorkflowFailedError when calling removed runStreamed()');
  });
});

// --- Determinism + error hygiene ---

// D1/F9: Non-Error thrown values should be preserved as cause
test('D1/F9: Non-Error thrown value is wrapped and preserved as cause', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new ThrowAnythingModelProvider('custom string error'),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(basicAgentWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    // Stringify the deepest available link of the cause chain.
    const fullMessage = String(workflowError!.cause?.cause ?? workflowError!.cause ?? workflowError);
    t.true(
      fullMessage.includes('custom string error'),
      `Expected error chain to contain 'custom string error', got: ${fullMessage}`
    );

    // The key assertion: the activity failure's cause should be preserved (not undefined)
    const history = await run.fetchHistory();
    const activityFailures = (history.events ?? []).filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    t.truthy(firstFailure?.cause, 'Expected non-Error value to be wrapped in Error and preserved as cause');
  });
});

// D3/F11: EventTarget polyfill should isolate listener errors
test('D3/F11: EventTarget polyfill isolates listener errors', async (t) => {
  const { createWorker, executeWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new FakeModelProvider([textResponse('unused')]),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const outcome = await executeWorkflow(eventTargetListenerErrorWorkflow, {
      workflowExecutionTimeout: '30 seconds',
    });

    t.true(outcome.dispatchSucceeded, 'dispatchEvent should succeed even if a listener throws');
    t.true(outcome.secondListenerCalled, 'Second listener should be called even if first throws');
  });
});

// D4/F12: EventTarget polyfill should set event.target and event.currentTarget
test('D4/F12: EventTarget polyfill sets event.target and event.currentTarget', async (t) => {
  const { createWorker, executeWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({
    modelProvider: new FakeModelProvider([textResponse('unused')]),
  });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const outcome = await executeWorkflow(eventTargetTargetFieldWorkflow, {
      workflowExecutionTimeout: '30 seconds',
    });

    t.true(outcome.targetDefined, 'event.target should be defined (set to the EventTarget instance)');
    t.true(outcome.currentTargetDefined, 'event.currentTarget should be defined (set to the EventTarget instance)');
  });
});

// D5/F14: Error type should be derived from status code
test('D5/F14: 429 error produces ModelInvocationError.RateLimit type', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  // Error carrying `status` directly on the object, with an empty headers bag.
  const error429 = Object.assign(new Error('Rate limit exceeded'), { status: 429, headers: {} });

  const plugin = new OpenAIAgentsPlugin({ modelProvider: new ErrorModelProvider(error429) });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(retryableModelWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    const history = await run.fetchHistory();
    const activityFailures = (history.events ?? []).filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    const failureType = firstFailure?.applicationFailureInfo?.type;
    t.is(failureType, 'ModelInvocationError.RateLimit', `Expected RateLimit type for 429, got: ${failureType}`);
  });
});

test('D5/F14: 401 error produces ModelInvocationError.Authentication type', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const error401 = Object.assign(new Error('Unauthorized'), { status: 401, headers: {} });

  const plugin = new OpenAIAgentsPlugin({ modelProvider: new ErrorModelProvider(error401) });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(basicAgentWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    const history = await run.fetchHistory();
    const activityFailures = (history.events ?? []).filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    const failureType = firstFailure?.applicationFailureInfo?.type;
    t.is(
      failureType,
      'ModelInvocationError.Authentication',
      `Expected Authentication type for 401, got: ${failureType}`
    );
  });
});

test('D5/F14: 400 error produces ModelInvocationError.BadRequest type', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const error400 = Object.assign(new Error('Bad request'), { status: 400, headers: {} });

  const plugin = new OpenAIAgentsPlugin({ modelProvider: new ErrorModelProvider(error400) });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(basicAgentWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    const history = await run.fetchHistory();
    const activityFailures = (history.events ?? []).filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    const failureType = firstFailure?.applicationFailureInfo?.type;
    t.is(failureType, 'ModelInvocationError.BadRequest', `Expected BadRequest type for 400, got: ${failureType}`);
  });
});

test('D5/F14: 500 error produces ModelInvocationError.ServerError type', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const error500 = Object.assign(new Error('Internal server error'), { status: 500, headers: {} });

  const plugin = new OpenAIAgentsPlugin({ modelProvider: new ErrorModelProvider(error500) });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(retryableModelWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    const history = await run.fetchHistory();
    const activityFailures = (history.events ?? []).filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    const failureType = firstFailure?.applicationFailureInfo?.type;
    t.is(failureType, 'ModelInvocationError.ServerError', `Expected ServerError type for 500, got: ${failureType}`);
  });
});

// D6/F15: Non-Error non-object (e.g. throw 42) produces non-retryable ApplicationFailure
test('D6/F15: Non-Error non-object throw produces non-retryable failure', async (t) => {
  const { createWorker, startWorkflow } = helpers(t);

  const plugin = new OpenAIAgentsPlugin({ modelProvider: new ThrowAnythingModelProvider(42) });
  const worker = await createWorker({ plugins: [plugin] });

  await worker.runUntil(async () => {
    const run = await startWorkflow(basicAgentWorkflow, {
      args: ['Hello'],
      workflowExecutionTimeout: '30 seconds',
    });

    const workflowError = await t.throwsAsync(run.result(), { instanceOf: WorkflowFailedError });
    t.truthy(workflowError);

    const fullMessage = String(workflowError!.cause?.cause ?? workflowError!.cause ?? workflowError);
    t.true(fullMessage.includes('42'), `Expected error chain to contain '42', got: ${fullMessage}`);

    const history = await run.fetchHistory();
    const allEvents = history.events ?? [];

    const activityFailures = allEvents.filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED
    );
    t.true(activityFailures.length >= 1, 'Expected at least one activity failure');
    const firstFailure = activityFailures[0]?.activityTaskFailedEventAttributes?.failure;
    t.true(
      firstFailure?.applicationFailureInfo?.nonRetryable,
      'Expected non-object throw to be classified as non-retryable'
    );

    // Should only have 1 attempt (non-retryable = no retries)
    const activityStarts = allEvents.filter(
      (event) => event.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_STARTED
    );
    t.is(activityStarts.length, 1, `Expected 1 attempt for non-retryable, got ${activityStarts.length}`);
  });
});

// D7/F16: Date fields in ModelResponse are coerced by Temporal JSON serialization.
// Temporal's default payload converter serializes via JSON.stringify.
// Date objects become ISO strings, class instances become plain objects.
// @openai/agents-core's ModelResponse uses plain JSON-safe types by default,
// so this is typically not a concern. Custom ModelProviders that emit Dates
// or class instances should pre-serialize them.
+test('D7/F16: Date in ModelResponse is coerced to string by Temporal serialization', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([responseWithDate('Date test')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(dateInResponseWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(typeof result.hasDateField, 'boolean', 'Workflow should return hasDateField status'); + if (result.hasDateField) { + t.is(result.dateFieldType, 'string', 'Date is coerced to ISO string by Temporal JSON serialization'); + } else { + t.is(result.dateFieldType, 'undefined', 'Stripped custom field should have undefined type'); + } + }); +}); + +// --- Tool validation --- + +// E3/F20: tool() from agents-core runs inline in workflow (permissive — matches Python) +function* inlineToolGenerator() { + yield toolCallResponse('inlineTool', { input: 'hello' }); + yield textResponse('Tool said: processed: hello'); +} + +test('E3/F20: FunctionTool from tool() factory runs inline in workflow', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => inlineToolGenerator()), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(directToolFactoryWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result, 'Tool said: processed: hello'); + }); +}); + +// --- F2: MCP prompts + provider --- + +test('F2: MCP listPrompts and getPrompt delegate to activities', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('unused')]), + }), 
+ ], + activities: { + 'testMcp-list-tools': async () => [], + 'testMcp-call-tool-v2': async () => [], + 'testMcp-list-prompts': async () => { + return [ + { name: 'greeting', description: 'A greeting prompt' }, + { name: 'farewell', description: 'A farewell prompt' }, + ]; + }, + 'testMcp-get-prompt-v2': async (input: { + promptName: string; + promptArguments: Record | null; + }) => { + return { + messages: [{ role: 'user', content: `Hello, ${(input.promptArguments as any)?.name ?? 'stranger'}!` }], + }; + }, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(mcpPromptsWorkflow, { + args: ['test'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + + // Verify listPrompts returned data + t.is((result.prompts as any[]).length, 2, 'Expected 2 prompts from listPrompts'); + t.is((result.prompts as any[])[0].name, 'greeting'); + + // Verify getPrompt returned data + t.truthy(result.promptResult, 'Expected getPrompt to return data'); + t.is((result.promptResult as any).messages[0].content, 'Hello, World!'); + + // Verify activities appeared in history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('testMcp-list-prompts'), + `testMcp-list-prompts should be in history, got: ${activityTypes.join(', ')}` + ); + t.true( + activityTypes.includes('testMcp-get-prompt-v2'), + `testMcp-get-prompt-v2 should be in history, got: ${activityTypes.join(', ')}` + ); + }); +}); + +function* mcpFactoryArgGenerator() { + yield toolCallResponse('get_time', {}); + yield textResponse('The time for tenant-42 is 2026-01-01.'); +} + +test('F2: factoryArgument is passed through to MCP activities', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + let receivedFactoryArg: unknown; + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => mcpFactoryArgGenerator()), + }), + ], + activities: { + 'testMcp-list-tools': async (input: any) => { + receivedFactoryArg = input?.factoryArgument; + return [ + { + name: 'get_time', + description: 'Returns current time', + inputSchema: { + type: 'object' as const, + properties: {}, + required: [] as string[], + additionalProperties: false, + }, + }, + ]; + }, + 'testMcp-call-tool-v2': async (input: any) => { + t.deepEqual(input.factoryArgument, { tenantId: 'tenant-42' }, 'factoryArgument should be passed to callTool'); + return [{ type: 'text', text: '2026-01-01' }]; + }, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(mcpFactoryArgWorkflow, { + args: ['What time is it?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.truthy(result, 'Workflow should complete successfully'); + + // Verify factoryArgument was passed to listTools activity + t.deepEqual(receivedFactoryArg, { tenantId: 'tenant-42' }, 'factoryArgument should be passed to listTools'); + }); +}); + +function* mcpProviderGenerator() { + yield 
toolCallResponse('get_data', {}); + yield textResponse('Data retrieved via provider.'); +} + +test('F2: StatelessMCPServerProvider registers activities via plugin', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const mcpProvider = new StatelessMCPServerProvider('providerMcp', { + listTools: async () => [ + { + name: 'get_data', + description: 'Get some data', + inputSchema: { type: 'object' as const, properties: {}, required: [] as string[], additionalProperties: false }, + }, + ], + callTool: async () => [{ type: 'text', text: 'provider-data-result' }], + listPrompts: async () => [], + getPrompt: async () => ({ messages: [] }), + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => mcpProviderGenerator()), + mcpServerProviders: [mcpProvider], + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(mcpProviderWorkflow, { + args: ['Get the data'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'Data retrieved via provider.'); + + // Verify provider-registered activities appear in history + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true( + activityTypes.includes('providerMcp-list-tools'), + `providerMcp-list-tools should be in history, got: ${activityTypes.join(', ')}` + ); + t.true( + activityTypes.includes('providerMcp-call-tool-v2'), + `providerMcp-call-tool-v2 should be in history, got: ${activityTypes.join(', ')}` + ); + }); +}); + +// --- F4: Summary override --- + +test('F4: summaryOverride string is passed through to model activity', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Summary test response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(summaryOverrideStringWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 'Summary test response'); + + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? 
[]; + t.true( + activityScheduledEvents.length >= 1, + `Expected at least 1 activity scheduled event, got ${activityScheduledEvents.length}` + ); + + const modelEvent = activityScheduledEvents.find( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name === 'invokeModelActivity' + ); + t.truthy(modelEvent, 'Expected invokeModelActivity in history'); + const userMetadata = (modelEvent as any)?.userMetadata; + t.truthy(userMetadata, 'Expected userMetadata on activity scheduled event'); + if (userMetadata) { + const summaryPayload = userMetadata?.summary; + t.truthy(summaryPayload, 'Expected summary payload in userMetadata'); + if (summaryPayload) { + const summaryText = Buffer.from(summaryPayload.data).toString('utf-8'); + t.true( + summaryText.includes('Custom model summary'), + `Expected summary metadata to contain 'Custom model summary', got: ${summaryText}` + ); + } + } + }); +}); + +// --- F1b: Tracing utilities --- + +test('F1b: Tracing utilities return correct values in workflow context', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('unused')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(tracingUtilitiesWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + t.true(result.isInWf, 'isInWorkflow() should return true inside workflow'); + t.is(typeof result.isReplay, 'boolean', 'isReplaying() should return a boolean'); + }); +}); + +// --- F5: Additional model activity parameters --- + +test('F5: Extended model params (priority) pass through without error', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Extended params OK')]), + }), + ], + }); + + await worker.runUntil(async () 
=> { + const result = await executeWorkflow(extendedModelParamsWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + t.is(result, 'Extended params OK'); + }); +}); + +// --- F1a: Public testing namespace --- + +test('F1a: Testing namespace exports are importable', async (t) => { + // Verify the testing namespace is accessible from the main package + const testing = await import('@temporalio/openai-agents/lib/testing'); + + t.truthy(testing.FakeModel, 'FakeModel should be exported'); + t.truthy(testing.FakeModelProvider, 'FakeModelProvider should be exported'); + t.truthy(testing.FakeModel, 'FakeModel should be exported (also covers former GeneratorFakeModel)'); + t.truthy( + testing.FakeModelProvider, + 'FakeModelProvider should be exported (also covers former GeneratorFakeModelProvider)' + ); + t.truthy(testing.textResponse, 'textResponse should be exported'); + t.truthy(testing.toolCallResponse, 'toolCallResponse should be exported'); + t.truthy(testing.handoffResponse, 'handoffResponse should be exported'); + t.truthy(testing.multiToolCallResponse, 'multiToolCallResponse should be exported'); + t.truthy(testing.ResponseBuilders, 'ResponseBuilders namespace should be exported'); + + // Verify they work + const response = testing.textResponse('test'); + t.truthy(response.output, 'textResponse should produce a valid ModelResponse'); +}); + +// --- Batch G: Test coverage gaps --- + +// G2/F29: Verify retry policy is applied — retryState proves the server used the policy +test('G2/F29: Retryable 429 error exhausts retry policy (retryState=MAXIMUM_ATTEMPTS_REACHED)', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error429 = new Error('Rate limit exceeded'); + Object.assign(error429, { + response: { status: 429, headers: { get: () => undefined } }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error429), + }), + ], + }); + + await 
worker.runUntil(async () => { + const handle = await startWorkflow(retryableModelWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + t.truthy(err, 'Workflow should fail after retry policy is exhausted'); + + const { events } = await handle.fetchHistory(); + + // Temporal dev server reports MAX_ATTEMPTS_REACHED regardless of actual retry count; + // asserting retryState proves the retry-policy path was taken (vs NON_RETRYABLE_FAILURE). + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + t.true(failedEvents.length >= 1, 'Expected at least one ACTIVITY_TASK_FAILED event'); + const lastFailed = failedEvents[failedEvents.length - 1]; + t.is( + lastFailed?.activityTaskFailedEventAttributes?.retryState, + 4, // RETRY_STATE_MAXIMUM_ATTEMPTS_REACHED + 'Retry state should be MAXIMUM_ATTEMPTS_REACHED (retry policy applied, not non-retryable)' + ); + }); +}); + +// G3/F32: Parallel tool calls — single model response containing multiple function_calls +function* parallelToolCallGenerator() { + yield multiToolCallResponse([ + { name: 'getWeather', args: { location: 'Tokyo' } }, + { name: 'calculateSum', args: { a: 5, b: 3 } }, + ]); + yield textResponse('Weather is sunny and 5+3=8.'); +} + +test('G3/F32: Parallel tool calls in one model response', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => parallelToolCallGenerator()), + }), + ], + activities: { + getWeather, + calculateSum, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(multiToolAgentWorkflow, { + args: ['What is the weather in Tokyo and what is 5+3?'], + workflowExecutionTimeout: '30 seconds', + }); + + const result = await handle.result(); + t.is(result, 
'Weather is sunny and 5+3=8.'); + + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? []; + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + + t.true(activityTypes.includes('getWeather'), `getWeather should be scheduled, got: ${activityTypes.join(', ')}`); + t.true( + activityTypes.includes('calculateSum'), + `calculateSum should be scheduled, got: ${activityTypes.join(', ')}` + ); + + // Both tool calls from a single model response + the final text response = 2 model activity calls + const modelCalls = activityTypes.filter((name) => name === 'invokeModelActivity'); + t.is(modelCalls.length, 2, `Expected 2 invokeModelActivity calls, got ${modelCalls.length}`); + + // Verify parallel scheduling: both tool activities should be scheduled + // in the same workflow task (same workflowTaskCompletedEventId) + const toolEvents = activityScheduledEvents.filter((e) => { + const name = e?.activityTaskScheduledEventAttributes?.activityType?.name; + return name === 'getWeather' || name === 'calculateSum'; + }); + if (toolEvents.length === 2) { + const taskId1 = (toolEvents[0]?.activityTaskScheduledEventAttributes as any)?.workflowTaskCompletedEventId; + const taskId2 = (toolEvents[1]?.activityTaskScheduledEventAttributes as any)?.workflowTaskCompletedEventId; + t.truthy(taskId1, 'Expected workflowTaskCompletedEventId on first tool event'); + t.deepEqual(taskId1, taskId2, 'Both tool activities should be scheduled in the same workflow task (parallel)'); + } + }); +}); + +// G5/F34: Replay smoke test — verify determinism by replaying recorded history +test('G5/F34: Workflow replay succeeds without determinism errors', async (t) => { + const { createWorker, startWorkflow, runReplayHistory } = helpers(t); + + let history: temporal.api.history.v1.IHistory | undefined; + + const worker = 
await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => toolWorkflowGenerator()), + }), + ], + activities: { + getWeather, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(toolAgentWorkflow, { + args: ['What is the weather in Tokyo?'], + workflowExecutionTimeout: '30 seconds', + }); + + await handle.result(); + history = (await handle.fetchHistory()) ?? undefined; + }); + + t.truthy(history, 'Should have captured workflow history'); + await runReplayHistory({}, history!); + t.pass('Replay completed without determinism errors'); +}); + +// G6/F-C: Schema-invalid tool input — activityAsTool does not validate args against schema +function* schemaInvalidToolInputGenerator() { + yield toolCallResponse('calculateSum', { x: 5, y: 3 }); + yield textResponse('The calculation returned a result.'); +} + +// --- H1: runConfig.model override reaches activity --- + +test('H1: runConfig.model string override uses override model name in activity', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const provider = new ModelNameCapturingModelProvider(); + const worker = await createWorker({ + plugins: [new OpenAIAgentsPlugin({ modelProvider: provider })], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(runConfigModelOverrideCheckWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + t.is(result, 'captured'); + }); + + t.true( + provider.capturedModelNames.includes('override-model'), + `Expected 'override-model' in activity, got: ${provider.capturedModelNames.join(', ')}` + ); + t.false( + provider.capturedModelNames.includes('original-model'), + `Agent's original model 'original-model' should NOT be used when runConfig.model overrides it, got: ${provider.capturedModelNames.join( + ', ' + )}` + ); +}); + +// --- H2: convertAgent catches raw function tools on handoff agents --- + +test('H2: convertAgent catches 
raw function tool on handoff agent (Agent handoff)', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(handoffWithRawToolWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError for raw tool on handoff agent'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? err); + t.true( + fullMessage.includes('raw function') || fullMessage.includes('not a tool'), + `Expected error about raw function tool on handoff agent, got: ${fullMessage}` + ); + }); +}); + +test('H2: convertAgent catches raw function tool on handoff() instance agent', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach')]), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(handoffInstanceWithRawToolWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { + instanceOf: WorkflowFailedError, + }); + + t.truthy(err, 'Expected WorkflowFailedError for raw tool on handoff() agent'); + const fullMessage = String(err!.cause?.cause ?? err!.cause ?? 
err); + t.true( + fullMessage.includes('raw function') || fullMessage.includes('not a tool'), + `Expected error about raw function tool, got: ${fullMessage}` + ); + }); +}); + +// --- H5: Handoff mutation --- + +function* handoffMutationGenerator() { + yield handoffResponse('transfer_to_Specialist'); + yield textResponse('Specialist says hello'); +} + +test('H5: convertAgent does not mutate original Handoff objects', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => handoffMutationGenerator()), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(handoffMutationCheckWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const info = JSON.parse(result); + t.false( + info.mutated, + `Original handoff should not be mutated. Model type was '${info.originalModelType}' before, '${info.afterModelType}' after` + ); + }); +}); + +// --- H3: Error classification edge cases --- + +test('H3: 408 Timeout error produces ModelInvocationError.Timeout type', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error408 = new Error('Request timeout'); + Object.assign(error408, { status: 408, headers: {} }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error408), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(timeoutErrorWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.is( + failure?.applicationFailureInfo?.type, + 'ModelInvocationError.Timeout', + `Expected Timeout type for 408, got: ${failure?.applicationFailureInfo?.type}` + ); + t.falsy(failure?.applicationFailureInfo?.nonRetryable, 'Expected 408 to be classified as retryable'); + }); +}); + +test('H3: 409 Conflict error produces ModelInvocationError.Conflict type', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error409 = new Error('Conflict'); + Object.assign(error409, { status: 409, headers: {} }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error409), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(timeoutErrorWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.is( + failure?.applicationFailureInfo?.type, + 'ModelInvocationError.Conflict', + `Expected Conflict type for 409, got: ${failure?.applicationFailureInfo?.type}` + ); + t.falsy(failure?.applicationFailureInfo?.nonRetryable, 'Expected 409 to be classified as retryable'); + }); +}); + +test('H3: 422 error produces ModelInvocationError.BadRequest type', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error422 = new Error('Unprocessable entity'); + Object.assign(error422, { status: 422, headers: {} }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error422), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.is( + failure?.applicationFailureInfo?.type, + 'ModelInvocationError.BadRequest', + `Expected BadRequest type for 422, got: ${failure?.applicationFailureInfo?.type}` + ); + t.true(failure?.applicationFailureInfo?.nonRetryable, 'Expected 422 to be classified as non-retryable'); + }); +}); + +test('H3: x-should-retry true overrides non-retryable 400 to retryable', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error400WithRetry = new Error('Bad request but should retry'); + Object.assign(error400WithRetry, { + status: 400, + headers: { get: (k: string) => (k === 'x-should-retry' ? 'true' : undefined) }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error400WithRetry), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(xShouldRetryWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? 
[]; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.falsy( + failure?.applicationFailureInfo?.nonRetryable, + 'Expected x-should-retry:true to make 400 retryable (nonRetryable=false)' + ); + }); +}); + +test('H3: Plain Error without HTTP status is non-retryable', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(new Error('non-HTTP bug')), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(plainErrorWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.true( + failure?.applicationFailureInfo?.nonRetryable, + 'Expected plain Error (no HTTP status) to be classified as non-retryable' + ); + + const startedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_STARTED) ?? []; + t.is(startedEvents.length, 1, `Expected 1 attempt for non-retryable, got ${startedEvents.length}`); + }); +}); + +test('H3: x-should-retry false overrides retryable 429 to non-retryable', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const error429NoRetry = new Error('Rate limit but do not retry'); + Object.assign(error429NoRetry, { + status: 429, + headers: { get: (k: string) => (k === 'x-should-retry' ? 
'false' : undefined) }, + }); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new ErrorModelProvider(error429NoRetry), + }), + ], + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(basicAgentWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + + const err = await t.throwsAsync(handle.result(), { instanceOf: WorkflowFailedError }); + t.truthy(err); + + const { events } = await handle.fetchHistory(); + const failedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_FAILED) ?? []; + t.true(failedEvents.length >= 1, 'Expected at least one activity failure'); + const failure = failedEvents[0]?.activityTaskFailedEventAttributes?.failure; + t.true(failure?.applicationFailureInfo?.nonRetryable, 'Expected x-should-retry:false to make 429 non-retryable'); + + const startedEvents = events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_STARTED) ?? 
[]; + t.is(startedEvents.length, 1, `Expected 1 attempt for non-retryable, got ${startedEvents.length}`); + }); +}); + +// --- NEW-1: Handoff option preservation --- + +function* handoffCallbackGenerator() { + yield handoffResponse('transfer_to_CallbackSpecialist', { reason: 'weather question' }); + yield textResponse('Specialist handled it!'); +} + +test('NEW-1: Handoff onHandoff callback is preserved through convertAgent', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => handoffCallbackGenerator()), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(handoffOnHandoffCallbackWorkflow, { + args: ['What is the weather?'], + workflowExecutionTimeout: '30 seconds', + }); + + t.true( + result.onHandoffCalled, + 'onHandoff callback should fire when handoff is invoked (convertAgent must preserve it)' + ); + t.true(result.output.includes('Specialist'), `Expected output from specialist, got: ${result.output}`); + }); +}); + +test('NEW-1b: Handoff isEnabled=false is preserved through convertAgent', async (t) => { + const provider = new RequestCapturingModelProvider(); + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [new OpenAIAgentsPlugin({ modelProvider: provider })], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(handoffIsEnabledFalseWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + t.is(result, 'captured'); + }); + + const handoffs = (provider.lastRequest as any)?.handoffs ?? 
[]; + t.is( + handoffs.length, + 0, + `Expected 0 handoffs (isEnabled=false should hide it), got ${handoffs.length}: ${handoffs + .map((h: any) => h.toolName) + .join(', ')}` + ); +}); + +test('NEW-1c: Handoff inputJsonSchema is preserved through convertAgent', async (t) => { + const provider = new RequestCapturingModelProvider(); + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [new OpenAIAgentsPlugin({ modelProvider: provider })], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(handoffWithCustomSchemaWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + t.is(result, 'captured'); + }); + + const handoffs = (provider.lastRequest as any)?.handoffs ?? []; + t.true(handoffs.length >= 1, 'Expected at least 1 handoff'); + const schema = handoffs[0]?.inputJsonSchema; + t.truthy( + schema?.properties?.reason, + `Expected inputJsonSchema to have 'reason' property from custom schema, got: ${JSON.stringify(schema)}` + ); +}); + +test('G6/F-C: Schema-invalid tool input is passed through without validation', async (t) => { + const { createWorker, startWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider(() => schemaInvalidToolInputGenerator()), + }), + ], + activities: { + getWeather, + calculateSum, + }, + }); + + await worker.runUntil(async () => { + const handle = await startWorkflow(multiToolAgentWorkflow, { + args: ['Calculate something'], + workflowExecutionTimeout: '30 seconds', + }); + + // activityAsTool does not validate tool arguments against the JSON schema. + // With { x: 5, y: 3 } instead of { a: number, b: number }, the calculateSum + // activity receives undefined for a and b, producing NaN (serialized as null). + // agents-core feeds the result back to the model, which produces a text response. 
+ const result = await handle.result(); + t.is(result, 'The calculation returned a result.'); + + const { events } = await handle.fetchHistory(); + const activityScheduledEvents = + events?.filter((e) => e.eventType === EventType.EVENT_TYPE_ACTIVITY_TASK_SCHEDULED) ?? []; + const activityTypes = activityScheduledEvents.map( + (e) => e?.activityTaskScheduledEventAttributes?.activityType?.name + ); + t.true( + activityTypes.includes('calculateSum'), + `calculateSum should be scheduled even with invalid input, got: ${activityTypes.join(', ')}` + ); + }); +}); + +// --- Wire contract tests --- + +test('Wire contract: prompt and tracing survive round trip through wire projection', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const provider = new RequestCapturingModelProvider(); + const worker = await createWorker({ + plugins: [new OpenAIAgentsPlugin({ modelProvider: provider })], + }); + + let result: Awaited>; + await worker.runUntil(async () => { + result = await executeWorkflow(wireRoundTripWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + }); + + // --- Request side (captured by activity-side model) --- + const req = provider.lastRequest as any; + t.truthy(req, 'Expected model to have received a request'); + + // Prompt field with nested structure should survive the wire + t.truthy(req?.prompt, 'Expected prompt field to survive round trip'); + t.is(req?.prompt?.promptId, 'pt_round_trip', `Expected promptId 'pt_round_trip', got: ${req?.prompt?.promptId}`); + t.deepEqual(req?.prompt?.variables, { key: 'value', nested: { deep: true } }); + + // Tracing field should survive (default is false when tracing is disabled in workflow) + t.true('tracing' in req, 'Expected tracing field to be present in wire request'); + + // __wireVersion is stripped by fromSerializedModelRequest before reaching the model + t.false('__wireVersion' in req, '__wireVersion should be stripped before reaching the model'); + + // --- 
Response side (returned from activity to workflow) --- + t.is(result!.usageInputTokens, 10, `Expected usage.inputTokens=10, got: ${result!.usageInputTokens}`); + t.is(result!.usageOutputTokens, 8, `Expected usage.outputTokens=8, got: ${result!.usageOutputTokens}`); + t.is(result!.outputLength, 1, `Expected output array length=1, got: ${result!.outputLength}`); + t.false(result!.hasWireVersion, '__wireVersion should be stripped from response by fromSerializedModelResponse'); +}); + +// Stripping is a structural guarantee: toSerializedModelRequest uses additive projection +// (only copies listed fields), so unlisted fields like `signal` can never leak through. +// This integration test verifies the end-to-end absence on the activity-side model request. +test('Wire contract: signal is stripped from wire request', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const provider = new RequestCapturingModelProvider(); + const worker = await createWorker({ + plugins: [new OpenAIAgentsPlugin({ modelProvider: provider })], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(wireStrippingCheckWorkflow, { + args: ['Hello'], + workflowExecutionTimeout: '30 seconds', + }); + t.is(result, 'captured'); + }); + + const req = provider.lastRequest as any; + t.truthy(req, 'Expected model to have received a request'); + t.false('signal' in req, 'signal should be stripped from wire request (AbortSignal is not serializable)'); +}); + +test('Wire contract: version mismatch throws non-retryable WireVersionMismatch error', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Should not reach')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(wireVersionMismatchWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + 
t.is(result.errorType, 'WireVersionMismatch', `Expected WireVersionMismatch error type, got: ${result.errorType}`); + t.true( + result.errorMessage.includes('wire version mismatch'), + `Expected descriptive message about version mismatch, got: ${result.errorMessage}` + ); + }); +}); + +test('Wire contract: SerializedModelRequest shape snapshot', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('unused')]), + }), + ], + }); + + await worker.runUntil(async () => { + const actualKeys = await executeWorkflow(wireRequestSnapshotWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + const expectedKeys = [ + '__wireVersion', + 'conversationId', + 'handoffs', + 'input', + 'modelSettings', + 'outputType', + 'overridePromptModel', + 'previousResponseId', + 'prompt', + 'systemInstructions', + 'tools', + 'toolsExplicitlyProvided', + 'tracing', + ]; + t.deepEqual( + actualKeys, + expectedKeys, + `SerializedModelRequest shape changed — bump WIRE_VERSION and update this snapshot. 
Got: ${actualKeys.join(', ')}` + ); + t.true(actualKeys.includes('__wireVersion'), 'Wire version key must be present'); + t.false(actualKeys.includes('signal'), 'signal must not be on wire (AbortSignal is not serializable)'); + }); +}); + +test('Wire contract: SerializedModelResponse shape snapshot', async (t) => { + const response = { + usage: { + requests: 1, + inputTokens: 10, + outputTokens: 5, + totalTokens: 15, + inputTokensDetails: [], + outputTokensDetails: [], + }, + output: [{ type: 'message', content: 'test' }], + responseId: 'resp_123', + providerData: { key: 'value' }, + } as any; + + const wire = toSerializedModelResponse(response); + const actualKeys = Object.keys(wire).sort(); + + const expectedKeys = ['__wireVersion', 'output', 'providerData', 'responseId', 'usage']; + t.deepEqual( + actualKeys, + expectedKeys, + `SerializedModelResponse shape changed — bump WIRE_VERSION and update this snapshot. Got: ${actualKeys.join(', ')}` + ); + t.is(wire.__wireVersion, 1, 'Wire version should be 1'); +}); + +// Upstream-drift detection: verifies that all fields we project onto the wire are JSON-safe. +// If upstream changes a field type from a JSON-safe primitive to a class/Date/Map, this test +// fails, signaling that WIRE_VERSION needs a bump and the projection needs updating. 
+test('Wire contract: upstream ModelRequest fields survive JSON round-trip (drift detection)', async (t) => { + const sampleRequest = { + systemInstructions: 'You are a helpful assistant.', + input: [{ role: 'user', content: [{ type: 'input_text', text: 'Hello' }], providerData: {} }], + modelSettings: { temperature: 0.7, maxTokens: 100, topP: 0.9 }, + tools: [{ type: 'function', name: 'get_weather', parameters: { type: 'object', properties: {} }, strict: true }], + toolsExplicitlyProvided: true, + outputType: { type: 'text' }, + handoffs: [{ toolName: 'transfer_to_agent', toolDescription: 'Transfer', strictJsonSchema: true }], + prompt: { promptId: 'pt_drift', version: 'v1', variables: { city: 'NYC' } }, + previousResponseId: 'resp_prev_001', + conversationId: 'conv_drift_001', + tracing: false, + overridePromptModel: false, + }; + + const roundTripped = JSON.parse(JSON.stringify(sampleRequest)); + t.deepEqual( + roundTripped, + sampleRequest, + 'All upstream ModelRequest field values must survive JSON round-trip. ' + + 'If this fails, upstream introduced a non-JSON-safe field — bump WIRE_VERSION and update the projection.' + ); +}); + +test('Wire contract: upstream ModelResponse fields survive JSON round-trip (drift detection)', async (t) => { + const sampleResponse = { + usage: { + requests: 1, + inputTokens: 42, + outputTokens: 15, + totalTokens: 57, + inputTokensDetails: [{ cachedTokens: 10 }], + outputTokensDetails: [{ reasoningTokens: 5 }], + }, + output: [ + { + type: 'message', + role: 'assistant', + status: 'completed', + content: [{ type: 'output_text', text: 'Hello!' 
}], + id: 'msg_drift_001', + providerData: { model: 'gpt-4o' }, + }, + ], + responseId: 'resp_drift_001', + providerData: { model: 'gpt-4o', latencyMs: 150 }, + } as any; + + const wire = toSerializedModelResponse(sampleResponse); + const roundTripped = JSON.parse(JSON.stringify(wire)); + t.deepEqual( + roundTripped, + wire, + 'All SerializedModelResponse field values must survive JSON round-trip. ' + + 'If this fails, upstream introduced a non-JSON-safe field — bump WIRE_VERSION and update the projection.' + ); +}); + +// --- T1: Tracing span capture --- + +test('T1: OpenAI Agents tracing path is active and produces trace/span events', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Traced response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(tracingSpanCaptureWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + t.true(result.traceIds.length > 0, 'Should capture at least one trace'); + t.true(result.spanTypes.includes('agent'), 'Should have an agent span'); + t.true( + result.spanTypes.includes('generation') || result.spanTypes.includes('response'), + 'Should have a generation or response span' + ); + }); +}); + +// --- T2: Replay-safety test --- + +test('T2: Tracing is replay-safe — no NondeterminismError when workflow replays', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + maxCachedWorkflows: 0, + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('Replayed response')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(replaySafetyWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + t.true( + result.replayDetected, + 'Workflow should have detected replay (proves 
maxCachedWorkflows: 0 forced a replay)' + ); + t.true(result.traceIds.length > 0, 'Should capture at least one trace during non-replay execution'); + t.true(result.spanTypes.includes('agent'), 'Should have an agent span'); + }); +}); + +// --- CLEANUP-6: Handoff-clone snapshot test --- + +test('CLEANUP-6: Handoff clone preserves all public fields through convertAgent', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([textResponse('unused')]), + }), + ], + }); + + await worker.runUntil(async () => { + const result = await executeWorkflow(handoffCloneSnapshotWorkflow, { + workflowExecutionTimeout: '30 seconds', + }); + + for (const [field, preserved] of Object.entries(result.fieldsPreserved)) { + t.true(preserved, `Handoff clone field '${field}' should be preserved`); + } + t.true(result.agentReplaced, 'Handoff clone agent should be replaced with converted agent'); + t.true(result.onInvokeHandoffReplaced, 'Handoff clone onInvokeHandoff should be replaced with wrapper'); + t.true(result.prototypeMatch, 'Handoff clone prototype should match original'); + }); +}); + +// --- T3: Concurrent-workflow tracing isolation test --- + +test('T3: Concurrent workflows on same worker have isolated trace spans', async (t) => { + const { createWorker, executeWorkflow } = helpers(t); + + const worker = await createWorker({ + plugins: [ + new OpenAIAgentsPlugin({ + modelProvider: new FakeModelProvider([ + textResponse('Isolated response 1'), + textResponse('Isolated response 2'), + ]), + }), + ], + }); + + await worker.runUntil(async () => { + const [result1, result2] = await Promise.all([ + executeWorkflow(concurrentTracingIsolationWorkflow, { + workflowId: 'isolation-wf-1', + workflowExecutionTimeout: '30 seconds', + }), + executeWorkflow(concurrentTracingIsolationWorkflow, { + workflowId: 'isolation-wf-2', + workflowExecutionTimeout: '30 
seconds', + }), + ]); + + // Each workflow reports its own ID + t.is(result1.workflowId, 'isolation-wf-1'); + t.is(result2.workflowId, 'isolation-wf-2'); + + // Both workflows captured traces + t.true(result1.traceIds.length > 0, 'Workflow 1 should capture at least one trace'); + t.true(result2.traceIds.length > 0, 'Workflow 2 should capture at least one trace'); + + // Both workflows captured spans + t.true(result1.spanTypes.includes('agent'), 'Workflow 1 should have an agent span'); + t.true(result2.spanTypes.includes('agent'), 'Workflow 2 should have an agent span'); + + // No cross-pollination: trace IDs should be disjoint between the two workflows + const sharedTraces = result1.traceIds.filter((id) => result2.traceIds.includes(id)); + t.is(sharedTraces.length, 0, 'No shared trace IDs between concurrent workflows'); + }); +}); diff --git a/packages/test/src/workflows/openai-agents.ts b/packages/test/src/workflows/openai-agents.ts new file mode 100644 index 000000000..e523cfb07 --- /dev/null +++ b/packages/test/src/workflows/openai-agents.ts @@ -0,0 +1,1259 @@ +// Test workflows for OpenAI Agents SDK integration +// eslint-disable-next-line import/no-unassigned-import +import '@temporalio/openai-agents/lib/load-polyfills'; + +import { Agent, handoff, tool, addTraceProcessor, type ModelResponse } from '@openai/agents-core'; +import { z } from 'zod'; +import { webSearchTool } from '@openai/agents-openai'; +import { ApplicationFailure, proxyActivities, workflowInfo } from '@temporalio/workflow'; +import { + activityAsTool, + TemporalOpenAIRunner, + statelessMcpServer, + isInWorkflow, + isReplaying, + toSerializedModelRequest, + type TemporalMCPServer, +} from '@temporalio/openai-agents/lib/workflow'; +import type * as activities from '../activities/openai-agents'; + +/** + * Basic workflow that creates an agent and runs it with a prompt. + * The agent's model is automatically replaced with an ActivityBackedModel + * by the runner, so LLM calls go through activities. 
/**
 * Basic workflow that creates an agent and runs it with a prompt.
 * The agent's model is automatically replaced with an ActivityBackedModel
 * by the runner, so LLM calls go through activities.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function basicAgentWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'TestAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * Workflow that uses an agent with a tool backed by a Temporal activity.
 * The getWeather tool is wrapped via activityAsTool(), so when the model
 * requests a tool call, it schedules the getWeather activity.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function toolAgentWorkflow(prompt: string): Promise<string> {
  const weatherTool = activityAsTool<{ location: string }, Awaited<ReturnType<typeof activities.getWeather>>>({
    name: 'getWeather',
    description: 'Get the weather for a given city',
    parameters: {
      type: 'object',
      properties: {
        location: { type: 'string', description: 'The city name' },
      },
      required: ['location'],
      additionalProperties: false,
    },
    // Type reference only — not called in the workflow
    activityFn: null! as typeof activities.getWeather,
  });

  const agent = new Agent({
    name: 'WeatherAgent',
    instructions: 'You are a weather assistant. Use the getWeather tool when asked about weather.',
    model: 'gpt-4o-mini',
    tools: [weatherTool],
  });

  // The run is capped at five turns.
  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns: 5 });
  return result.finalOutput ?? '';
}
/**
 * Workflow that tests agent handoffs. The TriageAgent hands off to the
 * WeatherSpecialist when it receives a weather-related question.
 *
 * @param question - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function handoffAgentWorkflow(question: string): Promise<string> {
  const weatherSpecialist = new Agent({
    name: 'WeatherSpecialist',
    instructions: 'You are a weather specialist.',
    handoffDescription: 'Weather questions',
    model: 'fake-model',
  });

  // The specialist is passed as a raw Agent (not wrapped in handoff()).
  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [weatherSpecialist],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, question, { maxTurns: 10 });
  return result.finalOutput ?? '';
}

/**
 * Workflow that tests the maxTurns option. Returns output and turn count.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @param maxTurns - Forwarded unchanged as the run's `maxTurns` option.
 * @returns `output` is the final output (or `''`); `turnCount` is the number
 *   of entries in `result.rawResponses`.
 */
export async function maxTurnsAgentWorkflow(
  prompt: string,
  maxTurns: number
): Promise<{ output: string; turnCount: number }> {
  const agent = new Agent({
    name: 'TurnsAgent',
    instructions: 'You are a helpful assistant.',
    model: 'fake-model',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns });
  return { output: result.finalOutput ?? '', turnCount: result.rawResponses.length };
}

/**
 * Workflow with an agent that has multiple tools (getWeather + calculateSum).
 * Tests that multiple activity-backed tools work together.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function multiToolAgentWorkflow(prompt: string): Promise<string> {
  const weatherTool = activityAsTool<{ location: string }, Awaited<ReturnType<typeof activities.getWeather>>>({
    name: 'getWeather',
    description: 'Get the weather for a given city',
    parameters: {
      type: 'object',
      properties: {
        location: { type: 'string', description: 'The city name' },
      },
      required: ['location'],
      additionalProperties: false,
    },
    // Type reference only; `null!` is never invoked here.
    activityFn: null! as typeof activities.getWeather,
  });

  const sumTool = activityAsTool<{ a: number; b: number }, Awaited<ReturnType<typeof activities.calculateSum>>>({
    name: 'calculateSum',
    description: 'Calculate the sum of two numbers',
    parameters: {
      type: 'object',
      properties: {
        a: { type: 'number', description: 'First number' },
        b: { type: 'number', description: 'Second number' },
      },
      required: ['a', 'b'],
      additionalProperties: false,
    },
    // Type reference only; `null!` is never invoked here.
    activityFn: null! as typeof activities.calculateSum,
  });

  const agent = new Agent({
    name: 'MultiToolAgent',
    instructions: 'Use tools to answer questions.',
    model: 'fake-model',
    tools: [weatherTool, sumTool],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns: 10 });
  return result.finalOutput ?? '';
}

/** Shape of the typed context object passed to the runner by `contextAgentWorkflow`. */
interface UserContext {
  userId: string;
  preferences: { language: string };
}

/**
 * Workflow that passes typed context through the runner.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @param userId - Stored as `context.userId`; `preferences.language` is fixed to `'en'`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function contextAgentWorkflow(prompt: string, userId: string): Promise<string> {
  const agent = new Agent({
    name: 'ContextAgent',
    instructions: 'You are a helpful assistant.',
    model: 'fake-model',
  });

  const context: UserContext = {
    userId,
    preferences: { language: 'en' },
  };

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { context });
  return result.finalOutput ?? '';
}
/**
 * Workflow that passes a raw function as a tool instead of using activityAsTool().
 * convertAgent should reject this with a clear error.
 *
 * @param question - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function rawFunctionToolWorkflow(question: string): Promise<string> {
  const rawFunction = async ({ location }: { location: string }) => {
    return { weather: 'sunny', location };
  };

  const agent = new Agent({
    name: 'RawToolAgent',
    instructions: 'Test agent with raw function tool.',
    // Deliberately invalid: a bare function cast past the type checker.
    tools: [rawFunction as any],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, question);
  return result.finalOutput ?? '';
}

/**
 * Workflow that passes runConfig.model as a string to override the agent's model.
 * The string model name should be wrapped with ActivityBackedModel by the runner.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function runConfigStringModelWorkflow(prompt: string): Promise<string> {
  // No `model` on the agent itself; it is supplied through runConfig below.
  const agent = new Agent({
    name: 'ModelOverrideAgent',
    instructions: 'You are a helpful assistant.',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { runConfig: { model: 'gpt-4o-mini' } });
  return result.finalOutput ?? '';
}

/**
 * Workflow that uses local activities for model invocations.
 * The model call should appear as a local activity marker in the history.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function localActivityAgentWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'LocalActivityAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner({ useLocalActivity: true, startToCloseTimeout: '60s' });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}
/**
 * Workflow with explicit retry policy for testing Temporal-level activity retries.
 * Uses maximumAttempts: 3 so if the model always throws a retryable error,
 * Temporal retries and then fails after exhausting attempts.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function retryableModelWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'RetryAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner({
    startToCloseTimeout: '10s',
    retryPolicy: { maximumAttempts: 3, initialInterval: '100ms' },
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * Workflow where the agent's instructions function throws a plain Error.
 * This triggers the runner's catch block which wraps non-Temporal errors
 * as ApplicationFailure with type 'AgentsWorkflowError'.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish (only reached
 *   if the run does not throw).
 */
export async function agentsWorkflowErrorWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'ThrowingAgent',
    instructions: () => {
      throw new Error('Instructions evaluation failed');
    },
    model: 'fake-model',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * Workflow that uses an agent with a stateless MCP server.
 * The MCP server delegates listTools and callTool to Temporal activities.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function mcpAgentWorkflow(prompt: string): Promise<string> {
  const mcpServer = statelessMcpServer('testMcp');

  const agent = new Agent({
    name: 'McpAgent',
    instructions: 'You have access to MCP tools.',
    model: 'gpt-4o-mini',
    mcpServers: [mcpServer],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns: 5 });
  return result.finalOutput ?? '';
}
/**
 * Workflow that uses an agent with a built-in hosted tool (webSearchTool).
 * Verifies that hosted tools pass through without serialization error.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function builtInToolAgentWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'SearchAgent',
    instructions: 'You have web search.',
    model: 'gpt-4o-mini',
    tools: [webSearchTool()],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- Regression exercise workflows ---

/**
 * F1: Uses handoff(agent) wrapper (Handoff instance, not raw Agent in handoffs array).
 * If F1 regresses, the Handoff's inner agent won't get its model replaced with
 * ActivityBackedModel, so its model call hits DummyModel and throws.
 *
 * @param question - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function handoffInstanceWorkflow(question: string): Promise<string> {
  const weatherSpecialist = new Agent({
    name: 'WeatherSpecialist',
    instructions: 'You are a weather specialist.',
    handoffDescription: 'Weather questions',
    model: 'fake-model',
  });

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoff(weatherSpecialist)],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, question, { maxTurns: 5 });
  return result.finalOutput ?? '';
}

/**
 * F2: Two agents with cyclic handoff references (A → B → A).
 * If the bug exists, convertAgent recurses infinitely and crashes with stack overflow.
 *
 * @param prompt - Input handed to `runner.run()` for agent A.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function cyclicHandoffWorkflow(prompt: string): Promise<string> {
  const agentA = new Agent({
    name: 'AgentA',
    instructions: 'You are agent A.',
    model: 'fake-model',
  });
  const agentB = new Agent({
    name: 'AgentB',
    instructions: 'You are agent B.',
    model: 'fake-model',
  });
  // The cycle is wired after construction because each agent must exist
  // before the other can reference it.
  (agentA as any).handoffs = [agentB];
  (agentB as any).handoffs = [agentA];

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agentA, prompt);
  return result.finalOutput ?? '';
}
/**
 * F3: Agent with a prompt template. The prompt field on ModelRequest must
 * survive serialization through ActivityBackedModel.
 * If the bug exists, prompt is stripped during destructuring and the model
 * never receives it.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function promptFieldWorkflow(prompt: string): Promise<string> {
  // The whole config literal is cast to `any` before reaching the constructor.
  const agent = new Agent({
    name: 'PromptAgent',
    instructions: 'You are a helpful assistant.',
    model: 'fake-model',
    prompt: {
      promptId: 'pt_test',
      variables: {},
    },
  } as any);

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * F4: Agent with a non-string model (object instead of string).
 * If the bug exists, the object silently becomes 'default'.
 * If fixed, the runner throws immediately with a clear error.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish (only reached
 *   if the run does not throw).
 */
export async function nonStringModelWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'BadModelAgent',
    instructions: 'You are a helpful assistant.',
    // Deliberately invalid model value.
    model: {} as any,
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}
/**
 * F13: Error whose .cause chain contains a TemporalFailure (ApplicationFailure).
 * Simulates agents-core wrapping a Temporal failure in its own exception.
 * If the bug exists, the runner wraps it as AgentsWorkflowError (hiding the original).
 * If fixed, the runner walks .cause and re-throws the inner TemporalFailure.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish (only reached
 *   if the run does not throw).
 */
export async function wrappedTemporalFailureWorkflow(prompt: string): Promise<string> {
  const inner = ApplicationFailure.create({
    message: 'Inner temporal failure',
    type: 'InnerFailureType',
    nonRetryable: true,
  });
  // Plain Error carrying the Temporal failure as its cause.
  const wrapper = new Error('Agents wrapper error');
  wrapper.cause = inner;

  const agent = new Agent({
    name: 'WrapperAgent',
    instructions: () => {
      throw wrapper;
    },
    model: 'fake-model',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * C1/F7: Catches the runner's error INSIDE the workflow to inspect the
 * error shape. The runner throws ApplicationFailure with the original
 * error as cause (no intermediate AgentsWorkflowError wrapper).
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns `'no-error'` when the run completes; otherwise a JSON string with
 *   `errorName` (the caught error's `name`, default `'unknown'`) and
 *   `causeName` (its `cause.name`, default `'none'`).
 */
export async function agentsWorkflowErrorClassCheckWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'ThrowingAgent',
    instructions: () => {
      throw new Error('Instructions evaluation failed');
    },
    model: 'fake-model',
  });

  const runner = new TemporalOpenAIRunner();
  try {
    await runner.run(agent, prompt);
    return 'no-error';
  } catch (e: any) {
    return JSON.stringify({
      errorName: e?.name ?? 'unknown',
      causeName: e?.cause?.name ?? 'none',
    });
  }
}
/**
 * D3/F11: Tests EventTarget polyfill listener error isolation.
 * If the polyfill doesn't wrap listeners in try/catch, the first throwing listener
 * prevents subsequent listeners from firing and propagates the error.
 *
 * @returns `secondListenerCalled` — whether the listener registered after the
 *   throwing one ran; `dispatchSucceeded` — whether `dispatchEvent` returned
 *   without throwing.
 */
export async function eventTargetListenerErrorWorkflow(): Promise<{
  secondListenerCalled: boolean;
  dispatchSucceeded: boolean;
}> {
  // Constructors are read off globalThis so whatever is installed there is exercised.
  const { EventTarget: TargetCtor, Event: EventCtor } = globalThis as any;
  const emitter = new TargetCtor();
  const outcome = { secondListenerCalled: false, dispatchSucceeded: false };

  emitter.addEventListener('test', () => {
    throw new Error('listener error');
  });
  emitter.addEventListener('test', () => {
    outcome.secondListenerCalled = true;
  });

  try {
    emitter.dispatchEvent(new EventCtor('test'));
    outcome.dispatchSucceeded = true;
  } catch {
    outcome.dispatchSucceeded = false;
  }

  return outcome;
}

/**
 * D4/F12: Tests EventTarget polyfill sets event.target and event.currentTarget.
 * If the polyfill doesn't set these fields, listeners see undefined.
 *
 * @returns Flags telling whether `target` / `currentTarget`, as observed
 *   inside the listener, were neither `undefined` nor `null`.
 */
export async function eventTargetTargetFieldWorkflow(): Promise<{
  targetDefined: boolean;
  currentTargetDefined: boolean;
}> {
  const { EventTarget: TargetCtor, Event: EventCtor } = globalThis as any;
  const emitter = new TargetCtor();
  const seen: { target?: unknown; currentTarget?: unknown } = {};

  // Capture both fields while the event is being dispatched.
  emitter.addEventListener('test', (evt: any) => {
    seen.target = evt.target;
    seen.currentTarget = evt.currentTarget;
  });
  emitter.dispatchEvent(new EventCtor('test'));

  const isPresent = (value: unknown): boolean => value != null;
  return {
    targetDefined: isPresent(seen.target),
    currentTargetDefined: isPresent(seen.currentTarget),
  };
}
/**
 * D7/F16: Tests Date field serialization in ModelResponse.
 * Temporal's JSON converter coerces Date objects to ISO strings.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns `dateFieldType` is `'Date'` when the first raw response's
 *   `createdAt` is a Date instance, otherwise its `typeof`; `hasDateField`
 *   is true when `createdAt` is not `undefined`.
 */
export async function dateInResponseWorkflow(prompt: string): Promise<{
  dateFieldType: string;
  hasDateField: boolean;
}> {
  const agent = new Agent({
    name: 'DateAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });
  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  // `createdAt` is read through `any`; it is not accessed via the typed response.
  const raw = result.rawResponses[0] as any;
  const dateField = raw?.createdAt;
  return {
    dateFieldType: dateField instanceof Date ? 'Date' : typeof dateField,
    hasDateField: dateField !== undefined,
  };
}

/**
 * C3/F27: Calls runner.runStreamed() which no longer exists on TemporalOpenAIRunner.
 * The method was removed — TS catches this at compile time. This workflow exercises
 * the runtime path (via `as any`) to verify it still fails cleanly.
 *
 * @param prompt - Input handed to the (missing) `runStreamed` method.
 * @returns The concatenated `event.data.text` values, if the call were to succeed.
 */
export async function runStreamedWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'StreamAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner();
  const stream = await (runner as any).runStreamed(agent, prompt);
  let output = '';
  for await (const event of stream) {
    if (event?.data?.text) output += event.data.text;
  }
  return output || '';
}
/**
 * E3/F20: Uses tool() from agents-core directly instead of activityAsTool().
 * Deterministic tool() products run inline in the workflow — no activity overhead.
 * The tool's execute callback must be deterministic (no I/O, no randomness).
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function directToolFactoryWorkflow(prompt: string): Promise<string> {
  const inlineTool = tool({
    name: 'inlineTool',
    description: 'A deterministic tool that runs inline in the workflow',
    parameters: {
      type: 'object' as const,
      properties: {
        input: { type: 'string' },
      },
      required: ['input'] as const,
      additionalProperties: false as const,
    },
    // agents-core invokes execute(input, runContext): the parsed tool
    // arguments are the FIRST parameter, the run context the second.
    execute: async (args) => {
      return `processed: ${(args as any).input}`;
    },
  });

  const agent = new Agent({
    name: 'DirectToolAgent',
    instructions: 'Test agent with direct tool() factory tool.',
    model: 'fake-model',
    tools: [inlineTool],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- F2: MCP prompts ---

/**
 * F2: Workflow that tests MCP listPrompts and getPrompt via activities.
 * Returns the prompt data directly to verify the activities were called.
 *
 * @param _prompt - Unused.
 * @returns The `listPrompts()` result and the `getPrompt('greeting', …)` result.
 */
export async function mcpPromptsWorkflow(_prompt: string): Promise<{
  prompts: unknown[];
  promptResult: unknown;
}> {
  const mcpServer = statelessMcpServer('testMcp') as TemporalMCPServer;

  const prompts = await mcpServer.listPrompts();
  const promptResult = await mcpServer.getPrompt('greeting', { name: 'World' });

  return { prompts, promptResult };
}

/**
 * F2: Workflow that tests MCP factoryArgument passthrough.
 * The factoryArgument should be included in every activity call.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function mcpFactoryArgWorkflow(prompt: string): Promise<string> {
  const mcpServer = statelessMcpServer('testMcp', {
    factoryArgument: { tenantId: 'tenant-42' },
  });

  const agent = new Agent({
    name: 'McpFactoryArgAgent',
    instructions: 'You have access to MCP tools.',
    model: 'gpt-4o-mini',
    mcpServers: [mcpServer],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns: 5 });
  return result.finalOutput ?? '';
}

/**
 * F2: Workflow that uses MCP server - same as mcpAgentWorkflow but intended to
 * be used with StatelessMCPServerProvider-registered activities on worker side.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function mcpProviderWorkflow(prompt: string): Promise<string> {
  const mcpServer = statelessMcpServer('providerMcp');

  const agent = new Agent({
    name: 'McpProviderAgent',
    instructions: 'You have access to MCP tools.',
    model: 'gpt-4o-mini',
    mcpServers: [mcpServer],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { maxTurns: 5 });
  return result.finalOutput ?? '';
}

// --- F4: Summary override ---

/**
 * F4: Workflow that uses summaryOverride string in model params.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function summaryOverrideStringWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'SummaryAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner({
    summaryOverride: 'Custom model summary',
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- F1b: Tracing utilities ---

/**
 * F1b: Workflow that verifies tracing utilities return expected values
 * when called from workflow context.
 *
 * @returns The values of `isInWorkflow()` and `isReplaying()` at call time.
 */
export async function tracingUtilitiesWorkflow(): Promise<{
  isInWf: boolean;
  isReplay: boolean;
}> {
  return {
    isInWf: isInWorkflow(),
    isReplay: isReplaying(),
  };
}

// --- H1: runConfig.model override verification ---
/**
 * H1: Agent with explicit model 'original-model'. The test overrides via runConfig.model
 * to 'override-model'. If the override works, the activity receives 'override-model'.
 * If broken, the activity receives 'original-model' (convertAgent ignores the override).
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function runConfigModelOverrideCheckWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'OverrideCheckAgent',
    instructions: 'You are a helpful assistant.',
    model: 'original-model',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt, { runConfig: { model: 'override-model' } });
  return result.finalOutput ?? '';
}

// --- H2: convertAgent recursion into handoffs ---

/**
 * H2: Handoff agent has a raw function tool that should be rejected.
 * If convertAgent doesn't recurse, the raw tool on the handoff agent is missed.
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish (only reached
 *   if the run does not throw).
 */
export async function handoffWithRawToolWorkflow(prompt: string): Promise<string> {
  const specialist = new Agent({
    name: 'SpecialistWithRawTool',
    instructions: 'You are a specialist.',
    model: 'fake-model',
    // Deliberately invalid: a bare function cast past the type checker.
    tools: [(() => 'raw result') as any],
  });

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [specialist],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt);
  return result.finalOutput ?? '';
}

/**
 * H2b: Same as H2 but using handoff() wrapper instance.
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish (only reached
 *   if the run does not throw).
 */
export async function handoffInstanceWithRawToolWorkflow(prompt: string): Promise<string> {
  const specialist = new Agent({
    name: 'SpecialistWithRawTool',
    instructions: 'You are a specialist.',
    model: 'fake-model',
    // Deliberately invalid: a bare function cast past the type checker.
    tools: [(() => 'raw result') as any],
  });

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoff(specialist)],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt);
  return result.finalOutput ?? '';
}

// --- H5: Handoff mutation check ---
/**
 * H5: Tests that convertAgent does not mutate the original Handoff object.
 * Creates a handoff, runs the workflow, then checks if the original handoff's
 * agent still has its original model (not an ActivityBackedModel).
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns A JSON string with `output` (final output or `''`),
 *   `originalModelType` / `afterModelType` (the `typeof` of the handoff
 *   agent's `model` before and after the run) and `mutated` (whether those
 *   two differ).
 */
export async function handoffMutationCheckWorkflow(prompt: string): Promise<string> {
  const specialist = new Agent({
    name: 'Specialist',
    instructions: 'You are a specialist.',
    model: 'specialist-model',
  });

  const handoffObj = handoff(specialist);
  // Snapshot taken before the runner sees the handoff.
  const originalAgentModel = typeof (handoffObj.agent as any).model;

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoffObj],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt);

  const afterAgentModel = typeof (handoffObj.agent as any).model;

  return JSON.stringify({
    output: result.finalOutput ?? '',
    originalModelType: originalAgentModel,
    afterModelType: afterAgentModel,
    mutated: originalAgentModel !== afterAgentModel,
  });
}

// --- NEW-1: Handoff option preservation ---
/**
 * NEW-1: Workflow with handoff that has onHandoff callback.
 * If convertAgent drops onInvokeHandoff, the callback never fires.
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns `output` is the final output (or `''`); `onHandoffCalled` reports
 *   whether the `onHandoff` callback ran during the run.
 */
export async function handoffOnHandoffCallbackWorkflow(prompt: string): Promise<{
  output: string;
  onHandoffCalled: boolean;
}> {
  let onHandoffCalled = false;

  const specialist = new Agent({
    name: 'CallbackSpecialist',
    instructions: 'You are a specialist.',
    model: 'fake-model',
  });

  const handoffObj = handoff(specialist, {
    onHandoff: async (_ctx: any, _input?: { reason: string }) => {
      onHandoffCalled = true;
    },
    inputType: z.object({
      reason: z.string().describe('Reason for handoff'),
    }),
  });

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoffObj],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt, { maxTurns: 5 });

  return {
    output: result.finalOutput ?? '',
    onHandoffCalled,
  };
}

/**
 * NEW-1b: Handoff with isEnabled=false. If convertAgent drops isEnabled,
 * the handoff defaults to always-enabled and appears in the model's tool list.
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function handoffIsEnabledFalseWorkflow(prompt: string): Promise<string> {
  const specialist = new Agent({
    name: 'DisabledSpecialist',
    instructions: 'You are a specialist.',
    model: 'fake-model',
  });

  const handoffObj = handoff(specialist, {
    isEnabled: false,
  });

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoffObj],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt, { maxTurns: 3 });
  return result.finalOutput ?? '';
}
/**
 * NEW-1c: Handoff with custom inputJsonSchema. If convertAgent drops the schema,
 * it reverts to the default empty object schema.
 *
 * @param prompt - Input handed to `runner.run()` for the triage agent.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function handoffWithCustomSchemaWorkflow(prompt: string): Promise<string> {
  const specialist = new Agent({
    name: 'SchemaSpecialist',
    instructions: 'You are a specialist.',
    model: 'fake-model',
  });

  const handoffObj = handoff(specialist);
  // Schema fields are overwritten directly on the Handoff instance.
  (handoffObj as any).inputJsonSchema = {
    type: 'object',
    properties: {
      reason: { type: 'string', description: 'Reason for handoff' },
    },
    required: ['reason'],
    additionalProperties: false,
  };
  (handoffObj as any).strictJsonSchema = true;

  const triageAgent = new Agent({
    name: 'TriageAgent',
    instructions: 'Route to specialists.',
    model: 'fake-model',
    handoffs: [handoffObj],
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(triageAgent, prompt, { maxTurns: 3 });
  return result.finalOutput ?? '';
}

// --- H3: Error classification edge cases ---

/**
 * H3: Workflow for testing 408 Timeout error classification.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function timeoutErrorWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'TimeoutAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  // maximumAttempts: 1 — a single attempt per model activity.
  const runner = new TemporalOpenAIRunner({
    startToCloseTimeout: '10s',
    retryPolicy: { maximumAttempts: 1 },
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

/**
 * H3: Workflow for testing x-should-retry header override.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function xShouldRetryWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'XShouldRetryAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  // maximumAttempts: 1 — a single attempt per model activity.
  const runner = new TemporalOpenAIRunner({
    startToCloseTimeout: '10s',
    retryPolicy: { maximumAttempts: 1 },
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- H3: Plain error (no HTTP status) ---

/**
 * H3: Workflow for testing that a plain Error without HTTP status/response
 * is classified as non-retryable.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function plainErrorWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'PlainErrorAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner({
    startToCloseTimeout: '10s',
    retryPolicy: { maximumAttempts: 3, initialInterval: '100ms' },
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- F5: Additional model activity parameters ---

/**
 * F5: Workflow that uses priority in model params.
 * Verifies it doesn't cause errors when passed through.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function extendedModelParamsWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'ExtendedParamsAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner({
    priority: { priorityKey: 1 },
  });
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}

// --- Wire contract tests ---
/**
 * Wire round-trip: verifies that populated Prompt-shaped and ModelTracing-shaped
 * objects survive workflow→activity→workflow through the wire contract.
 * Returns both the final output and response-side metadata for assertion.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns Final output plus fields read from the first raw response:
 *   token usage (`-1` when absent), `output` array length (`-1` when not an
 *   array) and whether a `__wireVersion` key is present on it.
 */
export async function wireRoundTripWorkflow(prompt: string): Promise<{
  finalOutput: string;
  usageInputTokens: number;
  usageOutputTokens: number;
  outputLength: number;
  hasWireVersion: boolean;
}> {
  // The whole config literal is cast to `any` before reaching the constructor.
  const agent = new Agent({
    name: 'WireRoundTripAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
    prompt: {
      promptId: 'pt_round_trip',
      variables: { key: 'value', nested: { deep: true } },
    },
  } as any);

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  const raw: ModelResponse | undefined = result.rawResponses[0];
  return {
    finalOutput: result.finalOutput ?? '',
    usageInputTokens: raw?.usage?.inputTokens ?? -1,
    usageOutputTokens: raw?.usage?.outputTokens ?? -1,
    outputLength: Array.isArray(raw?.output) ? raw.output.length : -1,
    hasWireVersion: '__wireVersion' in (raw ?? {}),
  };
}

/**
 * Wire stripping: verifies that signal is not present in the
 * request received by the activity-side model.
 *
 * @param prompt - Input handed to `runner.run()`.
 * @returns The run's `finalOutput`, or `''` when it is nullish.
 */
export async function wireStrippingCheckWorkflow(prompt: string): Promise<string> {
  const agent = new Agent({
    name: 'WireStrippingAgent',
    instructions: 'You are a helpful assistant.',
    model: 'gpt-4o-mini',
  });

  const runner = new TemporalOpenAIRunner();
  const result = await runner.run(agent, prompt);
  return result.finalOutput ?? '';
}
/**
 * Wire version mismatch: directly proxies the invokeModelActivity activity and
 * calls it with __wireVersion: 999 to trigger the version check.
 *
 * True cross-version testing would require running two package versions simultaneously,
 * which is impractical. This test verifies the activity-side guard responds correctly
 * to a stale payload.
 *
 * @returns `{ errorType: 'none', errorMessage: 'no error' }` when the call
 *   succeeds; otherwise the caught error's `cause.type` (default `'unknown'`)
 *   and `cause.message` (default: the stringified error).
 */
export async function wireVersionMismatchWorkflow(): Promise<{ errorType: string; errorMessage: string }> {
  // Local name chosen so it does not shadow the file-level `activities` type import.
  const modelActivities = proxyActivities<{
    invokeModelActivity(input: unknown): Promise<unknown>;
  }>({ startToCloseTimeout: '30s' });

  try {
    await modelActivities.invokeModelActivity({
      modelName: 'test-model',
      request: {
        __wireVersion: 999,
        input: 'test',
        modelSettings: {},
        tools: [],
        outputType: { type: 'text' },
        handoffs: [],
      },
    });
    return { errorType: 'none', errorMessage: 'no error' };
  } catch (e: any) {
    return {
      errorType: e?.cause?.type ?? 'unknown',
      errorMessage: e?.cause?.message ?? String(e),
    };
  }
}

// --- Wire shape snapshot workflow ---

/**
 * Builds a fully populated request object (including a `signal` entry),
 * converts it with `toSerializedModelRequest()` and reports the key set.
 *
 * @returns The sorted property names of the converted request.
 */
export async function wireRequestSnapshotWorkflow(): Promise<string[]> {
  const request = {
    systemInstructions: 'test instructions',
    input: 'test input',
    modelSettings: { temperature: 0.5 },
    tools: [],
    toolsExplicitlyProvided: true,
    outputType: { type: 'text' },
    handoffs: [],
    prompt: { promptId: 'pt_test' },
    previousResponseId: 'resp_123',
    conversationId: 'conv_456',
    tracing: false,
    overridePromptModel: true,
    signal: 'should-be-stripped',
  } as any;

  const wire = toSerializedModelRequest(request);
  return Object.keys(wire).sort();
}

// --- T1: Tracing span capture ---

/**
 * T1: Verifies that the OpenAI Agents SDK tracing path is active (not disabled)
 * and that TemporalTracingProcessor receives trace/span events during an agent run.
 * Uses addTraceProcessor to install a lightweight capture processor that records
 * trace IDs and span types.
 *
 * @returns The trace IDs seen in `onTraceStart` and the `spanData.type`
 *   values seen in `onSpanStart`, in arrival order.
 */
export async function tracingSpanCaptureWorkflow(): Promise<{
  traceIds: string[];
  spanTypes: string[];
}> {
  const capture: { traceIds: string[]; spanTypes: string[] } = { traceIds: [], spanTypes: [] };

  // TemporalOpenAIRunner constructor calls ensureTracingProcessorRegistered(),
  // which uses setTraceProcessors([...]) on first invocation. We create the runner
  // first so that call has already fired, then add our test processor on top.
  const runner = new TemporalOpenAIRunner();

  addTraceProcessor({
    async onTraceStart(trace: any) {
      capture.traceIds.push(trace.traceId);
    },
    async onTraceEnd() {},
    async onSpanStart(span: any) {
      capture.spanTypes.push(span.spanData.type);
    },
    async onSpanEnd() {},
    async shutdown() {},
    async forceFlush() {},
  });

  const agent = new Agent({
    name: 'TracingTestAgent',
    instructions: 'You are a test agent.',
    model: 'fake-model',
  });

  await runner.run(agent, 'Hello');

  return capture;
}
+ */
+export async function replaySafetyWorkflow(): Promise<{
+  traceIds: string[];
+  spanTypes: string[];
+  replayDetected: boolean;
+}> {
+  // Sample the replay flag first, before the runner or the processor exist.
+  const replayDetected = isReplaying();
+  const traceIds: string[] = [];
+  const spanTypes: string[] = [];
+
+  const runner = new TemporalOpenAIRunner();
+
+  // Capture processor: records trace IDs and span types; every other hook is a no-op.
+  const recorder = {
+    onTraceStart: async (trace: any) => {
+      traceIds.push(trace.traceId);
+    },
+    onTraceEnd: async () => {},
+    onSpanStart: async (span: any) => {
+      spanTypes.push(span.spanData.type);
+    },
+    onSpanEnd: async () => {},
+    shutdown: async () => {},
+    forceFlush: async () => {},
+  };
+  addTraceProcessor(recorder);
+
+  const agent = new Agent({
+    name: 'ReplayTestAgent',
+    instructions: 'You are a test agent.',
+    model: 'fake-model',
+  });
+
+  await runner.run(agent, 'Hello');
+
+  return { traceIds, spanTypes, replayDetected };
+}
+
+// --- CLEANUP-6: Handoff-clone snapshot test ---
+// Tests the Object.create clone technique used by convertAgent to clone Handoff
+// instances. Uses the same mechanism directly to avoid importing convertAgent
+// (which lives in the openai-agents package and may not be in the compiled output).
+
+/**
+ * Builds a Handoff with every override option set, shallow-clones it via
+ * Object.create (same prototype, same own property descriptors), replaces
+ * `agent` and `onInvokeHandoff` on the clone, and reports what still matches.
+ *
+ * @returns per-field equality flags between clone and original, whether
+ * `agent` / `onInvokeHandoff` now differ from the original, and whether the
+ * clone kept the original's prototype
+ */
+export async function handoffCloneSnapshotWorkflow(): Promise<{
+  fieldsPreserved: Record<string, boolean>;
+  agentReplaced: boolean;
+  onInvokeHandoffReplaced: boolean;
+  prototypeMatch: boolean;
+}> {
+  const specialist = new Agent({
+    name: 'SnapshotSpecialist',
+    instructions: 'You are a specialist.',
+    model: 'snapshot-model',
+  });
+
+  const inputFilterFn = (data: any) => data;
+
+  const handoffObj = handoff(specialist, {
+    toolNameOverride: 'custom_snapshot_tool',
+    toolDescriptionOverride: 'Custom snapshot description',
+    onHandoff: async () => {},
+    inputType: z.object({ reason: z.string() }),
+    inputFilter: inputFilterFn,
+    isEnabled: false,
+  });
+
+  // Clone using the exact same Object.create technique as convertAgent
+  const clone = Object.create(
+    Object.getPrototypeOf(handoffObj),
+    Object.getOwnPropertyDescriptors(handoffObj)
+  ) as typeof handoffObj;
+
+  // Simulate what convertAgent does: replace agent and onInvokeHandoff
+  const replacementAgent = new Agent({
+    name: 'ReplacementSpecialist',
+    instructions: 'Replacement.',
+    model: 'replacement-model',
+  });
+  clone.agent = replacementAgent;
+  const replacementOnInvoke = async () => replacementAgent;
+  clone.onInvokeHandoff = replacementOnInvoke;
+
+  return {
+    fieldsPreserved: {
+      toolName: clone.toolName === handoffObj.toolName,
+      toolDescription: clone.toolDescription === handoffObj.toolDescription,
+      // Schemas are compared by their JSON text rather than by reference.
+      inputJsonSchema: JSON.stringify(clone.inputJsonSchema) === JSON.stringify(handoffObj.inputJsonSchema),
+      strictJsonSchema: clone.strictJsonSchema === handoffObj.strictJsonSchema,
+      agentName: clone.agentName === handoffObj.agentName,
+      inputFilter: clone.inputFilter === handoffObj.inputFilter,
+      isEnabled: clone.isEnabled === handoffObj.isEnabled,
+    },
+    agentReplaced: clone.agent !== handoffObj.agent,
+    onInvokeHandoffReplaced: clone.onInvokeHandoff !== handoffObj.onInvokeHandoff,
+    prototypeMatch: Object.getPrototypeOf(clone) === Object.getPrototypeOf(handoffObj),
+  };
+}
+
+// --- T3: Concurrent-workflow
tracing isolation test ---
+
+export async function concurrentTracingIsolationWorkflow(): Promise<{
+  traceIds: string[];
+  spanTypes: string[];
+  workflowId: string;
+}> {
+  // Read this run's workflow ID before anything else happens.
+  const { workflowId } = workflowInfo();
+  const seenTraceIds: string[] = [];
+  const seenSpanTypes: string[] = [];
+
+  // The runner is constructed before the capture processor is added.
+  const runner = new TemporalOpenAIRunner();
+
+  // Shared do-nothing hook for the processor callbacks this test ignores.
+  const noop = async (): Promise<void> => {};
+  addTraceProcessor({
+    onTraceStart: async (trace: any) => {
+      seenTraceIds.push(trace.traceId);
+    },
+    onTraceEnd: noop,
+    onSpanStart: async (span: any) => {
+      seenSpanTypes.push(span.spanData.type);
+    },
+    onSpanEnd: noop,
+    shutdown: noop,
+    forceFlush: noop,
+  });
+
+  const agent = new Agent({
+    name: 'IsolationTestAgent',
+    instructions: 'You are a test agent for isolation testing.',
+    model: 'fake-model',
+  });
+
+  await runner.run(agent, 'Hello');
+
+  return { traceIds: seenTraceIds, spanTypes: seenSpanTypes, workflowId };
+}
diff --git a/packages/test/tsconfig.json b/packages/test/tsconfig.json index fa29ed3f1..08796e8cc 100644 --- a/packages/test/tsconfig.json +++ b/packages/test/tsconfig.json @@ -49,6 +49,9 @@ }, { "path": "../ai-sdk" + }, + { + "path": "../openai-agents" } ], "include": ["./src/**/*.ts"] diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 60ddfc954..347ad54a7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -43,6 +43,9 @@ importers: '@temporalio/nyc-test-coverage': specifier: workspace:* version: link:packages/nyc-test-coverage + '@temporalio/openai-agents': + specifier: workspace:* + version: link:packages/openai-agents '@temporalio/plugin': specifier: workspace:* version: link:packages/plugin @@ -482,6 +485,39 @@ importers: specifier: ^5.104.1 version: 5.105.1 + packages/openai-agents: + dependencies: + '@openai/agents-core': + specifier: ~0.3.0 + version: 0.3.9(zod@3.25.76) + '@openai/agents-openai': + specifier: ~0.3.0 + version: 0.3.9(zod@3.25.76) + '@opentelemetry/api': + specifier: ^1.9.0 + version: 1.9.0 + '@temporalio/activity': + specifier: workspace:* + version: link:../activity +
'@temporalio/common': + specifier: workspace:* + version: link:../common + '@temporalio/plugin': + specifier: workspace:* + version: link:../plugin + '@temporalio/workflow': + specifier: workspace:* + version: link:../workflow + '@ungap/structured-clone': + specifier: ^1.3.0 + version: 1.3.0 + headers-polyfill: + specifier: ^4.0.3 + version: 4.0.3 + web-streams-polyfill: + specifier: ^4.2.0 + version: 4.2.0 + packages/plugin: devDependencies: '@temporalio/client': @@ -533,6 +569,12 @@ importers: '@modelcontextprotocol/sdk': specifier: ^1.26.0 version: 1.26.0(zod@3.25.76) + '@openai/agents-core': + specifier: ~0.3.0 + version: 0.3.9(zod@3.25.76) + '@openai/agents-openai': + specifier: ~0.3.0 + version: 0.3.9(zod@3.25.76) '@opentelemetry/api': specifier: ^1.9.0 version: 1.9.0 @@ -581,6 +623,9 @@ importers: '@temporalio/nyc-test-coverage': specifier: workspace:* version: link:../nyc-test-coverage + '@temporalio/openai-agents': + specifier: workspace:* + version: link:../openai-agents '@temporalio/plugin': specifier: workspace:* version: link:../plugin @@ -1213,6 +1258,19 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} + '@openai/agents-core@0.3.9': + resolution: {integrity: sha512-6Fr/VkA3lMaTT9EV2+OsmkMX9Yx+/PeWtlmaWNKDRG8D15IWuK13NOC9eFklTsa7otbuwbw/Xmjes+h4Z+CwSQ==} + peerDependencies: + zod: ^3.25.40 || ^4.0 + peerDependenciesMeta: + zod: + optional: true + + '@openai/agents-openai@0.3.9': + resolution: {integrity: sha512-duXUt0xU6K/+c7ae4m8BrJIUzZal6Pzln8V0frnJfNyfYO4SvHMV4qwPRzVDvv/ANj4DQXWI2L1JdPxKJeSHkw==} + peerDependencies: + zod: ^3.25.40 || ^4.0 + '@opentelemetry/api-logs@0.52.1': resolution: {integrity: sha512-qnSqB2DQ9TPP96dl8cDubDvrUyWc0/sK81xHTK8eSUspzDM3bsewX903qclQFvVhgStjRWdC5bLb3kQqMkfV5A==} engines: {node: '>=14'} @@ -3786,6 +3844,18 @@ packages: once@1.4.0: resolution: {integrity: 
sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + openai@6.34.0: + resolution: {integrity: sha512-yEr2jdGf4tVFYG6ohmr3pF6VJuveP0EA/sS8TBx+4Eq5NT10alu5zg2dmxMXMgqpihRDQlFGpRt2XwsGj+Fyxw==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + optionator@0.8.3: resolution: {integrity: sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==} engines: {node: '>= 0.8.0'} @@ -5236,6 +5306,29 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.16.0 + '@openai/agents-core@0.3.9(zod@3.25.76)': + dependencies: + debug: 4.4.3 + openai: 6.34.0(zod@3.25.76) + optionalDependencies: + '@modelcontextprotocol/sdk': 1.26.0(zod@3.25.76) + zod: 3.25.76 + transitivePeerDependencies: + - '@cfworker/json-schema' + - supports-color + - ws + + '@openai/agents-openai@0.3.9(zod@3.25.76)': + dependencies: + '@openai/agents-core': 0.3.9(zod@3.25.76) + debug: 4.4.3 + openai: 6.34.0(zod@3.25.76) + zod: 3.25.76 + transitivePeerDependencies: + - '@cfworker/json-schema' + - supports-color + - ws + '@opentelemetry/api-logs@0.52.1': dependencies: '@opentelemetry/api': 1.9.0 @@ -5697,7 +5790,7 @@ snapshots: dependencies: '@typescript-eslint/typescript-estree': 8.13.0(typescript@5.6.3) '@typescript-eslint/utils': 8.13.0(eslint@9.39.2)(typescript@5.6.3) - debug: 4.4.1 + debug: 4.4.3 ts-api-utils: 1.4.0(typescript@5.6.3) optionalDependencies: typescript: 5.6.3 @@ -5725,7 +5818,7 @@ snapshots: dependencies: '@typescript-eslint/types': 8.13.0 '@typescript-eslint/visitor-keys': 8.13.0 - debug: 4.4.1 + debug: 4.4.3 fast-glob: 3.3.2 is-glob: 4.0.3 minimatch: 9.0.9 @@ -8144,6 +8237,10 @@ snapshots: dependencies: wrappy: 1.0.2 + openai@6.34.0(zod@3.25.76): + optionalDependencies: + zod: 3.25.76 + optionator@0.8.3: dependencies: deep-is: 0.1.4 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 62362712b..151249146 100644 --- 
a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -10,6 +10,7 @@ packages: - packages/interceptors-opentelemetry - packages/meta - packages/nexus + - packages/openai-agents - packages/nyc-test-coverage - packages/plugin - packages/proto