-
Notifications
You must be signed in to change notification settings - Fork 0
feat(agent) : agent built🎉 #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
a6b4a9d
feat(agent) : agent built🎉
yb175 c61b58f
fix(review) : fixed all p1 and p2 reviews
yb175 a1290cd
fix(review) : fixed reviews
yb175 6b19d16
security : fixed approvalvulnerability
yb175 15fa766
fix(review) : fixed review
yb175 d886d6c
fix(llm) : added llm timeout
yb175 e2c1ab7
fix(gemimi-timeout) : fixed timout issue
yb175 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,319 @@ | ||
| import { vi, describe, it, expect, beforeEach } from "vitest"; | ||
| import { runAgent } from "./loop.js"; | ||
| import { createMemory } from "./memory.js"; | ||
| import { llmClient } from "./llm.js"; | ||
|
|
||
| // Mock @repo/db: replace the module with a stub whose db.approval.findUnique and db.conversation.findUnique/update/upsert are bare vi.fn() mocks | ||
| vi.mock("@repo/db", () => { | ||
| return { | ||
| db: { | ||
| approval: { | ||
| findUnique: vi.fn(), | ||
| }, | ||
| conversation: { | ||
| findUnique: vi.fn(), | ||
| update: vi.fn(), | ||
| upsert: vi.fn(), | ||
| }, | ||
| }, | ||
| }; | ||
| }); | ||
|
|
||
| // Import mocked db | ||
| import { db } from "@repo/db"; | ||
|
|
||
| // Mock decision engine: stub ../policy/decision.js so decide() is a vi.fn() whose resolved value is configured by the tests that need it | ||
| vi.mock("../policy/decision.js", () => { | ||
| return { | ||
| decide: vi.fn(), | ||
| }; | ||
| }); | ||
| import { decide } from "../policy/decision.js"; | ||
|
|
||
| // Mock MCP bootstrapping: stub ../../mcp/bootstrap.js so mcpDiscovery.discoverTools and mcpExecutor.execute are vi.fn() mocks | ||
| vi.mock("../../mcp/bootstrap.js", () => { | ||
| return { | ||
| mcpDiscovery: { | ||
| discoverTools: vi.fn(), | ||
| }, | ||
| mcpExecutor: { | ||
| execute: vi.fn(), | ||
| }, | ||
| }; | ||
| }); | ||
| import { mcpDiscovery, mcpExecutor } from "../../mcp/bootstrap.js"; | ||
|
|
||
| describe("Agent Module & Execution Loop", () => { | ||
| const mockTool = { | ||
| name: "test_tool", | ||
| description: "A test tool description", | ||
| inputSchema: { | ||
| type: "object", | ||
| properties: { | ||
| arg1: { type: "string" }, | ||
| }, | ||
| required: ["arg1"], | ||
| }, | ||
| execute: vi.fn(), | ||
| }; | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks(); | ||
|
|
||
| // Mock conversation database queries | ||
| vi.mocked(db.conversation.findUnique).mockResolvedValue({ | ||
| id: "conv-1", | ||
| tokens_used: 0, | ||
| budget_limit: 1000, | ||
| createdAt: new Date(), | ||
| } as any); | ||
| vi.mocked(db.conversation.update).mockResolvedValue({} as any); | ||
| vi.mocked(db.conversation.upsert).mockResolvedValue({} as any); | ||
|
|
||
| // Default discovery stub returning the test tool | ||
| const mockToolsMap = new Map(); | ||
| mockToolsMap.set("test_tool", { | ||
| server: { name: "test_server" }, | ||
| tool: mockTool, | ||
| }); | ||
| vi.mocked(mcpDiscovery.discoverTools).mockResolvedValue(mockToolsMap); | ||
| }); | ||
|
|
||
| // 1) tool call - LLM requests a tool call | ||
| it("scenario 1: tool call gets evaluated and mapped properly in the loop", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "hello" }, | ||
| }) | ||
| ); | ||
|
|
||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "PENDING", | ||
| reason: "approval-uuid-1", | ||
| }); | ||
|
|
||
| const result = await runAgent("Perform task", "conv-1", 100); | ||
| expect(result.status).toBe("PENDING"); | ||
| expect(result.approvalId).toBe("approval-uuid-1"); | ||
| expect(decide).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "hello" }, | ||
| }), | ||
| { conversationId: "conv-1", token: expect.any(Number) } | ||
| ); | ||
| }); | ||
|
|
||
| // 2) final answer - LLM returns a final answer | ||
| it("scenario 2: final answer stops execution and returns success", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "final_answer", | ||
| answer: "Task completed successfully.", | ||
| }) | ||
| ); | ||
|
|
||
| const result = await runAgent("Perform task", "conv-1", 100); | ||
| expect(result.status).toBe("SUCCESS"); | ||
| expect(result.answer).toBe("Task completed successfully."); | ||
| expect(result.memory.messages).toContainEqual({ | ||
| role: "assistant", | ||
| content: "Task completed successfully.", | ||
| }); | ||
| }); | ||
|
|
||
| // 3) approval pending - tool call requires approval, decide() returns PENDING | ||
| it("scenario 3: decision PENDING saves approvalId and returns PENDING status", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "value" }, | ||
| }) | ||
| ); | ||
|
|
||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "PENDING", | ||
| reason: "pending-approval-id", | ||
| }); | ||
|
|
||
| const result = await runAgent("Start workflow", "conv-2", 200); | ||
| expect(result.status).toBe("PENDING"); | ||
| expect(result.approvalId).toBe("pending-approval-id"); | ||
| expect(result.memory.approvalId).toBe("pending-approval-id"); | ||
| }); | ||
|
|
||
| // 4) denied tool - tool call is denied, decide() returns DENY | ||
| it("scenario 4: decision DENY stops execution and returns DENY status", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "forbidden" }, | ||
| }) | ||
| ); | ||
|
|
||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "DENY", | ||
| reason: "Tool execution blocked by policy", | ||
| }); | ||
|
|
||
| const result = await runAgent("Run forbidden action", "conv-3", 300); | ||
| expect(result.status).toBe("DENY"); | ||
| expect(result.reason).toBe("Tool execution blocked by policy"); | ||
| }); | ||
|
|
||
| // 5) successful execution - tool call is allowed and executes successfully | ||
| it("scenario 5: allowed tool call executes successfully, records result, and requests next step", async () => { | ||
| // 1st call: request tool | ||
| // 2nd call: return final answer | ||
| let callCount = 0; | ||
| vi.spyOn(llmClient, "callModel").mockImplementation(async () => { | ||
| callCount++; | ||
| if (callCount === 1) { | ||
| return JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "valid-input" }, | ||
| }); | ||
| } | ||
| return JSON.stringify({ | ||
| type: "final_answer", | ||
| answer: "Execution completed successfully.", | ||
| }); | ||
| }); | ||
|
|
||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "ALLOW", | ||
| }); | ||
|
|
||
| vi.mocked(mcpExecutor.execute).mockResolvedValue("Success output"); | ||
|
|
||
| const result = await runAgent("Run action", "conv-4", 400); | ||
| expect(result.status).toBe("SUCCESS"); | ||
| expect(result.answer).toBe("Execution completed successfully."); | ||
| expect(mcpExecutor.execute).toHaveBeenCalledWith( | ||
| "test_tool", | ||
| { arg1: "valid-input" }, | ||
| { conversationId: "conv-4", decision: "ALLOW" } | ||
| ); | ||
| expect(result.memory.toolResults).toContain("Success output"); | ||
| }); | ||
|
|
||
| // 6) invalid llm output - LLM returns well-formed JSON whose tool arguments do not match the tool's input schema (malformed JSON is covered by scenario 8) | ||
| it("scenario 6: invalid argument type fails schema validation and throws error", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: 12345 }, // arg1 must be string | ||
| }) | ||
| ); | ||
|
|
||
| await expect(runAgent("Run action", "conv-5", 500)).rejects.toThrow( | ||
| "Invalid arguments for tool test_tool" | ||
| ); | ||
| }); | ||
|
|
||
| it("scenario 6b: unknown tool rejection", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "unknown_tool", | ||
| arguments: {}, | ||
| }) | ||
| ); | ||
|
|
||
| await expect(runAgent("Run action", "conv-5", 500)).rejects.toThrow( | ||
| "Unknown tool: unknown_tool" | ||
| ); | ||
| }); | ||
|
|
||
| // 7) executor throws - MCP executor throws an error | ||
| it("scenario 7: executor exception throws an error and fails closed", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "tool_call", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "trigger-fail" }, | ||
| }) | ||
| ); | ||
|
|
||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "ALLOW", | ||
| }); | ||
|
|
||
| vi.mocked(mcpExecutor.execute).mockRejectedValue(new Error("Executor crash")); | ||
|
|
||
| await expect(runAgent("Fail task", "conv-6", 600)).rejects.toThrow( | ||
| "Tool execution failed: Executor crash" | ||
| ); | ||
| }); | ||
|
|
||
| // 8) malformed json - LLM output is not valid JSON | ||
| it("scenario 8: malformed json from LLM throws error", async () => { | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue("not-json-format"); | ||
|
|
||
| await expect(runAgent("Fail task", "conv-7", 700)).rejects.toThrow( | ||
| "Malformed JSON from LLM response" | ||
| ); | ||
| }); | ||
|
|
||
| // 9) approval resumes execution - agent is resumed with an approvalId and continues | ||
| it("scenario 9: agent loop resumes from approval ID, skips nextStep for the first call, and proceeds", async () => { | ||
| // Mock db.approval.findUnique to return the original tool call parameters | ||
| vi.mocked(db.approval.findUnique).mockResolvedValue({ | ||
| id: "approval-999", | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "resumed-val" }, | ||
| status: "APPROVED" as any, | ||
| createdAt: new Date(), | ||
| updatedAt: new Date(), | ||
| }); | ||
|
|
||
| // decide() is stubbed to return ALLOW unconditionally; the assertion further down checks that the approved approvalId is passed to it | ||
| vi.mocked(decide).mockResolvedValue({ | ||
| decision: "ALLOW", | ||
| }); | ||
|
|
||
| vi.mocked(mcpExecutor.execute).mockResolvedValue("Resumed execution success"); | ||
|
|
||
| // The model is only called once after the executor finishes to retrieve the final answer | ||
| vi.spyOn(llmClient, "callModel").mockResolvedValue( | ||
| JSON.stringify({ | ||
| type: "final_answer", | ||
| answer: "Completed resumed action.", | ||
| }) | ||
| ); | ||
|
|
||
| const memory = createMemory(); | ||
| memory.addMessage("user", "Run step 1"); | ||
| // Resume agent with the approval ID | ||
| const result = await runAgent(null, "conv-8", 800, { | ||
| memory, | ||
| approvalId: "approval-999", | ||
| }); | ||
|
|
||
| expect(result.status).toBe("SUCCESS"); | ||
| expect(result.answer).toBe("Completed resumed action."); | ||
| expect(db.approval.findUnique).toHaveBeenCalledWith({ | ||
| where: { id: "approval-999" }, | ||
| }); | ||
| expect(decide).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| tool_name: "test_tool", | ||
| arguments: { arg1: "resumed-val" }, | ||
| approvalId: "approval-999", | ||
| }), | ||
| { conversationId: "conv-8", token: 0 } | ||
| ); | ||
| expect(mcpExecutor.execute).toHaveBeenCalledWith( | ||
| "test_tool", | ||
| { arg1: "resumed-val" }, | ||
| { conversationId: "conv-8", decision: "ALLOW" } | ||
| ); | ||
| expect(result.memory.toolResults).toContain("Resumed execution success"); | ||
| }); | ||
| }); |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.