Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions src/__tests__/commands/crawl.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ describe('executeCrawl', () => {
expect(mockClient.crawl).toHaveBeenCalledWith(
'https://example.com',
expect.objectContaining({
pollInterval: 5000, // Default poll interval
pollInterval: 5, // Default poll interval
})
);
expect(result).toEqual({
Expand Down Expand Up @@ -371,7 +371,7 @@ describe('executeCrawl', () => {
expect(mockClient.crawl).toHaveBeenCalledWith(
'https://example.com',
expect.objectContaining({
pollInterval: 10000, // Converted to milliseconds
pollInterval: 10, // seconds
})
);
});
Expand All @@ -395,7 +395,7 @@ describe('executeCrawl', () => {
expect(mockClient.crawl).toHaveBeenCalledWith(
'https://example.com',
expect.objectContaining({
timeout: 300000, // Converted to milliseconds
timeout: 300, // seconds
})
);
});
Expand All @@ -422,8 +422,8 @@ describe('executeCrawl', () => {
expect(mockClient.crawl).toHaveBeenCalledWith(
'https://example.com',
expect.objectContaining({
pollInterval: 5000,
timeout: 600000,
pollInterval: 5,
timeout: 600,
limit: 50,
maxDiscoveryDepth: 2,
})
Expand All @@ -443,20 +443,22 @@ describe('executeCrawl', () => {
vi.restoreAllMocks();
vi.useRealTimers();
});

it('should use custom polling with progress when progress flag is set', async () => {
const jobId = '550e8400-e29b-41d4-a716-446655440000';

const mockStartResponse = {
id: jobId,
url: 'https://example.com',
};

const mockScrapingStatus = {
id: jobId,
status: 'scraping',
total: 100,
completed: 50,
data: [],
};

const mockCompletedStatus = {
id: jobId,
status: 'completed',
Expand All @@ -466,34 +468,69 @@ describe('executeCrawl', () => {
};

mockClient.startCrawl.mockResolvedValue(mockStartResponse);
// First call returns scraping status, second returns completed

mockClient.getCrawlStatus
.mockResolvedValueOnce(mockScrapingStatus)
.mockResolvedValueOnce(mockCompletedStatus);

// Start the async operation
const crawlPromise = executeCrawl({
urlOrJobId: 'https://example.com',
wait: true,
progress: true,
pollInterval: 0.001, // Very short interval for testing (1ms)
pollInterval: 1, // seconds
});

// Fast-forward timers to resolve the first setTimeout
await vi.advanceTimersByTimeAsync(1);

// Fast-forward again to resolve the second setTimeout
await vi.advanceTimersByTimeAsync(1);
await vi.advanceTimersByTimeAsync(1000);
await vi.advanceTimersByTimeAsync(1000);

const result = await crawlPromise;

expect(mockClient.startCrawl).toHaveBeenCalledTimes(1);
expect(mockClient.getCrawlStatus).toHaveBeenCalledTimes(2);
expect(result.success).toBe(true);

if (result.success && 'data' in result) {
expect(result.data.status).toBe('completed');
}
});

// Verifies that progress-mode polling gives up with a timeout error when the
// crawl job never reaches a terminal status.
it('should timeout correctly in progress mode', async () => {
const jobId = '550e8400-e29b-41d4-a716-446655440000';
const mockStartResponse = {
id: jobId,
url: 'https://example.com',
};
// Status payload that stays in 'scraping', i.e. the job is still in progress
const mockScrapingStatus = {
id: jobId,
status: 'scraping',
total: 100,
completed: 50,
data: [],
};

mockClient.startCrawl.mockResolvedValue(mockStartResponse);
// Every status poll resolves to the in-progress payload, so only the timeout can end the wait
mockClient.getCrawlStatus.mockResolvedValue(mockScrapingStatus);

// Kick off the crawl without awaiting it; the timers are advanced manually below
const crawlPromise = executeCrawl({
urlOrJobId: 'https://example.com',
wait: true,
progress: true,
pollInterval: 1, // seconds
timeout: 2, // seconds
});

// Advance the mocked clock by 3000 ms, i.e. past the 2-second timeout
await vi.advanceTimersByTimeAsync(3000);
await vi.runAllTimersAsync(); // run any timers that are still pending
await Promise.resolve(); // yield once more so queued microtasks can settle

// The command is expected to report failure with a timeout message rather than reject
const result = await crawlPromise;
expect(result.success).toBe(false);
expect(result.error).toMatch(/Timeout/i);
});
});

describe('Error handling', () => {
Expand Down
26 changes: 16 additions & 10 deletions src/commands/crawl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export async function executeCrawl(
}

// Build crawl options
const crawlOptions: any = {
const crawlOptions: Partial<CrawlOptions> & Record<string, any> = {
integration: 'cli',
};
Comment on lines +60 to 62
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crawlOptions is typed as Partial<CrawlOptions> & Record<string, any>, but this object is the SDK parameter bag (it includes fields like integration and maxDiscoveryDepth that are not part of CrawlOptions, and omits CLI-only fields that are in CrawlOptions). This type can be misleading and doesn’t provide useful safety. Consider defining a dedicated SDK params type (similar to agentParams in src/commands/agent.ts:196-208) or using a plain Record<string, any> here.

Copilot uses AI. Check for mistakes.

Expand Down Expand Up @@ -97,15 +97,15 @@ export async function executeCrawl(

// If wait mode, use the convenience crawl method with polling
if (wait) {
// Set polling options
// Set polling options (SDK expects seconds, not ms)
if (pollInterval !== undefined) {
crawlOptions.pollInterval = pollInterval * 1000; // Convert to milliseconds
crawlOptions.pollInterval = pollInterval; // seconds
} else {
// Default poll interval: 5 seconds
crawlOptions.pollInterval = 5000;
crawlOptions.pollInterval = 5;
}
if (timeout !== undefined) {
crawlOptions.timeout = timeout * 1000; // Convert to milliseconds
crawlOptions.timeout = timeout; // seconds
}

// Show progress if requested - use custom polling for better UX
Expand All @@ -117,13 +117,19 @@ export async function executeCrawl(
process.stderr.write(`Crawling ${urlOrJobId}...\n`);
process.stderr.write(`Job ID: ${jobId}\n`);

// Poll for status with progress updates
const pollMs = crawlOptions.pollInterval || 5000;
// Convert seconds -> ms only here; crawlOptions itself stays in seconds
const pollMs =
crawlOptions.pollInterval !== undefined
? crawlOptions.pollInterval * 1000
: 5000;
const startTime = Date.now();
const timeoutMs = timeout ? timeout * 1000 : undefined;
const timeoutMs =
crawlOptions.timeout !== undefined
? crawlOptions.timeout * 1000
: undefined;
Comment on lines +120 to +129
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pollMs/timeoutMs are derived by multiplying crawlOptions.pollInterval/timeout without validating they’re finite numbers. Since the CLI parses these with parseFloat, invalid input (e.g. --poll-interval foo) becomes NaN, and with the current !== undefined checks this results in pollMs = NaN (treated as 0 by setTimeout), causing a tight polling loop and potential API/CPU overload. Add a Number.isFinite(...) (and likely > 0) guard and fall back to defaults or error out when values are invalid.

Copilot uses AI. Check for mistakes.

while (true) {
await new Promise((resolve) => setTimeout(resolve, pollMs));
await new Promise<void>((resolve) => setTimeout(resolve, pollMs));

const status = await app.getCrawlStatus(jobId);

Expand All @@ -145,7 +151,7 @@ export async function executeCrawl(
}

// Check timeout
if (timeoutMs && Date.now() - startTime > timeoutMs) {
if (timeoutMs !== undefined && Date.now() - startTime > timeoutMs) {
process.stderr.write('\n');
return {
success: false,
Expand Down
Loading