From 822afc6a09bc17975409b64aee5689152df91c87 Mon Sep 17 00:00:00 2001 From: bb Date: Wed, 20 May 2026 23:25:24 +0000 Subject: [PATCH] [DASH-2089] [templates] smart-fetch-scraper: comment about Fetch markdown/json formats --- python/smart-fetch-scraper/main.py | 8 ++++++++ typescript/smart-fetch-scraper/index.ts | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/python/smart-fetch-scraper/main.py b/python/smart-fetch-scraper/main.py index 302a5d45..bd8a6864 100644 --- a/python/smart-fetch-scraper/main.py +++ b/python/smart-fetch-scraper/main.py @@ -3,6 +3,11 @@ # Tries the Browserbase Fetch API first (fast, no browser session needed). # If the page is JS-rendered or the content is insufficient, falls back to # a full Stagehand browser session with AI-powered extraction. +# +# Note: the Fetch API can return content as `raw` HTML, clean `markdown`, +# or schema-extracted `json` — pass `format="markdown"` or `format="json"` +# (with a JSON schema) to `bb.fetch_api.create` to skip writing your own +# HTML parser. Docs: https://docs.browserbase.com/platform/fetch/overview import asyncio import json @@ -116,6 +121,9 @@ async def try_fetch_api(url: str) -> dict | None: print("[Fetch API] Attempting lightweight fetch...") try: + # Tip: pass `format="markdown"` or `format="json"` (with a `schema`) + # here to have Browserbase return cleaner content or structured data + # directly — see https://docs.browserbase.com/platform/fetch/overview # Use asyncio.to_thread for synchronous SDK calls data = await asyncio.to_thread(bb.fetch_api.create, url=url, allow_redirects=True) diff --git a/typescript/smart-fetch-scraper/index.ts b/typescript/smart-fetch-scraper/index.ts index a31baaaa..f20b799e 100644 --- a/typescript/smart-fetch-scraper/index.ts +++ b/typescript/smart-fetch-scraper/index.ts @@ -3,6 +3,11 @@ // Tries the Browserbase Fetch API first (fast, no browser session needed). // If the page is JS-rendered or the content is insufficient, falls back to // a full Stagehand browser session with AI-powered extraction. +// +// Note: the Fetch API can return content as `raw` HTML, clean `markdown`, +// or schema-extracted `json` — pass `format: "markdown"` or `format: "json"` +// (with a JSON schema) to `bb.fetchAPI.create` to skip writing your own +// HTML parser. Docs: https://docs.browserbase.com/platform/fetch/overview import "dotenv/config"; import Browserbase from "@browserbasehq/sdk"; @@ -91,6 +96,9 @@ async function tryFetchApi(url: string): Promise<{ content: string; statusCode: console.log("[Fetch API] Attempting lightweight fetch..."); try { + // Tip: pass `format: "markdown"` or `format: "json"` (with a `schema`) + // here to have Browserbase return cleaner content or structured data + // directly — see https://docs.browserbase.com/platform/fetch/overview const data = await bb.fetchAPI.create({ url, allowRedirects: true }); console.log(