From 096b0912021a17c78c7df4270ebf5a370b444498 Mon Sep 17 00:00:00 2001 From: Guilherme Rodrigues Date: Sat, 9 May 2026 21:46:39 -0300 Subject: [PATCH] feat(agents): add native GEO Audit Agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Installs a per-org Virtual MCP that runs the open-source geo-seo-claude toolkit (https://github.com/zubair-trabzada/geo-seo-claude) inside Studio's warm sandbox. The agent's metadata.instructions orchestrates the audit via the built-in VM tools (bash/read/write/share_with_user) — no separately deployed MCP, no Python rewrite. The geo-seo-claude repo is cloned fresh on the first turn of every thread; pip install + playwright install are gated by marker files for warm-pool reuse. - apps/mesh/src/agents/geo-seo/prompt.md: ported SKILL.md, XML-structured to match studio-pack convention; sandbox bootstrap, per-audit-type dispatch, composite scoring formula, archive via share_with_user. - apps/mesh/src/agents/geo-seo/index.ts: installGeoAuditAgent() server function mirroring installStudioPack; idempotent canonical id. - apps/mesh/src/auth/org.ts: seedOrgDb hook installs the agent for new orgs. - apps/mesh/migrations/077-install-geo-seo-agent.ts: backfill for existing orgs, idempotent via onConflict.doNothing. GitHub issue creation is deferred — the sandbox does not yet expose GITHUB_TOKEN to gh; v1 archives only via share_with_user (presigned S3). 
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../migrations/077-install-geo-seo-agent.ts | 101 +++++++++++++++++
 apps/mesh/migrations/index.ts | 2 +
 apps/mesh/src/agents/geo-seo/index.ts | 64 +++++++++++
 apps/mesh/src/agents/geo-seo/prompt.md | 102 ++++++++++++++++++
 apps/mesh/src/auth/org.ts | 9 ++
 5 files changed, 278 insertions(+)
 create mode 100644 apps/mesh/migrations/077-install-geo-seo-agent.ts
 create mode 100644 apps/mesh/src/agents/geo-seo/index.ts
 create mode 100644 apps/mesh/src/agents/geo-seo/prompt.md

diff --git a/apps/mesh/migrations/077-install-geo-seo-agent.ts b/apps/mesh/migrations/077-install-geo-seo-agent.ts
new file mode 100644
index 0000000000..eb34ffa5d5
--- /dev/null
+++ b/apps/mesh/migrations/077-install-geo-seo-agent.ts
@@ -0,0 +1,101 @@
+/**
+ * Backfill — install the GEO Audit Agent for every existing organization.
+ *
+ * New orgs get this agent via `seedOrgDb` (`apps/mesh/src/auth/org.ts`)
+ * which calls `installGeoAuditAgent`. This migration covers orgs that
+ * existed before that hook was added. Idempotent: skips orgs that already
+ * have the canonical `geo-audit_<orgId>` VIRTUAL connection.
+ *
+ * The system prompt is read from `apps/mesh/src/agents/geo-seo/prompt.md`
+ * when this migration runs, so each database stores the file's content as of
+ * its own migration run (it is not a copy frozen into this file). Later
+ * prompt edits affect only NEW orgs (via `seedOrgDb`); to update existing
+ * orgs in bulk, write a follow-up migration.
+ */
+
+import { readFileSync } from "node:fs";
+import { Kysely } from "kysely";
+import { fileURLToPath } from "node:url";
+
+// Filesystem path of the agent prompt, resolved relative to this migration
+// file's own URL rather than the process working directory.
+const PROMPT_PATH = fileURLToPath(
+  new URL("../src/agents/geo-seo/prompt.md", import.meta.url),
+);
+
+// Display fields written to every backfilled connection row.
+const AGENT_TITLE = "GEO Audit Agent";
+const AGENT_DESCRIPTION =
+  "Audit a website's visibility to AI search engines (ChatGPT, Claude, Perplexity, Google AI Overviews).
Produces a composite GEO Score (0–100) and a prioritized action plan.";
+const AGENT_ICON = "icon://BarChart02?color=violet";
+
+/**
+ * Backfills one GEO Audit Agent VIRTUAL connection per existing organization.
+ *
+ * - The prompt is read from `PROMPT_PATH` when the migration runs, so the
+ *   stored `metadata.instructions` is the file's content at that moment.
+ * - `created_by` is set to a member whose role is `owner`; organizations
+ *   without such a member row are skipped and receive no agent here.
+ * - Re-running is safe: the deterministic id `geo-audit_<orgId>` together
+ *   with `onConflict(id).doNothing()` leaves already-present rows untouched.
+ *
+ * @param db - Kysely handle supplied by the migration runner.
+ */
+export async function up(db: Kysely<unknown>): Promise<void> {
+  const instructions = readFileSync(PROMPT_PATH, "utf-8");
+  // The metadata column receives a JSON string, serialized once and reused
+  // for every inserted row.
+  const metadata = JSON.stringify({ instructions });
+
+  // Owner per org for created_by attribution. Same pattern as
+  // migration 048-merge-projects-agents.ts.
+  const orgOwners = (await db
+    .selectFrom("member" as never)
+    .select(["organizationId" as never, "userId" as never])
+    .where("role" as never, "=", "owner" as never)
+    .execute()) as Array<{ organizationId: string; userId: string }>;
+
+  // First owner row returned for an org wins. The query has no ORDER BY, so
+  // which owner that is remains unspecified when an org has several.
+  const orgOwnerMap = new Map<string, string>();
+  for (const row of orgOwners) {
+    if (!orgOwnerMap.has(row.organizationId)) {
+      orgOwnerMap.set(row.organizationId, row.userId);
+    }
+  }
+
+  const orgs = (await db
+    .selectFrom("organization" as never)
+    .select(["id" as never])
+    .execute()) as Array<{ id: string }>;
+
+  // One timestamp for the whole backfill so all rows share created_at/updated_at.
+  const now = new Date().toISOString();
+
+  for (const org of orgs) {
+    const createdBy = orgOwnerMap.get(org.id);
+    if (!createdBy) continue; // skip orgs with no owner row
+
+    const id = `geo-audit_${org.id}`;
+
+    await db
+      .insertInto("connections" as never)
+      .values({
+        id,
+        organization_id: org.id,
+        created_by: createdBy,
+        updated_by: null,
+        title: AGENT_TITLE,
+        description: AGENT_DESCRIPTION,
+        icon: AGENT_ICON,
+        app_name: null,
+        app_id: null,
+        connection_type: "VIRTUAL",
+        connection_url: `virtual://${id}`,
+        connection_token: null,
+        connection_headers: null,
+        oauth_config: null,
+        configuration_state: null,
+        configuration_scopes: null,
+        metadata,
+        bindings: null,
+        status: "active",
+        pinned: false,
+        subtype: "agent",
+        created_at: now,
+        updated_at: now,
+      } as never)
+      // biome-ignore lint/suspicious/noExplicitAny: kysely's onConflict signature
+      .onConflict((oc: any) => oc.column("id").doNothing())
+      .execute();
+  }
+}
+
+export async function down(db: Kysely<unknown>): Promise<void> {
+  // Remove every GEO Audit Agent row
this migration could have inserted,
+  // including any added later by `seedOrgDb` for new orgs — they share the
+  // canonical id prefix and have no other source.
+  //
+  // NOTE(review): in SQL LIKE, "_" matches any single character, so the
+  // pattern below also matches ids such as "geo-auditX…". Presumably no other
+  // VIRTUAL connection id has that shape — confirm, or escape the underscore.
+  await db
+    .deleteFrom("connections" as never)
+    .where("connection_type" as never, "=", "VIRTUAL" as never)
+    .where("id" as never, "like", "geo-audit_%" as never)
+    .execute();
+}
diff --git a/apps/mesh/migrations/index.ts b/apps/mesh/migrations/index.ts
index 23f3bf7b97..8b6c1e5056 100644
--- a/apps/mesh/migrations/index.ts
+++ b/apps/mesh/migrations/index.ts
@@ -75,6 +75,7 @@ import * as migration073backfillbasicusageroles from "./073-backfill-basic-usage
 import * as migration074sandboxrunnerstatehandlenonunique from "./074-sandbox-runner-state-handle-nonunique.ts";
 import * as migration075threadinflightasyncjobs from "./075-thread-inflight-async-jobs.ts";
 import * as migration076automationsdropagentjson from "./076-automations-drop-agent-json.ts";
+import * as migration077installgeoseoagent from "./077-install-geo-seo-agent.ts";
 
 /**
  * Core migrations for the Mesh application.
@@ -165,6 +166,7 @@ const migrations: Record = {
     migration074sandboxrunnerstatehandlenonunique,
   "075-thread-inflight-async-jobs": migration075threadinflightasyncjobs,
   "076-automations-drop-agent-json": migration076automationsdropagentjson,
+  "077-install-geo-seo-agent": migration077installgeoseoagent,
 };
 
 export default migrations;
diff --git a/apps/mesh/src/agents/geo-seo/index.ts b/apps/mesh/src/agents/geo-seo/index.ts
new file mode 100644
index 0000000000..b5b704788c
--- /dev/null
+++ b/apps/mesh/src/agents/geo-seo/index.ts
@@ -0,0 +1,64 @@
+/**
+ * GEO Audit Agent — server-side installer.
+ *
+ * Mirrors the Studio Pack pattern (`apps/mesh/src/tools/virtual/studio-pack.ts`):
+ * a stable per-org Virtual MCP whose `metadata.instructions` is the ported
+ * `prompt.md`.
The agent has no aggregated child connections — it relies
+ * exclusively on Studio's built-in VM tools (bash/read/write/share_with_user)
+ * to run the geo-seo-claude Python toolkit inside the warm sandbox.
+ */
+
+import { readFileSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+import type { VirtualMCPStorage } from "@/storage/virtual";
+
+/** Prefix shared by every GEO Audit Agent connection id. */
+export const GEO_AUDIT_AGENT_ID_PREFIX = "geo-audit_";
+
+/** Builds the canonical agent id for an organization: prefix followed by `orgId`. */
+export const getGeoAuditAgentId = (orgId: string): string => {
+  return GEO_AUDIT_AGENT_ID_PREFIX + orgId;
+};
+
+/** Reports whether `id` is a non-empty string that starts with the agent id prefix. */
+export const isGeoAuditAgent = (id: string | null | undefined): boolean => {
+  if (!id) {
+    return false;
+  }
+  return id.startsWith(GEO_AUDIT_AGENT_ID_PREFIX);
+};
+
+/** Display fields (title, description, icon) used when the agent is created. */
+export const GEO_AUDIT_AGENT = {
+  title: "GEO Audit Agent",
+  description:
+    "Audit a website's visibility to AI search engines (ChatGPT, Claude, Perplexity, Google AI Overviews). Produces a composite GEO Score (0–100) and a prioritized action plan.",
+  icon: "icon://BarChart02?color=violet",
+} as const;
+
+// The prompt file is read synchronously a single time, when this module is
+// first evaluated; requests never touch the filesystem for it. This relies on
+// prompt.md being shipped next to this module in the server build.
+const PROMPT_PATH = fileURLToPath(new URL("./prompt.md", import.meta.url));
+export const GEO_AUDIT_INSTRUCTIONS = readFileSync(PROMPT_PATH, "utf-8");
+
+/**
+ * Idempotently install the GEO Audit Agent for an organization. Skips if a
+ * VIRTUAL connection with the canonical id already exists.
+ *
+ * @param orgId - Organization the agent is installed for; also determines the
+ *   agent id via `getGeoAuditAgentId`.
+ * @param createdBy - Forwarded to `virtualMcpStorage.create` as the creator.
+ * @param virtualMcpStorage - Storage used for the existence check and the insert.
+ * @returns Resolves once the agent exists (either found or newly created).
+ */
+export async function installGeoAuditAgent(
+  orgId: string,
+  createdBy: string,
+  virtualMcpStorage: VirtualMCPStorage,
+): Promise<void> {
+  const id = getGeoAuditAgentId(orgId);
+  // A failed lookup is treated the same as "not installed" and falls through
+  // to create().
+  // NOTE(review): this also hides genuine storage errors from the lookup —
+  // presumably mirrors installStudioPack; confirm that is intended.
+  const existing = await virtualMcpStorage.findById(id).catch(() => null);
+  if (existing) return;
+
+  await virtualMcpStorage.create(
+    orgId,
+    createdBy,
+    {
+      title: GEO_AUDIT_AGENT.title,
+      description: GEO_AUDIT_AGENT.description,
+      icon: GEO_AUDIT_AGENT.icon,
+      status: "active",
+      pinned: false,
+      metadata: {
+        instructions: GEO_AUDIT_INSTRUCTIONS,
+      },
+      // No aggregated child connections: the agent is created with an empty list.
+      connections: [],
+    },
+    // Pin the row to the canonical per-org id so the existence check above
+    // finds it on later calls.
+    { id },
+  );
+}
diff --git a/apps/mesh/src/agents/geo-seo/prompt.md b/apps/mesh/src/agents/geo-seo/prompt.md
new file mode 100644
index 0000000000..ef6c01d33a
--- /dev/null
+++ b/apps/mesh/src/agents/geo-seo/prompt.md
@@ -0,0 +1,102 @@
+
+You are the GEO Audit Agent. You evaluate websites for visibility to AI search engines (ChatGPT, Claude, Perplexity, Google AI Overviews, Gemini) using the open-source `geo-seo-claude` toolkit, which you run inside a persistent sandbox.
+
+Philosophy: GEO-first, SEO-supported. AI search is eating traditional search; you optimize for where traffic is going.
+
+
+
+- Boot a persistent sandbox and clone `https://github.com/zubair-trabzada/geo-seo-claude` into `/workspace/geo-seo`.
+- Run individual GEO sub-audits (citability, AI crawler access, llms.txt, brand mentions, structured data, technical SEO, content E-E-A-T, platform readiness).
+- Synthesize findings into a composite GEO Score (0–100) and a prioritized action plan.
+- Archive the final report to the user via `share_with_user` (presigned download URL).
+- Use the warm sandbox across turns: install steps run once per thread, then later turns reuse the environment.
+
+
+
+- Always run inside the sandbox via `bash`. Never call WebFetch directly for audit work — the Python tools handle fetching with appropriate AI-crawler user-agents and rate limiting.
+- Respect robots.txt of audited sites.
The bundled scripts already do this; do not bypass.
+- Cap each audit at 50 pages and 30 seconds per fetch (built into the scripts — do not raise limits).
+- Never fabricate scores. If a script fails or a fetch is blocked, report the failure and lower the relevant component score accordingly.
+- Do not modify the cloned repo's tracked files; treat it as read-only tooling. The only files you may add inside it are the untracked bootstrap markers (`.deps_installed`, `.playwright_installed`).
+- Do not install Python packages outside `requirements.txt` and the audit-needed extras (`playwright`). If a script demands more, surface the gap to the user.
+
+
+
+
+1. **First turn of every thread — bootstrap the sandbox.**
+   Run via `bash` (one block, in this order; the marker files make subsequent turns idempotent). Each install step is its own command so that `set -e` aborts on the first failure; a failed step must never reach the readiness line:
+   ```
+   set -e
+   mkdir -p /workspace
+   cd /workspace
+   if [ ! -d geo-seo ]; then
+     git clone --depth 1 https://github.com/zubair-trabzada/geo-seo-claude geo-seo
+   fi
+   cd geo-seo
+   if [ ! -f .deps_installed ]; then
+     pip3 install -q -r requirements.txt
+     touch .deps_installed
+   fi
+   if [ ! -f .playwright_installed ]; then
+     pip3 install -q playwright
+     playwright install --with-deps chromium
+     touch .playwright_installed
+   fi
+   echo "geo-seo ready: $(git rev-parse --short HEAD)"
+   ```
+   Confirm the readiness line in the output. If it is absent, surface the bash error and stop.
+
+2. **Greet and gather input.**
+   If the user has not yet supplied a URL, ask: "What URL should I audit? Audit type defaults to **full**; you can also say `quick`, `citability`, `crawlers`, `llmstxt`, `brands`, `schema`, `technical`, `content`, or `platforms` to narrow the scope."
+   Do not start the audit until you have a URL.
+
+3. **Dispatch the audit.**
+   For each requested type, run the matching scripts inside `/workspace/geo-seo`. When a script does not exist for a sub-skill, follow the methodology in the corresponding `agents/*.md` file (read it with the `read` tool, then act inline).
+ + | Audit type | What to run | + |---|---| + | `quick` | `python3 scripts/fetch_page.py `; output a 60-second snapshot of business type, citability sample, crawler access, and llms.txt presence. No file output. | + | `citability` | `python3 scripts/citability_scorer.py > /workspace/out/GEO-CITABILITY-SCORE.md` | + | `crawlers` | Read `agents/geo-ai-visibility.md` § Step 3 + parse `/robots.txt` via `python3 -c "..."` to evaluate the crawler table. Write `/workspace/out/GEO-CRAWLER-ACCESS.md`. | + | `llmstxt` | `python3 scripts/llmstxt_generator.py > /workspace/out/llms.txt` (and a sibling validation note if the input already exists) | + | `brands` | `python3 scripts/brand_scanner.py "" > /workspace/out/GEO-BRAND-MENTIONS.md` (use the brand name as it appears on the homepage, not the bare domain) | + | `schema` | Fetch the page, extract `