Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 59 additions & 16 deletions apps/mesh/src/api/routes/sandbox-proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ async function proxyDaemon(
const runner = requireRunner(c);
if (runner instanceof Response) return runner;

const { claimName } = c.get("vmClaim");
const { claimName, userId, projectRef } = c.get("vmClaim");
const method = opts?.method ?? "POST";
let body: string | null = null;
const headers = new Headers();
Expand All @@ -174,14 +174,20 @@ async function proxyDaemon(
headers.set("content-type", "application/json");
}

const requestInit = {
method,
headers,
body,
...(opts?.signal ? { signal: opts.signal } : {}),
};

let upstream: Response;
try {
upstream = await runner.proxyDaemonRequest(claimName, daemonPath, {
method,
headers,
body,
...(opts?.signal ? { signal: opts.signal } : {}),
});
upstream = await runner.proxyDaemonRequest(
claimName,
daemonPath,
requestInit,
);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
return c.json({ error: `Daemon unreachable: ${message}` }, 502);
Expand All @@ -193,14 +199,32 @@ async function proxyDaemon(
} catch {
/* ignore */
}
return c.json(
{
error:
"Sandbox handle is gone. The sandbox needs to be re-provisioned.",
},
410,
SANDBOX_PROXY_CACHE_HEADERS,
const adopted = await runner.adoptLiveClaim?.(
{ userId, projectRef },
claimName,
);
Comment on lines +202 to 205
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: adoptLiveClaim errors are unhandled in proxyDaemon, which can turn a recoverable 404 path into a 500 response.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At apps/mesh/src/api/routes/sandbox-proxy.ts, line 202:

<comment>`adoptLiveClaim` errors are unhandled in `proxyDaemon`, which can turn a recoverable 404 path into a 500 response.</comment>

<file context>
@@ -193,14 +199,32 @@ async function proxyDaemon(
-      },
-      410,
-      SANDBOX_PROXY_CACHE_HEADERS,
+    const adopted = await runner.adoptLiveClaim?.(
+      { userId, projectRef },
+      claimName,
</file context>
Suggested change
const adopted = await runner.adoptLiveClaim?.(
{ userId, projectRef },
claimName,
);
let adopted = false;
try {
adopted =
(await runner.adoptLiveClaim?.({ userId, projectRef }, claimName)) ??
false;
} catch {
adopted = false;
}

if (adopted) {
try {
upstream = await runner.proxyDaemonRequest(
claimName,
daemonPath,
requestInit,
);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
return c.json({ error: `Daemon unreachable: ${message}` }, 502);
}
}
if (upstream.status === 404) {
return c.json(
{
error:
"Sandbox handle is gone. The sandbox needs to be re-provisioned.",
},
410,
SANDBOX_PROXY_CACHE_HEADERS,
);
}
}

const text = await upstream.text();
Expand All @@ -217,13 +241,30 @@ async function fetchDaemonJson<T>(
claimName: string,
daemonPath: string,
method: "GET" | "POST" = "GET",
sandboxId?: { userId: string; projectRef: string },
): Promise<T> {
const upstream = await runner.proxyDaemonRequest(claimName, daemonPath, {
let upstream = await runner.proxyDaemonRequest(claimName, daemonPath, {
method,
headers: new Headers(),
body: null,
});

if (upstream.status === 404 && sandboxId) {
try {
await upstream.body?.cancel();
} catch {
/* ignore */
}
const adopted = await runner.adoptLiveClaim?.(sandboxId, claimName);
if (adopted) {
upstream = await runner.proxyDaemonRequest(claimName, daemonPath, {
method,
headers: new Headers(),
body: null,
});
}
}

if (upstream.status === 404) {
try {
await upstream.body?.cancel();
Expand Down Expand Up @@ -423,7 +464,7 @@ export const createSandboxRoutes = () => {
const runner = requireRunner(c);
if (runner instanceof Response) return runner;

const { claimName } = c.get("vmClaim");
const { claimName, userId, projectRef } = c.get("vmClaim");
const ctx = c.var.meshContext;

let body: { status?: GitStatusLike; diff?: GitDiffLike } = {};
Expand Down Expand Up @@ -453,12 +494,14 @@ export const createSandboxRoutes = () => {
claimName,
"/_sandbox/git/status",
"GET",
{ userId, projectRef },
),
fetchDaemonJson<GitDiffLike>(
runner,
claimName,
"/_sandbox/git/diff",
"GET",
{ userId, projectRef },
),
]);
}
Expand Down
76 changes: 54 additions & 22 deletions apps/mesh/src/web/components/thread/github/publish-dialog.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useMCPClient } from "@decocms/mesh-sdk";
import { SELF_MCP_ALIAS_ID, useMCPClient } from "@decocms/mesh-sdk";
import { Button } from "@deco/ui/components/button.tsx";
import { Dialog, DialogContent } from "@deco/ui/components/dialog.tsx";
import { Input } from "@deco/ui/components/input.tsx";
Expand Down Expand Up @@ -31,6 +31,7 @@ import {
type CreatedPullRequest,
} from "./github-pr-api.ts";
import type { PrSummary } from "./use-pr-data.ts";
import { useSandboxStart } from "@/web/components/sandbox/hooks/use-sandbox-start";
import {
countGitChanges,
discardGitFiles,
Expand All @@ -39,6 +40,7 @@ import {
fetchSuggestCommitMessage,
hasUnpublishedWork,
isDecoOnlyDiff,
isSandboxUnreachable,
publishGitChanges,
PUBLISH_REQUIRES_SUBMIT_TOOLTIP,
readGitHeadBranch,
Expand Down Expand Up @@ -119,6 +121,12 @@ function PublishDialogBody({
orgId,
orgSlug,
});
const selfClient = useMCPClient({
connectionId: SELF_MCP_ALIAS_ID,
orgId,
orgSlug,
});
const startSandbox = useSandboxStart(selfClient);

const commitToOpenPr = openPullRequest?.state === "open";

Expand All @@ -138,6 +146,32 @@ function PublishDialogBody({
const [isGeneratingSuggestion, setIsGeneratingSuggestion] = useState(false);

const loadStartedRef = useRef(false);
const reprovisionAttemptedRef = useRef(false);

// Loads the sandbox's git status and diff into dialog state, then starts a
// best-effort commit-message suggestion in the background. The returned
// promise rejects if either git fetch fails, so the caller decides how to
// surface or recover from that error; it does NOT wait for the suggestion.
const loadGitState = async () => {
// Status and diff are independent requests, so they are fetched concurrently.
const [status, diff] = await Promise.all([
fetchGitStatus(orgSlug, virtualMcpId, branch),
fetchGitDiff(orgSlug, virtualMcpId, branch),
]);
setGitStatus(status);
setGitDiff(diff);
// Provisional title shown until (and unless) the suggestion below resolves;
// falls back to the `branch` value when the status has no current branch.
setPublishTitle(`Changes from ${status.current ?? branch}`);

// Intentionally not awaited: the suggestion must not delay showing the
// changes. The flag is raised here and always cleared in `.finally`.
setIsGeneratingSuggestion(true);
fetchSuggestCommitMessage(orgSlug, virtualMcpId, branch, {
status,
diff,
})
.then((commitSuggestion) => {
// A successful suggestion replaces the provisional title and the body.
setPublishTitle(commitSuggestion.title);
setPublishBody(commitSuggestion.body);
})
.catch(() => {
/* best-effort */
})
.finally(() => setIsGeneratingSuggestion(false));
};

// oxlint-disable-next-line ban-ref-current-assignment/ban-ref-current-assignment -- one-shot load on dialog open
if (!loadStartedRef.current) {
// oxlint-disable-next-line ban-ref-current-assignment/ban-ref-current-assignment -- one-shot load on dialog open
Expand All @@ -151,28 +185,26 @@ function PublishDialogBody({
setSubmitForReviewError(undefined);
setSaveChangesError(undefined);
try {
const [status, diff] = await Promise.all([
fetchGitStatus(orgSlug, virtualMcpId, branch),
fetchGitDiff(orgSlug, virtualMcpId, branch),
]);
setGitStatus(status);
setGitDiff(diff);
setPublishTitle(`Changes from ${status.current ?? branch}`);

setIsGeneratingSuggestion(true);
fetchSuggestCommitMessage(orgSlug, virtualMcpId, branch, {
status,
diff,
})
.then((commitSuggestion) => {
setPublishTitle(commitSuggestion.title);
setPublishBody(commitSuggestion.body);
})
.catch(() => {
/* best-effort */
})
.finally(() => setIsGeneratingSuggestion(false));
await loadGitState();
} catch (error) {
// Preview can stay live via the gateway while mesh's daemon proxy
// still holds a stale handle. Re-provision once, then retry — same
// self-heal path as preview.tsx's notFound → SANDBOX_START flow.
if (isSandboxUnreachable(error) && !reprovisionAttemptedRef.current) {
reprovisionAttemptedRef.current = true;
try {
await startSandbox.mutateAsync({ virtualMcpId, branch });
await loadGitState();
return;
} catch (retryErr) {
setPublishError(
retryErr instanceof Error
? retryErr.message
: "Failed to load changes after re-provisioning the sandbox.",
);
return;
}
}
setPublishError(
error instanceof Error ? error.message : "Failed to load changes.",
);
Expand Down
79 changes: 75 additions & 4 deletions packages/sandbox/server/provider/agent-sandbox/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@
path: string,
init: ProxyRequestInit,
): Promise<Response> {
const rec = await this.getRecord(handle);
let rec = await this.getRecord(handle);

// rehydrate failed (port-forward is pod-local); route via in-cluster Service instead.
if (!rec && this.previewUrlPattern && this.stateStore) {
Expand All @@ -516,10 +516,31 @@
if (row && token) {
const adoptedName = state?.adoptedSandboxName ?? handle;
const daemonUrl = `http://${adoptedName}.${this.namespace}.svc.cluster.local:${DAEMON_CONTAINER_PORT}`;
return proxyDaemonRequest(daemonUrl, token, path, init);
try {
const resp = await proxyDaemonRequest(daemonUrl, token, path, init);
if (resp.status !== 404) {
return resp;
}
try {
await resp.body?.cancel();
} catch {
/* ignore */
}
} catch {
// Stale adoptedSandboxName after operator eviction — fall through
// to resurrection, same as `proxyPreviewRequest`.
}
}
}

// Preview traffic can resurrect autonomously (gateway fetch retry) while
// daemon/git/exec paths still hit a cold records map. `exec` already
// routes through `requireRecord`; mirror that here so Save-changes/git
// APIs don't 410 while the iframe preview is live.
if (!rec) {
rec = await this.resurrectByHandle(handle);
}

if (!rec) {
return new Response(JSON.stringify({ error: "sandbox not found" }), {
status: 404,
Expand All @@ -529,6 +550,7 @@
let activeRec = rec;
const start = performance.now();
let status = 0;
const canRetryBody = !(init.body instanceof ReadableStream);
try {
let resp = await proxyDaemonRequest(rec.daemonUrl, rec.token, path, init);
// A 401 means the cached record is stale — the pool pod was recreated
Expand All @@ -537,9 +559,9 @@
// body is re-sendable: of the BodyInit variants only a ReadableStream is
// one-shot (consumed by the first fetch); strings, buffers,
// URLSearchParams, FormData and Blobs are re-read from memory on retry.
if (resp.status === 401 && !(init.body instanceof ReadableStream)) {
if (resp.status === 401 && canRetryBody) {
this.invalidateRecord(handle);
const fresh = await this.getRecord(handle).catch(() => null);
const fresh = await this.requireRecord(handle).catch(() => null);
if (fresh) {
activeRec = fresh;
resp = await proxyDaemonRequest(
Expand All @@ -552,6 +574,24 @@
}
status = resp.status;
return resp;
} catch (err) {
// Stale port-forward / dead pod after idle eviction — same recovery
// path as preview's fetch-retry arm.
if (!canRetryBody) throw err;
this.invalidateRecord(handle);
const fresh =
(await this.resurrectByHandle(handle)) ??
(await this.getRecord(handle).catch(() => null));
if (!fresh) throw err;
activeRec = fresh;
const resp = await proxyDaemonRequest(
fresh.daemonUrl,
fresh.token,
path,
init,
);
status = resp.status;
return resp;
} finally {
this.recordProxyDuration(
"daemon",
Expand All @@ -562,6 +602,37 @@
}
}

/**
* Repopulate mesh's records cache from a SandboxClaim that already exists
* in the cluster. Preview gateway traffic can keep serving while mesh's
* daemon/git proxy still holds a cold cache or missing state-store row.
*/
async adoptLiveClaim(id: SandboxId, handle: string): Promise<boolean> {
if (this.records.has(handle)) return true;
const existing = await getSandboxClaim(
this.kubeConfig,
this.namespace,
handle,
).catch(() => undefined);
if (!existing || existing.metadata?.deletionTimestamp) return false;

return this.inflight.run(handle, async () => {

Check failure on line 619 in packages/sandbox/server/provider/agent-sandbox/runner.ts

View workflow job for this annotation

GitHub Actions / typecheck

Argument of type '() => Promise<boolean>' is not assignable to parameter of type '() => Promise<Sandbox>'.

Check failure on line 619 in packages/sandbox/server/provider/agent-sandbox/runner.ts

View workflow job for this annotation

GitHub Actions / typecheck

Type 'Sandbox' is not assignable to type 'boolean'.
if (this.records.has(handle)) return true;
const adopted = await this.adopt(id, handle, existing).catch(() => null);
if (!adopted) return false;
await withSandboxLock(this.stateStore, id, RUNNER_KIND, async (ops) => {
await this.finish(
adopted,
ops,
/* persistNow */ true,
/* patchTtl */ true,
"adopt",
);
});
return true;
});
}

/**
* Resolves the HTTP base URL for a sandbox's daemon. Used by the preview
* reverse-proxy at the mesh edge.
Expand Down
7 changes: 7 additions & 0 deletions packages/sandbox/server/provider/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ export interface SandboxProvider {
init: ProxyRequestInit,
): Promise<Response>;

/**
* Repopulate in-process routing state from a claim that already exists in
* the cluster (preview gateway traffic can outlive mesh's records cache).
* Optional — only agent-sandbox implements this today, so callers invoke it
* with optional chaining and treat an `undefined` result as "not adopted".
*
* @param id - Identity of the sandbox that owns the claim (the daemon proxy
* passes `{ userId, projectRef }`).
* @param handle - Claim name / handle whose routing state should be restored.
* @returns `true` when the handle is routable again and the failed request is
* worth retrying; `false` when there is no live claim to adopt.
*/
adoptLiveClaim?(id: SandboxId, handle: string): Promise<boolean>;

/**
* Stream of phase transitions for the pre-Ready lifecycle. Used by mesh's
* unified `/api/vm-events` SSE so the UI can show meaningful progress
Expand Down
Loading