GoogleCloudPlatform · geojaz · Jun 3, 2026 · Jun 23, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/perfkitbenchmarker/benchmark_spec.py b/perfkitbenchmarker/benchmark_spec.py
@@ -65,6 +65,7 @@
 from perfkitbenchmarker.configs import vm_group_decoders
 from perfkitbenchmarker.resources import ai_agent_service
 from perfkitbenchmarker.resources import base_job
+from perfkitbenchmarker.resources import agent_sandbox
 from perfkitbenchmarker.resources import example_resource
 from perfkitbenchmarker.resources import managed_ai_model
 from perfkitbenchmarker.resources.container_service import container_cluster
@@ -202,6 +203,7 @@ def __init__(
     self.base_job = None
     self.edw_service = None
     self.edw_compute_resource = None
+    self.agent_sandbox = None
     self.example_resource = None
     self.multi_attach_disk = None
     self.nfs_service = None
@@ -337,6 +339,7 @@ def ConstructResources(self):
     # Put registry first, as it can be needed by cluster.
     self.ConstructContainerRegistry()
     self.ConstructContainerCluster()
+    self.ConstructAgentSandbox()
     # dpb service needs to go first, because it adds some vms.
     self.ConstructDpbService()
     self.ConstructCluster()
@@ -589,6 +592,19 @@ def ConstructExampleResource(self):
     )  # pytype: disable=not-instantiable
     self.resources.append(self.example_resource)
 
+  def ConstructAgentSandbox(self):
+    """Builds the agent sandbox resource on top of the container cluster."""
+    sandbox_config = self.config.agent_sandbox
+    if sandbox_config is None:
+      return
+    cluster = self.container_cluster
+    if cluster is None:
+      message = 'agent_sandbox requires a container_cluster to be configured.'
+      raise errors.Config.InvalidValue(message)
+    self.agent_sandbox = agent_sandbox.GetAgentSandbox(sandbox_config, cluster)
+    if self.agent_sandbox:
+      self.resources.append(self.agent_sandbox)
+
   def ConstructBaseJob(self):
     """Create an instance of the base job.It is also called from pkb.py."""
     if self.config.base_job is None:
@@ -1057,6 +1073,8 @@ def Provision(self):
 
     if self.container_cluster:
       self.container_cluster.Create()
+    if self.agent_sandbox:
+      self.agent_sandbox.Create()
 
     # do after network setup but before VM created
     if self.nfs_service and self.nfs_service.CLOUD != nfs_service.UNMANAGED:
@@ -1207,6 +1225,8 @@ def Delete(self):
       self.edw_service.Delete()
     if hasattr(self, 'edw_compute_resource') and self.edw_compute_resource:
       self.edw_compute_resource.Delete()
+    if self.agent_sandbox:
+      self.agent_sandbox.Delete()
     if self.example_resource:
       self.example_resource.Delete()
     if self.base_job:

diff --git a/perfkitbenchmarker/configs/benchmark_config_spec.py b/perfkitbenchmarker/configs/benchmark_config_spec.py
@@ -39,6 +39,7 @@
 from perfkitbenchmarker.configs import spec
 from perfkitbenchmarker.configs import vm_group_decoders
 from perfkitbenchmarker.resources import ai_agent_service_spec
+from perfkitbenchmarker.resources import agent_sandbox_spec
 from perfkitbenchmarker.resources import example_resource_spec
 from perfkitbenchmarker.resources import jobs_setter
 from perfkitbenchmarker.resources import managed_ai_model_spec
@@ -1488,6 +1489,10 @@ def _GetOptionDecoderConstructions(cls):
         'tpu_groups': (_TpuGroupsDecoder, {'default': {}}),
         'edw_compute_resource': (_EdwComputeResourceDecoder, {'default': None}),
         'edw_service': (_EdwServiceDecoder, {'default': None}),
+        'agent_sandbox': (
+            agent_sandbox_spec.AgentSandboxConfigDecoder,
+            {'default': None, 'none_ok': True},
+        ),
         'example_resource': (_ExampleResourceDecoder, {'default': None}),
         'base_job': (_BaseJobDecoder, {'default': None}),
         'memory_store': (_MemoryStoreDecoder, {'default': None}),

diff --git a/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/daemonset.yaml b/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/daemonset.yaml
@@ -0,0 +1,65 @@
+# Privileged DaemonSet that runs an init container to install runsc and the
+# containerd-shim-runsc-v1 shim onto the host, then idles in a pause container.
+#
+# The actual install logic comes from a ConfigMap named gvisor-installer-script
+# (created by install_gvisor() in resources/kubernetes/k8s_agent_sandbox.py from
+# data/agent_sandbox/gvisor-installer/install.sh before this DaemonSet is
+# applied). The ConfigMap key is "install.sh", mounted at /scripts.
+#
+# nodeSelector and tolerations are injected at apply time (see
+# _render_gvisor_daemonset in resources/kubernetes/k8s_agent_sandbox.py) so the
+# DaemonSet targets the sandbox node pool via the pkb_nodepool label.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: gvisor-installer
+  namespace: kube-system
+  labels:
+    app.kubernetes.io/name: gvisor-installer
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: gvisor-installer
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: gvisor-installer
+    spec:
+      hostPID: true
+      initContainers:
+        - name: install
+          image: docker.io/library/ubuntu:24.04
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            privileged: true
+          env:
+            - name: GVISOR_VERSION
+              # Pinned for benchmarking. Update in lockstep across all envs.
+              # Verify available releases at https://gvisor.dev/docs/user_guide/install/
+              value: "20260511"
+          command: ["/bin/bash", "/scripts/install.sh"]
+          volumeMounts:
+            - name: host
+              mountPath: /host
+            - name: script
+              mountPath: /scripts
+              readOnly: true
+      containers:
+        # Pause container keeps the DaemonSet "Running" after install completes.
+        - name: pause
+          image: registry.k8s.io/pause:3.9
+          resources:
+            requests:
+              cpu: 10m
+              memory: 16Mi
+            limits:
+              cpu: 50m
+              memory: 64Mi
+      volumes:
+        - name: host
+          hostPath:
+            path: /
+        - name: script
+          configMap:
+            name: gvisor-installer-script
+            defaultMode: 0755
diff --git a/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/install.sh b/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/install.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+# Installs gVisor (runsc + containerd-shim-runsc-v1) onto the node whose root
+# filesystem is mounted at /host, then registers the runsc runtime with
+# containerd.
+#
+# Requires GVISOR_VERSION in the environment. Host commands are run through
+# nsenter against PID 1, so the container must be privileged and share the
+# host PID namespace. containerd is restarted only if this run changed
+# something.
+set -euxo pipefail
+
+: "${GVISOR_VERSION:?must be set}"
+HOST=/host
+ARCH=$(uname -m)
+URL="https://storage.googleapis.com/gvisor/releases/release/${GVISOR_VERSION}/${ARCH}"
+
+apt-get update -qq
+apt-get install -y -qq curl util-linux
+
+NEEDS_RESTART=0
+
+# On COS nodes /usr/local/bin is read-only; binaries live on the writable
+# stateful partition at /home/kubernetes/bin. On all other nodes (Ubuntu,
+# Amazon Linux) /usr/local/bin is writable and already on PATH.
+if [ -d "${HOST}/home/kubernetes" ]; then
+  INSTALL_DIR="${HOST}/home/kubernetes/bin"
+  NEEDS_PATH_DROPIN=1
+else
+  INSTALL_DIR="${HOST}/usr/local/bin"
+  NEEDS_PATH_DROPIN=0
+fi
+mkdir -p "${INSTALL_DIR}"
+
+# Fetch each missing binary next to its final path, verify it against the
+# SHA-512 digest published with the release, and only then move it into
+# place. A binary that is already present and executable is left untouched.
+for bin in runsc containerd-shim-runsc-v1; do
+  TARGET="${INSTALL_DIR}/${bin}"
+  if [ ! -x "${TARGET}" ]; then
+    curl -fsSL "${URL}/${bin}" -o "${TARGET}.new"
+    # The published .sha512 file names the artifact by its bare file name, so
+    # take only the digest and check it against the staged path instead.
+    EXPECTED_SHA512="$(curl -fsSL "${URL}/${bin}.sha512" | cut -d' ' -f1)"
+    if ! echo "${EXPECTED_SHA512}  ${TARGET}.new" | sha512sum -c -; then
+      rm -f "${TARGET}.new"
+      echo "SHA-512 verification failed for ${bin}; refusing to install." >&2
+      exit 1
+    fi
+    chmod +x "${TARGET}.new"
+    mv "${TARGET}.new" "${TARGET}"
+    NEEDS_RESTART=1
+  fi
+done
+
+# On COS, /home/kubernetes/bin is not on systemd's default PATH; drop in a
+# unit override for containerd so the shim is found. Not needed on non-COS
+# nodes where /usr/local/bin is already on PATH.
+if [ "${NEEDS_PATH_DROPIN}" -eq 1 ]; then
+  DROPIN_DIR="${HOST}/etc/systemd/system/containerd.service.d"
+  DROPIN="${DROPIN_DIR}/10-runsc-path.conf"
+  mkdir -p "${DROPIN_DIR}"
+  if [ ! -f "${DROPIN}" ]; then
+    cat > "${DROPIN}" <<'EOF'
+[Service]
+Environment="PATH=/home/kubernetes/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+EOF
+    NEEDS_RESTART=1
+  fi
+fi
+
+# Register the runsc runtime with containerd.
+CONFIG="${HOST}/etc/containerd/config.toml"
+if [ ! -f "${CONFIG}" ]; then
+  mkdir -p "$(dirname "${CONFIG}")"
+  nsenter -t 1 -m -u -i -n -p -- containerd config default > "${CONFIG}"
+fi
+if ! grep -q 'io.containerd.runsc.v1' "${CONFIG}"; then
+  # containerd v2+ uses config version 3 where the CRI runtime plugin moved
+  # from io.containerd.grpc.v1.cri to io.containerd.cri.v1.runtime.
+  # Appending to the wrong section is silently ignored, leaving runsc
+  # unconfigured even though the binary is installed.
+  if grep -q 'version = 3' "${CONFIG}"; then
+    CRI_PLUGIN='io.containerd.cri.v1.runtime'
+  else
+    CRI_PLUGIN='io.containerd.grpc.v1.cri'
+  fi
+  cat >>"${CONFIG}" <<TOML
+
+[plugins."${CRI_PLUGIN}".containerd.runtimes.runsc]
+runtime_type = "io.containerd.runsc.v1"
+TOML
+  NEEDS_RESTART=1
+fi
+
+if [ "${NEEDS_RESTART}" -eq 1 ]; then
+  nsenter -t 1 -m -u -i -n -p -- systemctl daemon-reload
+  nsenter -t 1 -m -u -i -n -p -- systemctl restart containerd
+fi
+
+echo "gVisor self-install complete on $(uname -n)."
diff --git a/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/runtimeclass.yaml b/perfkitbenchmarker/data/agent_sandbox/gvisor-installer/runtimeclass.yaml
@@ -0,0 +1,15 @@
+# RuntimeClass that self-managed sandbox pods reference via runtimeClassName:
+# runsc. Handler "runsc" matches the runtime the installer DaemonSet registers
+# in containerd's config (plugin io.containerd.cri.v1.runtime on config v3):
+#   [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
+#     runtime_type = "io.containerd.runsc.v1"
+#
+# Named "runsc" (not "gvisor") because GKE Standard ships a pre-installed
+# "gvisor" RuntimeClass with handler "gvisor" and addonmanager mode
+# Reconcile, so we can't own that name. Platform-managed scenarios use
+# GKE's "gvisor" RC and don't need this manifest.
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: runsc
+handler: runsc
diff --git a/perfkitbenchmarker/data/agent_sandbox/load_runner_job.yaml.j2 b/perfkitbenchmarker/data/agent_sandbox/load_runner_job.yaml.j2
@@ -0,0 +1,55 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: agent-sandbox-load-runner
+  namespace: {{ namespace }}
+spec:
+  backoffLimit: 0
+  ttlSecondsAfterFinished: 3600
+  template:
+    metadata:
+      labels:
+        app: agent-sandbox-load-runner
+    spec:
+      restartPolicy: Never
+      serviceAccountName: agent-sandbox-load-runner
+      containers:
+        - name: runner
+          image: python:3.12-slim
+          command: ["bash", "-c"]
+          args:
+            - |
+              set -euo pipefail
+              mkdir -p /tmp/results
+              pip install --no-cache-dir --quiet kubernetes
+              python /opt/load-runner/load_runner.py \
+                --namespace {{ namespace }} \
+                --template-name {{ template_name }} \
+                --max-concurrent {{ max_concurrent }} \
+                --workload-duration {{ workload_duration }} \
+                {% if qps is not none %}--qps {{ qps }} \
+                {% endif %}{% if total is not none %}--total {{ total }} \
+                {% endif %}{% if duration is not none %}--duration {{ duration }} \
+                {% endif %}{% if ready_timeout is not none %}--ready-timeout {{ ready_timeout }} \
+                {% endif %}--output /tmp/results/run.jsonl
+              echo '---RESULTS---'
+              cat /tmp/results/run.jsonl
+          resources:
+            requests:
+              cpu: "1"
+              memory: "1Gi"
+            limits:
+              cpu: "2"
+              memory: "2Gi"
+          volumeMounts:
+            - name: script
+              mountPath: /opt/load-runner
+              readOnly: true
+            - name: results
+              mountPath: /tmp/results
+      volumes:
+        - name: script
+          configMap:
+            name: agent-sandbox-load-runner-script
+        - name: results
+          emptyDir: {}
diff --git a/perfkitbenchmarker/data/agent_sandbox/load_runner_rbac.yaml.j2 b/perfkitbenchmarker/data/agent_sandbox/load_runner_rbac.yaml.j2
@@ -0,0 +1,41 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: agent-sandbox-load-runner
+  namespace: {{ namespace }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: agent-sandbox-load-runner
+  namespace: {{ namespace }}
+rules:
+  - apiGroups: ["extensions.agents.x-k8s.io"]
+    resources: ["sandboxclaims"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["extensions.agents.x-k8s.io"]
+    resources: ["sandboxtemplates"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxes"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list"]
+  - apiGroups: [""]
+    resources: ["pods/exec"]
+    verbs: ["create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: agent-sandbox-load-runner
+  namespace: {{ namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: agent-sandbox-load-runner
+subjects:
+  - kind: ServiceAccount
+    name: agent-sandbox-load-runner
+    namespace: {{ namespace }}
diff --git a/perfkitbenchmarker/data/agent_sandbox/sandbox-template.yaml.j2 b/perfkitbenchmarker/data/agent_sandbox/sandbox-template.yaml.j2
@@ -0,0 +1,44 @@
+# Reusable blueprint for the sandboxes that SandboxClaim will provision.
+# Pod-shape values come from the sandbox_template config block; the defaults
+# below match the original hardcoded template.
+#
+# The security/placement fields below are REQUIRED by GKE's
+# secure-sandbox-policy ValidatingAdmissionPolicy on Agent-Sandbox-enabled
+# clusters. They are harmless on clusters that do not run the policy.
+apiVersion: extensions.agents.x-k8s.io/v1beta1
+kind: SandboxTemplate
+metadata:
+  name: {{ name }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+{% for k, v in labels.items() %}        {{ k }}: "{{ v }}"
+{% endfor %}    spec:
+      runtimeClassName: {{ runtime_class }}
+      automountServiceAccountToken: false
+      securityContext:
+        runAsNonRoot: true
+      containers:
+        - name: python-runtime
+          image: {{ image }}
+          ports:
+            - containerPort: 8888
+          readinessProbe:
+            httpGet:
+              path: "/"
+              port: 8888
+            initialDelaySeconds: 0
+            periodSeconds: 1
+          securityContext:
+            capabilities:
+              drop: ["ALL"]
+          resources:
+            requests:
+              cpu: "{{ cpu_request }}"
+              memory: "{{ memory_request }}"
+              ephemeral-storage: "256Mi"
+            limits:
+              cpu: "{{ cpu_limit }}"
+              memory: "{{ memory_limit }}"
+      restartPolicy: "OnFailure"
diff --git a/perfkitbenchmarker/data/agent_sandbox/sandbox-warmpool.yaml.j2 b/perfkitbenchmarker/data/agent_sandbox/sandbox-warmpool.yaml.j2
@@ -0,0 +1,11 @@
+# Pre-warmed pool of sandbox pods. Replenishment latency under contention is
+# one of the primary metrics for the benchmark -- keep replicas identical
+# across envs so claim throughput is the only variable.
+apiVersion: extensions.agents.x-k8s.io/v1beta1
+kind: SandboxWarmPool
+metadata:
+  name: {{ name }}
+spec:
+  replicas: {{ replicas }}
+  sandboxTemplateRef:
+    name: {{ name }}