diff --git a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py
index b280adfa59..4ffa1d5f1d 100644
--- a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py
@@ -60,6 +60,7 @@
     vm.bootable_time in a cluster of VMs is reported as the cluster boot time.
 """
 
+from collections.abc import Mapping
 import logging
 import os
 import shlex
@@ -67,9 +68,10 @@
 import socket
 import subprocess
 import time
-from typing import List, Tuple
+from typing import Any, List, Tuple
 
 from absl import flags
+import immutabledict
 from perfkitbenchmarker import background_tasks
 from perfkitbenchmarker import configs
 from perfkitbenchmarker import errors
@@ -239,7 +241,10 @@ def Prepare(unused_benchmark_spec):
   pass
 
 
-def GetTimeToBoot(vms):
+def GetTimeToBoot(
+    vms: List[virtual_machine.BaseVirtualMachine],
+    metadata: Mapping[str, Any] = immutabledict.immutabledict(),
+):
   """Creates Samples for the boot time of a list of VMs.
 
   The time to create async return is the time difference from before the VM is
@@ -254,6 +259,7 @@ def GetTimeToBoot(vms):
 
   Args:
     vms: List of BaseVirtualMachine subclasses.
+    metadata: Additional metadata to attach to each sample.
 
   Returns:
     List of Samples containing each of the provisioning metrics listed above,
@@ -281,7 +287,7 @@ def GetTimeToBoot(vms):
     os_types.add(vm.OS_TYPE)
     create_delay_sec = vm.create_start_time - min_create_start_time
     max_create_delay_sec = max(max_create_delay_sec, create_delay_sec)
-    metadata = {
+    metadata = metadata | {
         'machine_instance': i,
         'num_vms': len(vms),
         'os_type': vm.OS_TYPE,
@@ -421,6 +427,8 @@ def GetTimeToBoot(vms):
     )
   if _LINUX_BOOT_METRICS.value or CollectNetworkSamples():
     for vm in vms:
+      assert vm.bootable_time
+      assert vm.bootable_time >= vm.create_start_time
       samples.extend(
           linux_boot.CollectBootSamples(
               vm,
diff --git a/perfkitbenchmarker/linux_benchmarks/provisioning_benchmarks/provision_and_scale_managed_vm_group_benchmark.py b/perfkitbenchmarker/linux_benchmarks/provisioning_benchmarks/provision_and_scale_managed_vm_group_benchmark.py
index 4178007e43..c4428dc4ee 100644
--- a/perfkitbenchmarker/linux_benchmarks/provisioning_benchmarks/provision_and_scale_managed_vm_group_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/provisioning_benchmarks/provision_and_scale_managed_vm_group_benchmark.py
@@ -59,9 +59,14 @@
 
 def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
   """Runs the benchmark."""
+  # Boot samples are collected explicitly here because --boot_samples only
+  # captures the VMs that exist at the end of the run phase.
+  samples = cluster_boot_benchmark.GetTimeToBoot(
+      benchmark_spec.vms, metadata={'vm_creation': 'CREATE_GROUP'}
+  )
   if not _SCALE_METHOD.value:
-    # provisioning and boot metrics are reported by the resource framework.
-    return []
+    # Provisioning metrics are reported by the resource framework.
+    return samples
 
   vm_group = benchmark_spec.managed_vm_groups['default']
   old_vm_count = vm_group.vm_count
@@ -85,7 +90,6 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
         f'{old_vm_count} to {_NEW_VM_COUNT.value}'
     )
 
-  samples = []
   samples.append(
       sample.Sample(
           metric='scale_to_ready_duration',
@@ -98,9 +102,12 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
           },
       )
   )
-  if FLAGS.boot_samples:
-    new_vms = [vm for vm in vm_group.vms if vm.name not in old_vms]
-    samples.extend(cluster_boot_benchmark.GetTimeToBoot(new_vms))
+  new_vms = [vm for vm in vm_group.vms if vm.name not in old_vms]
+  samples.extend(
+      cluster_boot_benchmark.GetTimeToBoot(
+          new_vms, metadata={'vm_creation': 'SCALE_GROUP'}
+      )
+  )
 
   return samples
 
@@ -109,8 +116,7 @@ def GetConfig(user_config: dict[str, Any]) -> dict[str, Any]:
   benchmark_config = configs.LoadConfig(
       BENCHMARK_CONFIG, user_config, BENCHMARK_NAME
   )
-  if FLAGS.boot_samples:
-    cluster_boot_benchmark.ConfigureStartupScript(benchmark_config)
+  cluster_boot_benchmark.ConfigureStartupScript(benchmark_config)
   return benchmark_config