From c30c6ba885b91824837eb244124abb98084f4b3d Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 5 Jun 2026 14:11:30 +0000 Subject: [PATCH 01/15] kubernetes_management: add benchmark + base cluster management plane methods kubernetes_management_benchmark.py: - Full implementation of Scenarios A, B, C on top of Zach's skeleton - _CleanStartSweep: guard clause, let it fail (no broad except) - Run(): guard clause for version resolution - All helpers: _RunScenarioA/B/C, _TimedAsync, _RunAsync, _OpSamples - _Results accumulator, _AggregateSamples, _OutlierSamples kubernetes_cluster.py: - AddNodepool: delegates to CreateNodePool for standard clusters - CreateNodePool/DeleteNodePool/UpgradeNodePool/UpdateCluster: sync wrappers calling *Async + WaitForOperation - Abstract methods: CreateNodePoolAsync, DeleteNodePoolAsync, UpgradeNodePoolAsync, UpdateClusterAsync, WaitForOperation, ResolveNodePoolVersions, GetNodePoolNames - BareMinor, AdjacentMinorBelow: version helper functions Tested: - 89 unit tests passing - EKS end-to-end: 99 pools, 100% success all 7 scenarios - pyink + lint-diffs clean --- CHANGES.next.md | 7 + .../kubernetes_management_benchmark.py | 784 +++++++++++- .../container_service/kubernetes_cluster.py | 129 +- .../kubernetes_management_benchmark_test.py | 1102 ++++++++++++++++- 4 files changed, 1997 insertions(+), 25 deletions(-) diff --git a/CHANGES.next.md b/CHANGES.next.md index d537f98b11..6c1461a725 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -1,3 +1,10 @@ +### New features: +- Add kubernetes_management benchmark for measuring GKE/EKS/AKS management + plane API responsiveness. (from @ashishsuneja) +- Add KubernetesCluster base class management plane abstract methods: + CreateNodePool, DeleteNodePool, UpgradeNodePool, UpdateCluster and + their async counterparts. (from @ashishsuneja) + ### Breaking changes: - Added --accept_licenses flag. User have to turn this flag on to acknowledge diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 9f44b52c78..579dd8f068 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -13,16 +13,43 @@ # limitations under the License. """Benchmark for Kubernetes management plane operations. -TODO: Add comments & implement. +Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: + A. Concurrent node-pool create/upgrade/delete. + B. Node-pool create overlapping with a long-running cluster update. + C. Large-scale node-pool provisioning (single scale or sweep). + +Optimizations for minimum run time: + - Streaming concurrency in Scenario C (no batch barriers) + - Optional pipelined Scenario A (create->upgrade->delete per thread) + - Reduced poll_interval in provider WaitForOperation (5s vs 10s) + - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits + - Accurate delete success rate via attempted_ops denominator """ -from typing import Any +import copy +import dataclasses +import statistics +import threading +import time +from typing import Callable +from absl import flags +from absl import logging +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import benchmark_spec as bm_spec from perfkitbenchmarker import configs +from perfkitbenchmarker import errors from perfkitbenchmarker import sample +from perfkitbenchmarker.configs import benchmark_config_spec +from perfkitbenchmarker.resources.container_service import ( + container as container_lib, +) +from perfkitbenchmarker.resources.container_service import kubectl +from perfkitbenchmarker.resources.container_service import kubernetes_cluster + +_SLEEP_POD_NAME = "pkb-mgmt-sleep" -BENCHMARK_NAME = 'kubernetes_management' +BENCHMARK_NAME = "kubernetes_management" BENCHMARK_CONFIG = """ kubernetes_management: @@ -30,29 +57,764 @@ Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale provisioning. Focused on control-plane API responsiveness. + Spec regions: GCP us-central1, AWS us-east-1 (closest), Azure eastus. + Equivalent machine types across clouds per Google benchmark spec. container_cluster: type: Kubernetes vm_count: 1 + vm_spec: + GCP: + # us-central1-a: spec primary region for GCP + # e2-standard-2: 2 vCPU 8GB — equivalent to t3.medium / D2s_v3 + machine_type: e2-standard-2 + zone: us-central1-a + AWS: + # us-east-1a: closest comparable region to GCP us-central1 + # t3.medium: 2 vCPU 4GB — closest equivalent to e2-standard-2 + machine_type: t3.medium + zone: us-east-1a + Azure: + # eastus: closest comparable region to GCP us-central1 + # Standard_D2s_v3: 2 vCPU 8GB — equivalent to e2-standard-2 + machine_type: Standard_D2s_v3 + zone: eastus """ +_VALID_SCENARIOS = frozenset({"A", "B", "C"}) + +_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( + "k8s_mgmt_concurrent_nodepools", + 5, + "Number of node pools to create/upgrade/delete concurrently in Scenario A.", +) +_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( + "k8s_mgmt_large_scale_nodepools", + 1000, + "Number of node pools to provision in the large-scale Scenario C. " + + "Spec target is 1000; ensure VPC/quota is available before running.", +) +_NODES_PER_NODEPOOL = flags.DEFINE_integer( + "k8s_mgmt_nodes_per_nodepool", + 2, + "Number of nodes per node pool. Google spec: 2 nodes per pool.", +) +_INITIAL_VERSION = flags.DEFINE_string( + "k8s_mgmt_initial_version", + None, + "Kubernetes version for newly-created node pools (N-1). None = auto.", +) +_TARGET_VERSION = flags.DEFINE_string( + "k8s_mgmt_target_version", + None, + "Kubernetes version to upgrade node pools to (N). None = cluster version.", +) +_SCENARIOS = flags.DEFINE_list( + "k8s_mgmt_scenarios", + ["A", "B", "C"], + "Comma-separated subset of scenarios to run. Valid values: A, B, C.", +) +_SCALE_SWEEP = flags.DEFINE_list( + "k8s_mgmt_scale_sweep", + [], + "Comma-separated list of node-pool counts for Scenario C scale sweep. " + + "Each scale runs as a separate sub-run with full create/delete cycle. " + + "Example: --k8s_mgmt_scale_sweep=10,50,100,500,1000. " + + "If empty, uses --k8s_mgmt_large_scale_nodepools.", +) +_MAX_CONCURRENT = flags.DEFINE_integer( + "k8s_mgmt_max_concurrent", + 50, + "Cap on concurrent provider API calls within a batch. " + + "Higher = faster but more aggressive on connection pools.", +) +_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( + "k8s_mgmt_pipeline_scenario_a", + True, + "If True, run Scenario A as per-pool pipeline (create->upgrade->delete " + + "back-to-back per thread). Minimizes wall time. " + + "Default False for spec-strict phase-by-phase.", +) + +# AKS caps node-pool names at 12 chars — keep all names within that limit. +_PREFIX = "pkbm" + + +def _ScenarioAName(i): + return f"{_PREFIX}a{i:03d}" + + +_SCENARIO_B_NAME = f"{_PREFIX}b" + + +def _ScenarioCName(i): + return f"{_PREFIX}c{i:04d}" + -def GetConfig(user_config: dict[str, Any]) -> dict[str, Any]: - """Returns the configuration of a benchmark.""" +@dataclasses.dataclass +class _OpResult: + """Holds timing and outcome for a single async management-plane operation.""" + + name: str + init_dur: float + e2e_dur: float + error: Exception | None = None + + def __iter__(self): + yield self.name + yield self.init_dur + yield self.e2e_dur + yield self.error + + +def GetConfig(user_config): return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) -def CheckPrerequisites(benchmark_config: bm_spec.BenchmarkSpec) -> None: - del benchmark_config +def CheckPrerequisites( + benchmark_config: benchmark_config_spec.BenchmarkConfigSpec, +): + """Validates flag values and cluster type before any cloud calls.""" + invalid = [s for s in _SCENARIOS.value if s.strip() not in _VALID_SCENARIOS] + if invalid: + raise errors.Config.InvalidValue( + f"Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. " + + f"Valid options: {sorted(_VALID_SCENARIOS)}." + ) + for s in _SCALE_SWEEP.value: + try: + int(s.strip()) + except ValueError as e: + raise errors.Config.InvalidValue( + f"Non-integer value in --k8s_mgmt_scale_sweep: {s!r}" + ) from e + if benchmark_config.container_cluster.type != "Kubernetes": + raise errors.Config.InvalidValue( + "kubernetes_management benchmark requires a Kubernetes" + + " container cluster." + ) def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: - del benchmark_spec + """Asserts the cluster is reachable; deploys spec-defined sleep workload.""" + cluster = benchmark_spec.container_cluster + assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) + benchmark_spec.always_call_cleanup = True + logging.info( + "kubernetes_management Prepare: cluster=%s, version=%s", + cluster.name, + cluster.k8s_version, + ) + # Spec workload: "a simple container that sleeps for a given time". + # Confirms data-plane reachability; generates no data-plane load. + kubectl.RunKubectlCommand( + [ + "run", + _SLEEP_POD_NAME, + "--image=busybox", + "--restart=Never", + "--", + "sleep", + "86400", + ], + ) + + +def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None: + """Deletes any stale pkbm* node pools so each run starts clean (spec C.2).""" + stale = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] + if not stale: + logging.info("CleanStart: no stale pools found — clean start confirmed.") + return + logging.info("CleanStart: deleting %d stale pools: %s", len(stale), stale) + background_tasks.RunThreaded(cluster.DeleteNodePool, stale) def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: - del benchmark_spec - return [] + """Runs the selected scenarios and returns flat list of samples.""" + cluster = benchmark_spec.container_cluster + assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) + + # Spec C.2: start clean. + _CleanStartSweep(cluster) + + # Resolve versions once; log clearly; tag every sample. + # Google spec: initial=N-1, target=N (adjacent minor upgrade). + flag_initial = _INITIAL_VERSION.value + flag_target = _TARGET_VERSION.value + if not (flag_initial and flag_target): + resolved_initial, resolved_target = cluster.ResolveNodePoolVersions() + flag_initial = flag_initial or resolved_initial + flag_target = flag_target or resolved_target + initial, target = flag_initial, flag_target + if _INITIAL_VERSION.value and _TARGET_VERSION.value: + source = "flags" + elif not (_INITIAL_VERSION.value or _TARGET_VERSION.value): + source = "auto-resolved" + else: + source = "mixed" + + logging.info( + "NodePool versions (%s): initial=%s -> target=%s " + + "(cluster k8s_version=%s) | nodes_per_pool=%d | machine_type=%s", + source, + initial, + target, + cluster.k8s_version, + _NODES_PER_NODEPOOL.value, + cluster.default_nodepool.machine_type + if hasattr(cluster, "default_nodepool") + else "unknown", + ) + + scenarios = {s.strip().upper() for s in _SCENARIOS.value} + samples: list[sample.Sample] = [] + + if "A" in scenarios: + samples += _RunScenarioA(cluster, initial, target) + if "B" in scenarios: + samples += _RunScenarioB(cluster, initial) + if "C" in scenarios: + # fix: Scenario A/B pools may still be in Deleting state and count + # toward AKS's 100-pool cluster limit. Sweep them out before Scenario C + # so we don't hit MaxAgentPoolCountReached mid-run. + _CleanStartSweep(cluster) + scales = ( + [int(x.strip()) for x in _SCALE_SWEEP.value] + if _SCALE_SWEEP.value + else [_LARGE_SCALE_NODEPOOLS.value] + ) + logging.info("Scenario C: scale sweep = %s", scales) + for scale in scales: + scenario_c_samples = _RunScenarioC(cluster, initial, scale) + for s in scenario_c_samples: + s.metadata["scenario_c_scale"] = str(scale) + samples += scenario_c_samples + + # Tag all samples with version path and run config for published results. + run_meta = { + "initial_version": str(initial), + "target_version": str(target), + "cluster_k8s_version": str(cluster.k8s_version), + "nodes_per_nodepool": str(_NODES_PER_NODEPOOL.value), + "concurrent_nodepools": str(_CONCURRENT_NODEPOOLS.value), + } + for s in samples: + s.metadata.update(run_meta) + + return samples def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: - del benchmark_spec + """Best-effort delete of leftover benchmark node pools and sleep pod.""" + cluster = benchmark_spec.container_cluster + if cluster is None: + return + kubectl.RunKubectlCommand( + ["delete", "pod", _SLEEP_POD_NAME, "--ignore-not-found"], + raise_on_failure=False, + ) + leftover = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] + if not leftover: + return + logging.info("Cleanup: deleting %d leftover node pools", len(leftover)) + background_tasks.RunThreaded(cluster.DeleteNodePool, leftover) + + +# --------------------------------------------------------------------------- +# Scenario A +# --------------------------------------------------------------------------- + + +def _RunScenarioA( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, + target: str, +) -> list[sample.Sample]: + """Concurrent CreateNodePool, UpgradeNodePool, DeleteNodePool.""" + n = _CONCURRENT_NODEPOOLS.value + if _PIPELINE_SCENARIO_A.value: + logging.info( + "Scenario A (pipelined): %d pools, initial=%s, target=%s", + n, + initial, + target, + ) + return _RunScenarioAPipelined(cluster, n, initial, target) + + logging.info( + "Scenario A (phase-by-phase): %d pools, initial=%s, target=%s", + n, + initial, + target, + ) + pool_names = [_ScenarioAName(i) for i in range(n)] + configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] + samples: list[sample.Sample] = [] + + # ── Phase 1: concurrent creates ───────────────────────────────────────── + create_results = _RunAsync( + kickoff=lambda cfg: cluster.CreateNodePoolAsync( + cfg, node_version=initial + ), + wait_fn=cluster.WaitForOperation, + items=configs_, + get_name=lambda cfg: cfg.name, + ) + samples += _OpSamples( + "ScenarioA_Create", create_results, attempted_ops=len(pool_names) + ) + + # ── Phase 2: concurrent upgrades (only successfully created pools) ─────── + created = [r.name for r in create_results if r.error is None] + logging.info( + "Scenario A: %d/%d pools created — proceeding to upgrade", len(created), n + ) + upgrade_results = _RunAsync( + kickoff=lambda name: cluster.UpgradeNodePoolAsync(name, target), + wait_fn=cluster.WaitForOperation, + items=created, + get_name=str, + ) + samples += _OpSamples( + "ScenarioA_Upgrade", upgrade_results, attempted_ops=len(created) + ) + + # # ── Idiomatic Control Plane Synchronization Barrier ────────────────────── + # # Give the GKE control plane a brief window to register the async ops. + # time.sleep(15) + + # # Check if the cluster object has our native upgrade tracking capability. + # if hasattr(cluster, 'HasActiveUpgradeOperations'): + # logging.info('GCP GKE cluster detected; polling via provider API.') + + # while cluster.HasActiveUpgradeOperations(): + # logging.info( + # 'Upgrade operations active; holding delete phase for 30s.') + # time.sleep(30) + + # logging.info( + # 'All upgrade ops completed; flushing API gateway write-locks.') + # time.sleep(10) + # else: + # # Non-GCP providers (Azure AKS / AWS EKS): standard safety pause. + # logging.info( + # 'Non-GCP cluster; proceeding with stabilization pause.') + # time.sleep(5) + + # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ────── + alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}a")] + logging.info( + "Scenario A: %d live pools found for delete (originally %d)", + len(alive), + n, + ) + delete_results = _RunAsync( + kickoff=cluster.DeleteNodePoolAsync, + wait_fn=cluster.WaitForOperation, + items=alive, + get_name=str, + ) + # attempted_ops=n: success rate reflects original request, not just live. + # EKS rolls back timed-out pools silently — without this shows 100%. + samples += _OpSamples("ScenarioA_Delete", delete_results, attempted_ops=n) + return samples + + +def _RunScenarioAPipelined( + cluster: kubernetes_cluster.KubernetesCluster, + n: int, + initial: str, + target: str, +) -> list[sample.Sample]: + """Per-pool pipeline: create->upgrade->delete back-to-back per thread. + + Minimizes wall time: max_i(create_i + upgrade_i + delete_i) vs + max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode. + Trade-off: ops run under mixed-type concurrent load. + """ + pool_names = [_ScenarioAName(i) for i in range(n)] + creates = _Results() + upgrades = _Results() + deletes = _Results() + + def DoPool(pool_name: str): + """Runs timed create/upgrade/delete for one pool.""" + cfg = _MakeNodePoolConfig(cluster, pool_name) + init, e2e, err = _TimedAsync( + lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), + cluster.WaitForOperation, + ) + creates.add(pool_name, init, e2e, err) + if err is not None: + return + init, e2e, err = _TimedAsync( + lambda: cluster.UpgradeNodePoolAsync(pool_name, target), + cluster.WaitForOperation, + ) + upgrades.add(pool_name, init, e2e, err) + init, e2e, err = _TimedAsync( + lambda: cluster.DeleteNodePoolAsync(pool_name), + cluster.WaitForOperation, + ) + deletes.add(pool_name, init, e2e, err) + + background_tasks.RunThreaded( + DoPool, + pool_names, + max_concurrent_threads=min(n, _MAX_CONCURRENT.value), + ) + samples: list[sample.Sample] = [] + samples += _OpSamples("ScenarioA_Create", creates.entries, attempted_ops=n) + samples += _OpSamples("ScenarioA_Upgrade", upgrades.entries, attempted_ops=n) + samples += _OpSamples("ScenarioA_Delete", deletes.entries, attempted_ops=n) + return samples + + +# --------------------------------------------------------------------------- +# Scenario B +# --------------------------------------------------------------------------- + + +def _RunScenarioB( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, +) -> list[sample.Sample]: + """CreateNodePool fired concurrently with a long-running cluster update. + + Both ops kick off async on separate threads; initiation + E2E latency + recorded independently. Overlap window = ClusterUpdate E2E latency. + """ + logging.info("Scenario B: overlapping cluster update + node-pool create") + cfg = _MakeNodePoolConfig(cluster, _SCENARIO_B_NAME) + results = _Results() + + def DoClusterUpdate(): + init, e2e, err = _TimedAsync( + cluster.UpdateClusterAsync, cluster.WaitForOperation + ) + results.add("ScenarioB_ClusterUpdate", init, e2e, err) + logging.info( + "Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s", + init, + e2e, + err is None, + ) + + def DoCreate(): + init, e2e, err = _TimedAsync( + lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), + cluster.WaitForOperation, + ) + results.add("ScenarioB_NodePoolCreate", init, e2e, err) + logging.info( + "Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s", + init, + e2e, + err is None, + ) + + background_tasks.RunThreaded(lambda fn: fn(), [DoClusterUpdate, DoCreate]) + + samples: list[sample.Sample] = [] + for entry in results.entries: + samples += _OpSamples(entry.name, [entry], attempted_ops=1) + + # Remove test pool (best-effort). + cluster.DeleteNodePool(_SCENARIO_B_NAME) + return samples + + +# --------------------------------------------------------------------------- +# Scenario C +# --------------------------------------------------------------------------- + + +def _RunScenarioC( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, + scale: int, +) -> list[sample.Sample]: + """Large-scale node-pool provisioning at a given scale. + + Streams all `scale` creates through a single executor capped at + _MAX_CONCURRENT workers — as each op completes the next starts immediately + (no batch barriers). Delete uses a live-list so EKS-rolled-back pools are + excluded from the denominator correctly. + """ + logging.info( + "Scenario C: scale=%d, max_concurrent=%d, initial_version=%s", + scale, + _MAX_CONCURRENT.value, + initial, + ) + pool_names = [_ScenarioCName(i) for i in range(scale)] + configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] + samples: list[sample.Sample] = [] + + # ── Creates ────────────────────────────────────────────────────────────── + create_results = _RunAsync( + kickoff=lambda cfg: cluster.CreateNodePoolAsync( + cfg, node_version=initial + ), + wait_fn=cluster.WaitForOperation, + items=configs_, + get_name=lambda cfg: cfg.name, + ) + created_ok = sum(1 for r in create_results if r.error is None) + logging.info( + "Scenario C scale=%d: %d/%d creates succeeded", scale, created_ok, scale + ) + samples += _OpSamples("ScenarioC_Create", create_results, attempted_ops=scale) + + # ── Deletes (live-list) ────────────────────────────────────────────────── + alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}c")] + logging.info( + "Scenario C scale=%d: %d live pools for delete (originally %d;" + + " %d rolled back by cloud)", + scale, + len(alive), + scale, + scale - len(alive), + ) + if not alive: + logging.info("Scenario C scale=%d: all creates rolled back.", scale) + samples += _OpSamples("ScenarioC_Delete", [], attempted_ops=scale) + return samples + + delete_results = _RunAsync( + kickoff=cluster.DeleteNodePoolAsync, + wait_fn=cluster.WaitForOperation, + items=alive, + get_name=str, + ) + # attempted_ops=scale: accurate rate against original request count. + samples += _OpSamples("ScenarioC_Delete", delete_results, attempted_ops=scale) + return samples + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +class _Results: + """Thread-safe collector for (name, init_latency, e2e_latency, error).""" + + def __init__(self): + self._lock = threading.Lock() + self.entries: list[_OpResult] = [] + + def add( + self, name: str, init_dur: float, e2e_dur: float, err: Exception | None + ) -> None: + result = _OpResult(name, init_dur, e2e_dur, err) + with self._lock: + self.entries.append(result) + + +def _TimedAsync( + kickoff: Callable[[], str], + wait_fn: Callable[[str], None], +) -> tuple[float, float, Exception | None]: + """Runs kickoff() then wait_fn(handle); returns (init_lat, e2e_lat, err). + + init_lat = time for kickoff() to return (API accepted). + e2e_lat = total wall time including wait. On kickoff failure both are set + to elapsed time at failure point. + """ + init_start = time.monotonic() + try: + handle = kickoff() + except Exception as exc: # pylint: disable=broad-except + elapsed = time.monotonic() - init_start + return elapsed, elapsed, exc + init_dur = time.monotonic() - init_start + try: + wait_fn(handle) + return init_dur, time.monotonic() - init_start, None + except Exception as exc: # pylint: disable=broad-except + return init_dur, time.monotonic() - init_start, exc + + +def _RunAsync( + kickoff: Callable, + wait_fn: Callable[[str], None], + items: list, + get_name: Callable[[object], str], +) -> list[tuple[str, float, float, Exception | None]]: + """Fires kickoff(item) concurrently for all items; returns timed results. + + Uses background_tasks.RunThreaded with a concurrency cap for streaming + execution — completed ops free their slot immediately for the next one. + """ + if not items: + return [] + results = _Results() + cap = min(len(items), _MAX_CONCURRENT.value) + + def DoWrap(item): + init_dur, e2e_dur, err = _TimedAsync(lambda: kickoff(item), wait_fn) + name = get_name(item) + results.add(name, init_dur, e2e_dur, err) + logging.info( + "%s ok=%s initiation=%.2fs end_to_end=%.2fs", + name, + err is None, + init_dur, + e2e_dur, + ) + + background_tasks.RunThreaded(DoWrap, items, max_concurrent_threads=cap) + return results.entries + + +def _MakeNodePoolConfig( + cluster: kubernetes_cluster.KubernetesCluster, + name: str, +) -> container_lib.BaseNodePoolConfig: + """Builds a node-pool config from the cluster's default pool.""" + cfg = copy.copy(cluster.default_nodepool) + cfg.name = name + cfg.num_nodes = _NODES_PER_NODEPOOL.value + cfg.min_nodes = _NODES_PER_NODEPOOL.value + cfg.max_nodes = _NODES_PER_NODEPOOL.value + return cfg + + +def _OpSamples( + metric_prefix: str, + results: list[_OpResult], + attempted_ops: int | None = None, +) -> list[sample.Sample]: + """Per-op + aggregate samples for initiation and end-to-end latency. + + Args: + metric_prefix: prefix for all metric names. + results: list of (operation_name, init_lat, e2e_lat, err). + attempted_ops: total ops originally requested. Used as the denominator + for SuccessRate so EKS-rolled-back pools (which never + appear in results) are counted as failures, not ignored. + If None, len(results) is used (original behavior). + """ + samples: list[sample.Sample] = [] + init_latencies: list[float] = [] + e2e_latencies: list[float] = [] + success = 0 + + for r in results: + if isinstance(r, tuple): + r = _OpResult(*r) + meta = {"operation_name": r.name, "success": str(r.error is None)} + if r.error is not None: + meta["error"] = str(r.error)[:200] + else: + success += 1 + init_latencies.append(r.init_dur) + e2e_latencies.append(r.e2e_dur) + samples.append( + sample.Sample( + f"{metric_prefix}_InitiationLatency", + r.init_dur, + "seconds", + dict(meta), + ) + ) + samples.append( + sample.Sample( + f"{metric_prefix}_EndToEndLatency", r.e2e_dur, "seconds", dict(meta) + ) + ) + + # ── Success rate ───────────────────────────────────────────────────────── + total = attempted_ops if attempted_ops is not None else len(results) + executed = len(results) + if total > 0: + samples.append( + sample.Sample( + f"{metric_prefix}_SuccessRate", + 100.0 * success / total, + "percent", + { + "total_ops": str(total), + "executed_ops": str(executed), + "successful_ops": str(success), + "skipped_ops": str(total - executed), + }, + ) + ) + + # ── Aggregate stats (successful ops only) ──────────────────────────────── + for phase_label, latencies in ( + ("InitiationLatency", init_latencies), + ("EndToEndLatency", e2e_latencies), + ): + if len(latencies) >= 2: + samples += _AggregateSamples(metric_prefix, phase_label, latencies) + if len(latencies) >= 4: + samples += _OutlierSamples(metric_prefix, phase_label, latencies) + + return samples + + +def _AggregateSamples( + metric_prefix: str, phase_label: str, latencies: list[float] +) -> list[sample.Sample]: + """Emits Mean/StdDev/Min/Median/P90/P99/Max samples for a latency series.""" + n = len(latencies) + meta = {"sample_count": str(n)} + + # statistics.quantiles with method='inclusive' matches linear interpolation + # and returns n-1 cut points; index 89→P90, 98→P99. + quantiles = statistics.quantiles(latencies, n=100, method="inclusive") + + stats = [ + ("Mean", statistics.mean(latencies)), + ("StdDev", statistics.pstdev(latencies)), + ("Min", min(latencies)), + ("Median", statistics.median(latencies)), + ("P90", quantiles[89]), + ("P99", quantiles[98]), + ("Max", max(latencies)), + ] + result = [] + for label, value in stats: + result.append( + sample.Sample( + f"{metric_prefix}_{phase_label}_{label}", + value, + "seconds", + dict(meta), + ) + ) + return result + + +def _OutlierSamples( + metric_prefix: str, phase_label: str, latencies: list[float] +) -> list[sample.Sample]: + """Emits a single OutlierCount sample using IQR-fence outlier detection.""" + # statistics.quantiles(n=4) returns [Q1, Q2, Q3]; indices 0 and 2. + quartiles = statistics.quantiles(latencies, n=4, method="inclusive") + q1, q3 = quartiles[0], quartiles[2] + iqr = q3 - q1 + lower_fence = q1 - 1.5 * iqr + upper_fence = q3 + 1.5 * iqr + outlier_count = sum( + 1 for v in latencies if v < lower_fence or v > upper_fence + ) + meta = { + "q1": str(q1), + "q3": str(q3), + "iqr": str(iqr), + "upper_fence": str(upper_fence), + "lower_fence": str(lower_fence), + "sample_count": str(len(latencies)), + } + return [ + sample.Sample( + f"{metric_prefix}_{phase_label}_OutlierCount", + outlier_count, + "count", + meta, + ) + ] diff --git a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py index 9e20f57583..4cc386cd5c 100644 --- a/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py +++ b/perfkitbenchmarker/resources/container_service/kubernetes_cluster.py @@ -1,5 +1,6 @@ """Classes related to KubernetesCluster.""" +import abc import functools import json import logging @@ -10,7 +11,9 @@ from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import container_spec as container_spec_lib from perfkitbenchmarker.resources import kubernetes_inference_server -from perfkitbenchmarker.resources.container_service import container as container_lib +from perfkitbenchmarker.resources.container_service import ( + container as container_lib, +) from perfkitbenchmarker.resources.container_service import container_cluster from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker.resources.container_service import kubernetes @@ -297,9 +300,127 @@ def _GetAddressFromIngress(self, ingress_out: str): ) return 'http://' + ip.strip() - def AddNodepool(self, batch_name: str, pool_id: str): - """Adds an additional nodepool with the given name to the cluster.""" - pass + def AddNodepool(self, batch_name: str, pool_id: str) -> None: + """Adds a node pool; delegates to CreateNodePool for standard clusters. + + Karpenter-based subclasses override this to apply a manifest instead. + """ + nodepool_config = container_lib.BaseNodePoolConfig( + self.nodepools[container_cluster.DEFAULT_NODEPOOL].vm_spec, + name=f'{batch_name}-{pool_id}', + ) + self.CreateNodePool(nodepool_config) + + def CreateNodePool( + self, + nodepool_config: container_lib.BaseNodePoolConfig, + node_version: str | None = None, + ) -> None: + """Creates a single named node pool on the cluster (blocks until ready). + + Args: + nodepool_config: Node pool definition (name, machine type, node count). + node_version: Optional Kubernetes version to pin the node pool to. None + means use the cluster default. + """ + raise NotImplementedError + + def DeleteNodePool(self, name: str) -> None: + """Deletes the named node pool (blocks until removed).""" + raise NotImplementedError + + def UpgradeNodePool(self, name: str, target_version: str) -> None: + """Upgrades the named node pool to the given Kubernetes version.""" + raise NotImplementedError + + def UpdateCluster(self) -> None: + """Performs a lightweight cluster-level update operation (blocks). + + Intended for management-plane benchmarks that need to overlap a real + cluster-level operation with a node-pool operation. The implementation + should issue a control-plane mutation (so an actual operation runs) that + is non-destructive and idempotent across repeated invocations. + """ + raise NotImplementedError + + def CreateNodePoolAsync( + self, + nodepool_config: container_lib.BaseNodePoolConfig, + node_version: str | None = None, + ) -> str: + """Initiates node-pool create; returns opaque op handle. Does NOT wait.""" + raise NotImplementedError + + def UpgradeNodePoolAsync(self, name: str, target_version: str) -> str: + """Initiates node-pool upgrade; returns opaque op handle. Does NOT wait.""" + raise NotImplementedError + + def DeleteNodePoolAsync(self, name: str) -> str: + """Initiates node-pool delete; returns opaque op handle. Does NOT wait.""" + raise NotImplementedError + + def UpdateClusterAsync(self) -> str: + """Initiates cluster-level update. Returns op handle; does NOT wait.""" + raise NotImplementedError + + @abc.abstractmethod + def GetNodePoolNames(self) -> list[str]: + """Returns the names of all node pools currently in the cluster. + + Used by the kubernetes_management benchmark to: + - Sweep stale pkbm* pools before each run (clean-start spec requirement) + - Re-list live pools after creates before deleting (avoids stale names) + """ + + def WaitForOperation(self, op_handle: str) -> None: + """Blocks until the operation identified by op_handle completes. + + Args: + op_handle: provider-specific opaque string from one of the *Async + methods above. + + Raises: + errors.Resource.RetryableCreationError or similar on timeout/failure. + """ + raise NotImplementedError + + def ResolveNodePoolVersions(self) -> tuple[str, str]: + """Returns (initial, target) K8s versions per benchmark spec. + + Spec contract: + target = cluster's current K8s version (the latest available) + initial = the adjacent minor below target (e.g., target=1.35 -> 1.34) + Default implementation returns bare-minor strings ("1.34", "1.35") which + EKS and AKS accept directly. Providers requiring fully-qualified versions + (notably GKE) must override. + """ + target = BareMinor(self.k8s_version) + initial = AdjacentMinorBelow(self.k8s_version) + return initial, target + + +def BareMinor(version: str) -> str: + """Returns the 'major.minor' part of a K8s version string. + + Accepts and normalizes formats like 'v1.35.4', '1.35.4-gke.1234', '1.35'. + """ + if version.startswith('v'): + version = version[1:] + bare = version.split('-', 1)[0] + parts = bare.split('.') + if len(parts) < 2 or not parts[0].isdigit() or not parts[1].isdigit(): + raise ValueError(f'Cannot parse K8s version: {version!r}') + return f'{parts[0]}.{parts[1]}' + + +def AdjacentMinorBelow(version: str) -> str: + """Returns the bare minor one below the given version: '1.35.4' -> '1.34'.""" + bare = BareMinor(version) + major_s, minor_s = bare.split('.') + minor = int(minor_s) + if minor <= 0: + raise ValueError(f'No adjacent minor below {version!r}') + return f'{major_s}.{minor - 1}' def _DeleteAllFromDefaultNamespace(): diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index a58ad03497..c24c8477d6 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -11,28 +11,1110 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Tests for linux_benchmarks.kubernetes_management_benchmark.""" +import threading +import time import unittest + +# pylint: disable=invalid-name,protected-access from unittest import mock -from perfkitbenchmarker import benchmark_spec as bm_spec_lib +from absl import flags +from absl.testing import flagsaver +from perfkitbenchmarker import errors +from perfkitbenchmarker import sample from perfkitbenchmarker.linux_benchmarks import kubernetes_management_benchmark +from perfkitbenchmarker.resources.container_service import kubernetes_cluster from tests import pkb_common_test_case +FLAGS = flags.FLAGS + +_CLUSTER_NAME = 'test-cluster' + -class KubernetesManagementBenchmarkTestCase( - pkb_common_test_case.PkbCommonTestCase +def _make_sample(metric, value, unit='seconds', metadata=None): + return sample.Sample(metric, value, unit, metadata or {}) + + +def _make_mock_cluster( + name=_CLUSTER_NAME, + k8s_version='1.34', + pool_names=None, ): + """Creates a fully-stubbed KubernetesCluster mock for use in tests.""" + cluster = mock.create_autospec( + kubernetes_cluster.KubernetesCluster, instance=True + ) + cluster.name = name + cluster.k8s_version = k8s_version + cluster.cluster_version = k8s_version + cluster.GetNodePoolNames.return_value = pool_names or [] + cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') + cluster.CreateNodePoolAsync.return_value = 'op-create-1' + cluster.UpgradeNodePoolAsync.return_value = 'op-upgrade-1' + cluster.DeleteNodePoolAsync.return_value = 'op-delete-1' + cluster.UpdateClusterAsync.return_value = 'op-update-1' + cluster.WaitForOperation.return_value = None + default_np = mock.MagicMock() + default_np.machine_type = 'e2-standard-2' + default_np.num_nodes = 1 + default_np.min_nodes = 1 + default_np.max_nodes = 1 + default_np.zone = 'us-central1-a' + default_np.disk_size = 100 + default_np.name = 'default-pool' + cluster.default_nodepool = default_np + return cluster + + +def _make_mock_benchmark_spec(cluster=None): + spec = mock.MagicMock() + spec.container_cluster = cluster or _make_mock_cluster() + return spec + + +def _make_mock_config(cluster_type='Kubernetes'): + cfg = mock.MagicMock() + cfg.container_cluster.type = cluster_type + return cfg + + +class ScenarioNameTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for _SCENARIO_A_NAME, _SCENARIO_B_NAME, _SCENARIO_C_NAME.""" + + def testScenarioANameZeroPadsToThreeDigits(self): + self.assertEqual( + 'pkbma000', + kubernetes_management_benchmark._ScenarioAName(0), + ) + + def testScenarioANameTwoDigitIndex(self): + self.assertEqual( + 'pkbma042', + kubernetes_management_benchmark._ScenarioAName(42), + ) + + def testScenarioANameMaxThreeDigits(self): + self.assertEqual( + 'pkbma999', + kubernetes_management_benchmark._ScenarioAName(999), + ) + + def testScenarioBNameIsConstant(self): + self.assertEqual( + 'pkbmb', + kubernetes_management_benchmark._SCENARIO_B_NAME, + ) + + def testScenarioCNameZeroPadsToFourDigits(self): + self.assertEqual( + 'pkbmc0000', + kubernetes_management_benchmark._ScenarioCName(0), + ) + + def testScenarioCNameSingleDigitIndex(self): + self.assertEqual( + 'pkbmc0007', + kubernetes_management_benchmark._ScenarioCName(7), + ) + + def testScenarioCNameFourDigitIndex(self): + self.assertEqual( + 'pkbmc1000', + kubernetes_management_benchmark._ScenarioCName(1000), + ) + + def testAllNamesWithinAksLimit(self): + for i in range(1000): + self.assertLessEqual( + len(kubernetes_management_benchmark._ScenarioAName(i)), 12 + ) + for i in range(10000): + self.assertLessEqual( + len(kubernetes_management_benchmark._ScenarioCName(i)), 12 + ) + self.assertLessEqual( + len(kubernetes_management_benchmark._SCENARIO_B_NAME), 12 + ) + + +class CheckPrerequisitesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the CheckPrerequisites validation function.""" + + def testValidScenariosPass(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'B', 'C']): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testInvalidScenarioRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['X']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testMixedValidInvalidRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'Z']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testNonKubernetesClusterTypeRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['A']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites( + _make_mock_config(cluster_type='Mesos') + ) + + def testInvalidScaleSweepRaises(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', 'abc'] + ): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testValidScaleSweepPasses(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', '50', '100'] + ): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testLowercaseScenarioRaises(self): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['a']): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + +class PrepareTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Prepare benchmark lifecycle function.""" + + def _patch_kubectl(self, rc=0): + return mock.patch( + 'perfkitbenchmarker.resources.container_service.kubectl' + + '.RunKubectlCommand', + return_value=('', '', rc), + ) + + def testPrepareRunsKubectlSleepPod(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl() as mock_kubectl: + kubernetes_management_benchmark.Prepare(bm_spec) + mock_kubectl.assert_called_once() + args = mock_kubectl.call_args[0][0] + self.assertIn('run', args) + self.assertIn('pkb-mgmt-sleep', args) + self.assertIn('sleep', args) + + def testPrepareSetsAlwaysCallCleanup(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Prepare(bm_spec) + self.assertTrue(bm_spec.always_call_cleanup) + + def testPrepareToleratesKubectlNonZeroReturn(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(rc=1): + kubernetes_management_benchmark.Prepare(bm_spec) + + +class CleanupTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Cleanup benchmark lifecycle function.""" + + def _patch_kubectl(self): + return mock.patch( + 'perfkitbenchmarker.resources.container_service.kubectl' + + '.RunKubectlCommand', + return_value=('', '', 0), + ) + + def testCleanupDeletesSleepPod(self): + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl() as mock_kubectl: + kubernetes_management_benchmark.Cleanup(bm_spec) + delete_calls = [ + str(c) + for c in mock_kubectl.call_args_list + if 'pkb-mgmt-sleep' in str(c) + ] + self.assertNotEmpty(delete_calls) + + def testCleanupDeletesAllPkbmPrefixedPools(self): + cluster = _make_mock_cluster( + pool_names=['pkbma000', 'default-pool', 'pkbmc0001'] + ) + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Cleanup(bm_spec) + deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} + self.assertIn('pkbma000', deleted) + self.assertIn('pkbmc0001', deleted) + self.assertNotIn('default-pool', deleted) + + def testCleanupSkipsDeleteWhenNoLeftoverPools(self): + cluster = _make_mock_cluster(pool_names=['default-pool']) + bm_spec = _make_mock_benchmark_spec(cluster) + with self._patch_kubectl(): + kubernetes_management_benchmark.Cleanup(bm_spec) + cluster.DeleteNodePool.assert_not_called() + + def testCleanupHandlesNoneCluster(self): + bm_spec = _make_mock_benchmark_spec() + bm_spec.container_cluster = None + kubernetes_management_benchmark.Cleanup(bm_spec) + + +class CleanStartSweepTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _CleanStartSweep helper function.""" + + def testDeletesStalePkbmPools(self): + cluster = _make_mock_cluster( + pool_names=['pkbma000', 'pkbmc0001', 'user-pool'] + ) + kubernetes_management_benchmark._CleanStartSweep(cluster) + deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} + self.assertIn('pkbma000', deleted) + self.assertIn('pkbmc0001', deleted) + self.assertNotIn('user-pool', deleted) + + def testDoesNothingWhenNoPkbmPools(self): + cluster = _make_mock_cluster(pool_names=['user-pool', 'default-pool']) + kubernetes_management_benchmark._CleanStartSweep(cluster) + cluster.DeleteNodePool.assert_not_called() + + def testCleanStartSweepRaisesOnGetNodePoolNamesException(self): + cluster = _make_mock_cluster() + cluster.GetNodePoolNames.side_effect = RuntimeError('API error') + with self.assertRaises(RuntimeError): + kubernetes_management_benchmark._CleanStartSweep(cluster) + + +class ResultsTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _Results result-accumulator helper.""" + + def testAddSingleEntry(self): + r = kubernetes_management_benchmark._Results() + r.add('op1', 0.1, 1.0, None) + self.assertLen(r.entries, 1) + name, init, e2e, err = r.entries[0] + self.assertEqual('op1', name) + self.assertAlmostEqual(0.1, init, places=5) + self.assertAlmostEqual(1.0, e2e, places=5) + self.assertIsNone(err) + + def testAddMultipleEntries(self): + r = kubernetes_management_benchmark._Results() + r.add('op1', 0.1, 1.0, None) + r.add('op2', 0.2, 2.0, ValueError('fail')) + self.assertLen(r.entries, 2) + + def testAddIsThreadSafe(self): + """Tests that concurrent add() calls from multiple threads are safe.""" + r = kubernetes_management_benchmark._Results() + n = 100 + + def _add(i): + r.add(f'op{i}', float(i), float(i) * 2, None) + + threads = [threading.Thread(target=_add, args=(i,)) for i in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + self.assertLen(r.entries, n) + + def testAddPreservesError(self): + r = kubernetes_management_benchmark._Results() + exc = RuntimeError('test error') + r.add('failing-op', 0.5, 0.5, exc) + _, _, _, err = r.entries[0] + self.assertIs(exc, err) + + +class TimedAsyncTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _TimedAsync timing helper.""" + + def testSuccessfulKickoffAndWait(self): + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(return_value=None) + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + kickoff.assert_called_once() + wait_fn.assert_called_once_with('op-handle') + self.assertIsNone(err) + self.assertGreaterEqual(init_lat, 0.0) + self.assertGreaterEqual(e2e_lat, init_lat) + + def testKickoffFailureReturnsError(self): + exc = RuntimeError('kickoff failed') + kickoff = mock.Mock(side_effect=exc) + wait_fn = mock.Mock() + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIs(exc, err) + wait_fn.assert_not_called() + self.assertAlmostEqual(init_lat, e2e_lat, places=2) + + def testWaitFailureReturnsError(self): + exc = RuntimeError('wait failed') + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(side_effect=exc) + _, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIs(exc, err) + self.assertGreater(e2e_lat, 0.0) + + def testInitLatencyNotGreaterThanE2eLatency(self): + kickoff = mock.Mock(return_value='handle') + wait_fn = mock.Mock(side_effect=lambda _: time.sleep(0.01)) + init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( + kickoff, wait_fn + ) + self.assertIsNone(err) + self.assertLessEqual(init_lat, e2e_lat) + + def testHandlePassedToWaitFn(self): + kickoff = mock.Mock(return_value='my-op-handle') + wait_fn = mock.Mock() + kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) + wait_fn.assert_called_once_with('my-op-handle') + + +class RunAsyncTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunAsync concurrent execution helper.""" + + def testEmptyItemsReturnsEmptyList(self): + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(), + wait_fn=mock.Mock(), + items=[], + get_name=str, + ) + self.assertEmpty(results) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testReturnsOneResultPerItem(self): + kickoff = mock.Mock(return_value='op-handle') + wait_fn = mock.Mock(return_value=None) + results = kubernetes_management_benchmark._RunAsync( + kickoff=kickoff, wait_fn=wait_fn, items=['a', 'b', 'c'], get_name=str + ) + self.assertLen(results, 3) + self.assertEqual({'a', 'b', 'c'}, {name for name, _, _, _ in results}) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testKickoffErrorCapturedInResults(self): + kickoff = mock.Mock(side_effect=RuntimeError('kaboom')) + results = kubernetes_management_benchmark._RunAsync( + kickoff=kickoff, wait_fn=mock.Mock(), items=['x'], get_name=str + ) + self.assertLen(results, 1) + _, _, _, err = results[0] + self.assertIsNotNone(err) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=2) + def testConcurrencyCapDoesNotDropItems(self): + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(return_value='op'), + wait_fn=mock.Mock(return_value=None), + items=list(range(5)), + get_name=str, + ) + self.assertLen(results, 5) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testGetNameCallableApplied(self): + cfg = mock.MagicMock() + cfg.name = 'poolname' + results = kubernetes_management_benchmark._RunAsync( + kickoff=mock.Mock(return_value='h'), + wait_fn=mock.Mock(), + items=[cfg], + get_name=lambda c: c.name, + ) + name, _, _, _ = results[0] + self.assertEqual('poolname', name) + + +class MakeNodePoolConfigTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _MakeNodePoolConfig factory.""" + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=3) + def testNameIsSet(self): + cluster = _make_mock_cluster() + cfg = kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'mypool') + self.assertEqual('mypool', cfg.name) + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=3) + def testNumNodesComesFromFlag(self): + cluster = _make_mock_cluster() + cfg = kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'p') + self.assertEqual(3, cfg.num_nodes) + self.assertEqual(3, cfg.min_nodes) + self.assertEqual(3, cfg.max_nodes) + + @flagsaver.flagsaver(k8s_mgmt_nodes_per_nodepool=1) + def testDoesNotMutateDefaultNodepool(self): + cluster = _make_mock_cluster() + original_name = cluster.default_nodepool.name + kubernetes_management_benchmark._MakeNodePoolConfig(cluster, 'newname') + self.assertEqual(original_name, cluster.default_nodepool.name) + - def setUp(self): - super().setUp() - self.bm_spec = mock.create_autospec( - bm_spec_lib.BenchmarkSpec, instance=True +class OpSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _OpSamples sample-generation helper.""" + + def testEmptyResultsYieldsSuccessRateOfZero(self): + samples = kubernetes_management_benchmark._OpSamples( + 'PrefixOp', [], attempted_ops=5 + ) + rate = next(s for s in samples if s.metric == 'PrefixOp_SuccessRate') + self.assertEqual(0.0, rate.value) + + def testPerOpInitiationAndE2eSamplesGenerated(self): + results = [('op1', 0.1, 1.0, None), ('op2', 0.2, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'MyOp', results, attempted_ops=2 + ) + metrics = [s.metric for s in samples] + self.assertIn('MyOp_InitiationLatency', metrics) + self.assertIn('MyOp_EndToEndLatency', metrics) + + def testSuccessRateHundredPercentWhenAllSucceed(self): + results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 1.5, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=2 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0, rate.value) + + def testSuccessRateFiftyPercentWhenHalfFail(self): + results = [ + ('op1', 1.0, 2.0, None), + ('op2', 0.5, 0.5, RuntimeError('fail')), + ] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=2 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(50.0, rate.value) + + def testAttemptedOpsExceedingExecutedOpsLowersRate(self): + results = [('op1', 1.0, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0 / 3, rate.value, places=3) + + def testSuccessRateMetadataFields(self): + results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 0.5, Exception('err'))] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertEqual('3', rate.metadata['total_ops']) + self.assertEqual('2', rate.metadata['executed_ops']) + self.assertEqual('1', rate.metadata['successful_ops']) + self.assertEqual('1', rate.metadata['skipped_ops']) + + def testFailedOpIncludesErrorMessage(self): + results = [('fail-op', 0.5, 0.5, RuntimeError('oops'))] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=1 + ) + init_s = next(s for s in samples if s.metric == 'Op_InitiationLatency') + self.assertIn('error', init_s.metadata) + self.assertIn('oops', init_s.metadata['error']) + + def testAggregatesGeneratedForTwoOrMoreSuccesses(self): + results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 4)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + metrics = [s.metric for s in samples] + self.assertIn('Op_InitiationLatency_Mean', metrics) + self.assertIn('Op_EndToEndLatency_Mean', metrics) + + def testAggregatesNotGeneratedForSingleSuccess(self): + results = [('op1', 1.0, 2.0, None)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=1 + ) + self.assertNotIn('Op_InitiationLatency_Mean', [s.metric for s in samples]) + + def testOutliersGeneratedForFourOrMoreSuccesses(self): + results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 6)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=5 + ) + metrics = [s.metric for s in samples] + self.assertIn('Op_InitiationLatency_OutlierCount', metrics) + self.assertIn('Op_EndToEndLatency_OutlierCount', metrics) + + def testOutliersNotGeneratedForThreeOrFewerSuccesses(self): + results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 4)] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + self.assertNotIn( + 'Op_InitiationLatency_OutlierCount', [s.metric for s in samples] + ) + + +class AggregateSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _AggregateSamples statistics helper.""" + + def testProducesAllExpectedStatMetrics(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Pfx', 'InitiationLatency', [1.0, 2.0, 3.0, 4.0, 5.0] + ) + metrics = {s.metric for s in samples} + for label in ('Mean', 'StdDev', 'Min', 'Median', 'P90', 'P99', 'Max'): + self.assertIn(f'Pfx_InitiationLatency_{label}', metrics) + + def testMeanValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] + ) + mean_s = next(s for s in samples if 'Mean' in s.metric) + self.assertAlmostEqual(3.0, mean_s.value, places=3) + + def testMinValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [10.0, 20.0, 30.0] + ) + min_s = next(s for s in samples if 'Min' in s.metric) + self.assertAlmostEqual(10.0, min_s.value, places=3) + + def testMaxValueCorrect(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [10.0, 20.0, 30.0] + ) + max_s = next(s for s in samples if 'Max' in s.metric) + self.assertAlmostEqual(30.0, max_s.value, places=3) + + def testSampleCountInMetadata(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0] + ) + for s in samples: + self.assertEqual('3', s.metadata.get('sample_count')) + + def testUnitsAreSeconds(self): + samples = kubernetes_management_benchmark._AggregateSamples( + 'Op', 'E2E', [1.0, 2.0] + ) + for s in samples: + self.assertEqual('seconds', s.unit) + + +class OutlierSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _OutlierSamples IQR-based outlier detection helper.""" + + def testNoOutliersYieldsZeroCount(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 1.1, 1.2, 1.3, 1.4, 1.5] + ) + self.assertLen(samples, 1) + self.assertEqual(0, samples[0].value) + + def testClearOutlierDetected(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 1.0, 1.0, 1.0, 100.0] + ) + self.assertEqual(1, samples[0].value) + + def testMetricNameFormatted(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'MyPrefix', 'InitiationLatency', [1.0, 2.0, 3.0, 4.0] + ) + self.assertEqual( + 'MyPrefix_InitiationLatency_OutlierCount', samples[0].metric + ) + + def testMetadataContainsFenceFields(self): + """Tests that outlier samples contain fence metadata fields.""" + meta = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] + )[0].metadata + for field in ( + 'q1', + 'q3', + 'iqr', + 'upper_fence', + 'lower_fence', + 'sample_count', + ): + self.assertIn(field, meta) + + def testSampleCountInMetadata(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0, 5.0] ) + self.assertEqual('5', samples[0].metadata['sample_count']) - def testRun(self): - samples = kubernetes_management_benchmark.Run(self.bm_spec) - self.assertEqual(samples, []) + def testUnitIsCount(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', [1.0, 2.0, 3.0, 4.0] + ) + self.assertEqual('count', samples[0].unit) + + def testReturnsSingleSample(self): + samples = kubernetes_management_benchmark._OutlierSamples( + 'Op', 'E2E', list(range(1, 11)) + ) + self.assertLen(samples, 1) + + +class RunTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the Run benchmark entry-point function.""" + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A', 'B', 'C'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunCallsCleanStartSweep(self): + """Tests that Run invokes _CleanStartSweep before executing scenarios.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ) as mock_clean, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + kubernetes_management_benchmark.Run(bm_spec) + self.assertEqual(mock_clean.call_count, 2) + mock_clean.assert_called_with(cluster) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunOnlyScenarioACallsOnlyA(self): + """Tests that Run only calls _RunScenarioA when scenarios=['A'].""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ) as mock_a, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ) as mock_b, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_a.assert_called_once() + mock_b.assert_not_called() + mock_c.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['B'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunOnlyScenarioBCallsOnlyB(self): + """Tests that Run only calls _RunScenarioB when scenarios=['B'].""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ) as mock_a, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ) as mock_b, mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_a.assert_not_called() + mock_b.assert_called_once() + mock_c.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=42, + ) + def testRunScenarioCPassesLargeScaleFlag(self): + """Tests that Run passes the large-scale-nodepools flag to _RunScenarioC.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + mock_c.assert_called_once() + _, _, scale = mock_c.call_args.args + self.assertEqual(42, scale) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=['10', '50'], + k8s_mgmt_large_scale_nodepools=100, + ) + def testRunScenarioCScaleSweepRunsTwice(self): + """Tests that Run calls _RunScenarioC once per scale in the sweep.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioC', + return_value=[_make_sample('m', 1.0)], + ) as mock_c: + kubernetes_management_benchmark.Run(bm_spec) + self.assertEqual(2, mock_c.call_count) + scales = [call.args[2] for call in mock_c.call_args_list] + self.assertIn(10, scales) + self.assertIn(50, scales) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['C'], + k8s_mgmt_scale_sweep=['10'], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunTagsScenarioCScaleInMetadata(self): + """Tests that Run adds scenario_c_scale to each sample's metadata.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + test_sample = _make_sample('metric', 1.0) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioC', + return_value=[test_sample], + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + self.assertIn('scenario_c_scale', samples[0].metadata) + self.assertEqual('10', samples[0].metadata['scenario_c_scale']) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunTagsAllSamplesWithRunMetadata(self): + """Tests that Run adds version and config keys to all sample metadata.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + test_sample = _make_sample('m', 1.0) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[test_sample], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + meta = samples[0].metadata + for key in ( + 'initial_version', + 'target_version', + 'cluster_k8s_version', + 'nodes_per_nodepool', + 'concurrent_nodepools', + ): + self.assertIn(key, meta) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_initial_version='1.30', + k8s_mgmt_target_version='1.31', + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunUsesExplicitVersionFlags(self): + """Tests that Run uses explicit version flags over auto-resolved ones.""" + cluster = _make_mock_cluster() + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[_make_sample('m', 1.0)], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + cluster.ResolveNodePoolVersions.assert_not_called() + self.assertEqual('1.30', samples[0].metadata['initial_version']) + self.assertEqual('1.31', samples[0].metadata['target_version']) + + @flagsaver.flagsaver( + k8s_mgmt_scenarios=['A'], + k8s_mgmt_scale_sweep=[], + k8s_mgmt_large_scale_nodepools=10, + ) + def testRunAutoResolvesVersionsWhenFlagsAbsent(self): + """Tests Run calls ResolveNodePoolVersions when version flags absent.""" + cluster = _make_mock_cluster() + cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') + bm_spec = _make_mock_benchmark_spec(cluster) + with mock.patch.object( + kubernetes_management_benchmark, '_CleanStartSweep' + ), mock.patch.object( + kubernetes_management_benchmark, + '_RunScenarioA', + return_value=[_make_sample('m', 1.0)], + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + ), mock.patch.object( + kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + ): + samples = kubernetes_management_benchmark.Run(bm_spec) + cluster.ResolveNodePoolVersions.assert_called_once() + self.assertEqual('1.33', samples[0].metadata['initial_version']) + self.assertEqual('1.34', samples[0].metadata['target_version']) + + +class RunScenarioATest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioA phase-by-phase and pipelined modes.""" + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhaseProducesCreateUpgradeDeleteSamples(self): + """Tests Scenario A produces Create, Upgrade, and Delete samples.""" + cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) + samples = kubernetes_management_benchmark._RunScenarioA( + cluster, '1.33', '1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhasePassesInitialVersionToCreate(self): + """Tests _RunScenarioA passes initial_version to CreateNodePoolAsync.""" + cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + for call in cluster.CreateNodePoolAsync.call_args_list: + kw = call.kwargs if call.kwargs else {} + pos = call.args + node_version = kw.get('node_version') or ( + pos[1] if len(pos) > 1 else None + ) + self.assertEqual('1.33', node_version) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=False, + ) + def testPhaseByPhaseDeleteUsesLivePoolList(self): + """Tests that _RunScenarioA deletes only the pools it finds at runtime.""" + cluster = _make_mock_cluster(pool_names=['pkbma000']) + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + self.assertEqual(1, cluster.DeleteNodePoolAsync.call_count) + + @flagsaver.flagsaver( + k8s_mgmt_concurrent_nodepools=2, + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + k8s_mgmt_pipeline_scenario_a=True, + ) + def testPipelinedModeActivatedByFlag(self): + """Tests pipelined mode is activated by the pipeline_scenario_a flag.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioA( + cluster, '1.33', '1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + +class RunScenarioAPipelinedTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioAPipelined pipelined execution path.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPipelinedProducesAllThreePhases(self): + """Tests pipelined Scenario A produces Create/Upgrade/Delete samples.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioAPipelined( + cluster, n=2, initial='1.33', target='1.34' + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPipelinedSkipsUpgradeAfterCreateFailure(self): + """Tests pipelined mode skips upgrade when create fails.""" + cluster = _make_mock_cluster(pool_names=[]) + cluster.CreateNodePoolAsync.side_effect = RuntimeError('create failed') + samples = kubernetes_management_benchmark._RunScenarioAPipelined( + cluster, n=1, initial='1.33', target='1.34' + ) + cluster.UpgradeNodePoolAsync.assert_not_called() + upgrade_rate = next( + (s for s in samples if s.metric == 'ScenarioA_Upgrade_SuccessRate'), + None, + ) + if upgrade_rate is not None: + self.assertEqual(0.0, upgrade_rate.value) + + +class RunScenarioBTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioB cluster-update + nodepool-create scenario.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testProducesClusterUpdateAndNodePoolCreateSamples(self): + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioB_ClusterUpdate' in m for m in metrics)) + self.assertTrue(any('ScenarioB_NodePoolCreate' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testDeletesTestPoolAfterRun(self): + cluster = _make_mock_cluster(pool_names=[]) + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + cluster.DeleteNodePool.assert_called_once_with( + kubernetes_management_benchmark._SCENARIO_B_NAME + ) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testDeleteFailureRaisesInScenarioB(self): + cluster = _make_mock_cluster(pool_names=[]) + cluster.DeleteNodePool.side_effect = RuntimeError('delete failed') + with self.assertRaises(RuntimeError): + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testPassesInitialVersionToCreate(self): + """Tests _RunScenarioB passes initial_version to CreateNodePoolAsync.""" + cluster = _make_mock_cluster(pool_names=[]) + kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + for call in cluster.CreateNodePoolAsync.call_args_list: + kw = call.kwargs if call.kwargs else {} + pos = call.args + node_version = kw.get('node_version') or ( + pos[1] if len(pos) > 1 else None + ) + self.assertEqual('1.33', node_version) + + +class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunScenarioC large-scale create-and-delete scenario.""" + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testProducesCreateAndDeleteSamples(self): + cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=2 + ) + metrics = {s.metric for s in samples} + self.assertTrue(any('ScenarioC_Create' in m for m in metrics)) + self.assertTrue(any('ScenarioC_Delete' in m for m in metrics)) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): + """Tests Scenario C records 0% delete rate when no live pools exist.""" + cluster = _make_mock_cluster(pool_names=[]) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=3 + ) + delete_rate = next( + s for s in samples if s.metric == 'ScenarioC_Delete_SuccessRate' + ) + self.assertEqual(0.0, delete_rate.value) + cluster.DeleteNodePoolAsync.assert_not_called() + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testDeleteUsesLiveListNotOriginalCreateList(self): + cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) + kubernetes_management_benchmark._RunScenarioC(cluster, '1.33', scale=3) + self.assertEqual(2, cluster.DeleteNodePoolAsync.call_count) + + @flagsaver.flagsaver( + k8s_mgmt_nodes_per_nodepool=1, + k8s_mgmt_max_concurrent=50, + ) + def testCreateSuccessRateUsesScaleAsDenominator(self): + """Tests Scenario C create success rate uses scale as total_ops.""" + cluster = _make_mock_cluster(pool_names=['pkbmc0000']) + samples = kubernetes_management_benchmark._RunScenarioC( + cluster, '1.33', scale=3 + ) + create_rate = next( + s for s in samples if s.metric == 'ScenarioC_Create_SuccessRate' + ) + self.assertLessEqual(create_rate.value, 100.0) + self.assertEqual('3', create_rate.metadata['total_ops']) if __name__ == '__main__': From 6bdc03e44fb9ca50e77291bc642ca4d2d5f9a958 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 9 Jun 2026 10:51:12 +0000 Subject: [PATCH 02/15] kubernetes_management: address review (config anchor, Prepare docstring, remove dead code) --- .../kubernetes_management_benchmark.py | 44 ++----------------- 1 file changed, 3 insertions(+), 41 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 579dd8f068..1e30efc247 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -57,27 +57,10 @@ Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale provisioning. Focused on control-plane API responsiveness. - Spec regions: GCP us-central1, AWS us-east-1 (closest), Azure eastus. - Equivalent machine types across clouds per Google benchmark spec. container_cluster: type: Kubernetes vm_count: 1 - vm_spec: - GCP: - # us-central1-a: spec primary region for GCP - # e2-standard-2: 2 vCPU 8GB — equivalent to t3.medium / D2s_v3 - machine_type: e2-standard-2 - zone: us-central1-a - AWS: - # us-east-1a: closest comparable region to GCP us-central1 - # t3.medium: 2 vCPU 4GB — closest equivalent to e2-standard-2 - machine_type: t3.medium - zone: us-east-1a - Azure: - # eastus: closest comparable region to GCP us-central1 - # Standard_D2s_v3: 2 vCPU 8GB — equivalent to e2-standard-2 - machine_type: Standard_D2s_v3 - zone: eastus + vm_spec: *default_dual_core """ _VALID_SCENARIOS = frozenset({"A", "B", "C"}) @@ -195,8 +178,9 @@ def CheckPrerequisites( def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: - """Asserts the cluster is reachable; deploys spec-defined sleep workload.""" + """Deploys a sleep pod to confirm data-plane reachability.""" cluster = benchmark_spec.container_cluster + # Type narrowing for pytype; reachability is confirmed by the sleep pod below. assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) benchmark_spec.always_call_cleanup = True logging.info( @@ -379,28 +363,6 @@ def _RunScenarioA( "ScenarioA_Upgrade", upgrade_results, attempted_ops=len(created) ) - # # ── Idiomatic Control Plane Synchronization Barrier ────────────────────── - # # Give the GKE control plane a brief window to register the async ops. - # time.sleep(15) - - # # Check if the cluster object has our native upgrade tracking capability. - # if hasattr(cluster, 'HasActiveUpgradeOperations'): - # logging.info('GCP GKE cluster detected; polling via provider API.') - - # while cluster.HasActiveUpgradeOperations(): - # logging.info( - # 'Upgrade operations active; holding delete phase for 30s.') - # time.sleep(30) - - # logging.info( - # 'All upgrade ops completed; flushing API gateway write-locks.') - # time.sleep(10) - # else: - # # Non-GCP providers (Azure AKS / AWS EKS): standard safety pause. - # logging.info( - # 'Non-GCP cluster; proceeding with stabilization pause.') - # time.sleep(5) - # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ────── alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}a")] logging.info( From c9593896b294ccd4950019adf401c97bb3096ccc Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 9 Jun 2026 12:42:21 +0000 Subject: [PATCH 03/15] kubernetes_management: descriptive scenario names, grouped flags, scenario-flag cross-validation --- .../kubernetes_management_benchmark.py | 122 ++++++++++++------ .../kubernetes_management_benchmark_test.py | 58 +++++++-- 2 files changed, 125 insertions(+), 55 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 1e30efc247..164c18d9e9 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -14,13 +14,13 @@ """Benchmark for Kubernetes management plane operations. Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: - A. Concurrent node-pool create/upgrade/delete. - B. Node-pool create overlapping with a long-running cluster update. - C. Large-scale node-pool provisioning (single scale or sweep). + concurrent_node_pool_ops: concurrent node-pool create/upgrade/delete. + overlapping_cluster_update: node-pool create overlapping a cluster update. + large_scale_provisioning: large-scale node-pool provisioning (scale/sweep). Optimizations for minimum run time: - - Streaming concurrency in Scenario C (no batch barriers) - - Optional pipelined Scenario A (create->upgrade->delete per thread) + - Streaming concurrency in large_scale_provisioning (no batch barriers) + - Optional pipelined concurrent_node_pool_ops (create/upgrade/delete) - Reduced poll_interval in provider WaitForOperation (5s vs 10s) - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits - Accurate delete success rate via attempted_ops denominator @@ -63,24 +63,52 @@ vm_spec: *default_dual_core """ -_VALID_SCENARIOS = frozenset({"A", "B", "C"}) - -_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( - "k8s_mgmt_concurrent_nodepools", - 5, - "Number of node pools to create/upgrade/delete concurrently in Scenario A.", -) -_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( - "k8s_mgmt_large_scale_nodepools", - 1000, - "Number of node pools to provision in the large-scale Scenario C. " - + "Spec target is 1000; ensure VPC/quota is available before running.", +# Scenarios measured by this benchmark (select via --k8s_mgmt_scenarios): +# concurrent_node_pool_ops: concurrently create, upgrade, and delete N +# node pools; measures control-plane throughput under parallel ops. +# overlapping_cluster_update: run a cluster update and a node-pool create +# simultaneously; measures behaviour when a cluster-scoped op overlaps a +# node-pool-scoped one. +# large_scale_provisioning: create then delete a large number of node pools +# (optionally swept via --k8s_mgmt_scale_sweep); measures scaling limits +# and large-batch provisioning latency. +_VALID_SCENARIOS = frozenset({ + "concurrent_node_pool_ops", + "overlapping_cluster_update", + "large_scale_provisioning", +}) + +# ── Shared flags (apply across all scenarios) ── +_SCENARIOS = flags.DEFINE_list( + "k8s_mgmt_scenarios", + [ + "concurrent_node_pool_ops", + "overlapping_cluster_update", + "large_scale_provisioning", + ], + "Comma-separated subset of scenarios to run. Valid values: " + + "concurrent_node_pool_ops, overlapping_cluster_update, " + + "large_scale_provisioning.", ) _NODES_PER_NODEPOOL = flags.DEFINE_integer( "k8s_mgmt_nodes_per_nodepool", 2, "Number of nodes per node pool. Google spec: 2 nodes per pool.", ) +_MAX_CONCURRENT = flags.DEFINE_integer( + "k8s_mgmt_max_concurrent", + 50, + "Cap on concurrent provider API calls within a batch. " + + "Higher = faster but more aggressive on connection pools.", +) + +# ── concurrent_node_pool_ops flags ── +_CONCURRENT_NODEPOOLS = flags.DEFINE_integer( + "k8s_mgmt_concurrent_nodepools", + 5, + "Number of node pools to create/upgrade/delete concurrently in the " + + "concurrent_node_pool_ops scenario.", +) _INITIAL_VERSION = flags.DEFINE_string( "k8s_mgmt_initial_version", None, @@ -91,32 +119,31 @@ None, "Kubernetes version to upgrade node pools to (N). None = cluster version.", ) -_SCENARIOS = flags.DEFINE_list( - "k8s_mgmt_scenarios", - ["A", "B", "C"], - "Comma-separated subset of scenarios to run. Valid values: A, B, C.", +_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( + "k8s_mgmt_pipeline_scenario_a", + True, + "If True, run concurrent_node_pool_ops as a per-pool pipeline " + + "(create->upgrade->delete back-to-back per thread). Minimizes wall time. " + + "Default False for spec-strict phase-by-phase.", +) + +# ── large_scale_provisioning flags ── +_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( + "k8s_mgmt_large_scale_nodepools", + 1000, + "Number of node pools to provision in the large_scale_provisioning " + + "scenario. Spec target is 1000; ensure VPC/quota is available before " + + "running.", ) _SCALE_SWEEP = flags.DEFINE_list( "k8s_mgmt_scale_sweep", [], - "Comma-separated list of node-pool counts for Scenario C scale sweep. " - + "Each scale runs as a separate sub-run with full create/delete cycle. " - + "Example: --k8s_mgmt_scale_sweep=10,50,100,500,1000. " + "Comma-separated list of node-pool counts for the large_scale_provisioning " + + "scale sweep. Each scale runs as a separate sub-run with full " + + "create/delete cycle. Example:" + " --k8s_mgmt_scale_sweep=10,50,100,500,1000. " + "If empty, uses --k8s_mgmt_large_scale_nodepools.", ) -_MAX_CONCURRENT = flags.DEFINE_integer( - "k8s_mgmt_max_concurrent", - 50, - "Cap on concurrent provider API calls within a batch. " - + "Higher = faster but more aggressive on connection pools.", -) -_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( - "k8s_mgmt_pipeline_scenario_a", - True, - "If True, run Scenario A as per-pool pipeline (create->upgrade->delete " - + "back-to-back per thread). Minimizes wall time. " - + "Default False for spec-strict phase-by-phase.", -) # AKS caps node-pool names at 12 chars — keep all names within that limit. _PREFIX = "pkbm" @@ -163,6 +190,19 @@ def CheckPrerequisites( f"Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. " + f"Valid options: {sorted(_VALID_SCENARIOS)}." ) + selected = {s.strip() for s in _SCENARIOS.value} + if ( + _INITIAL_VERSION.value or _TARGET_VERSION.value + ) and "concurrent_node_pool_ops" not in selected: + raise errors.Config.InvalidValue( + "--k8s_mgmt_initial_version / --k8s_mgmt_target_version apply only to " + + "the concurrent_node_pool_ops scenario, which is not selected." + ) + if _SCALE_SWEEP.value and "large_scale_provisioning" not in selected: + raise errors.Config.InvalidValue( + "--k8s_mgmt_scale_sweep applies only to the large_scale_provisioning " + + "scenario, which is not selected." + ) for s in _SCALE_SWEEP.value: try: int(s.strip()) @@ -250,14 +290,14 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: else "unknown", ) - scenarios = {s.strip().upper() for s in _SCENARIOS.value} + scenarios = {s.strip() for s in _SCENARIOS.value} samples: list[sample.Sample] = [] - if "A" in scenarios: + if "concurrent_node_pool_ops" in scenarios: samples += _RunScenarioA(cluster, initial, target) - if "B" in scenarios: + if "overlapping_cluster_update" in scenarios: samples += _RunScenarioB(cluster, initial) - if "C" in scenarios: + if "large_scale_provisioning" in scenarios: # fix: Scenario A/B pools may still be in Deleting state and count # toward AKS's 100-pool cluster limit. Sweep them out before Scenario C # so we don't hit MaxAgentPoolCountReached mid-run. diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index c24c8477d6..13cdf4e56c 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -143,7 +143,13 @@ class CheckPrerequisitesTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the CheckPrerequisites validation function.""" def testValidScenariosPass(self): - with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'B', 'C']): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=[ + 'concurrent_node_pool_ops', + 'overlapping_cluster_update', + 'large_scale_provisioning', + ] + ): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) def testInvalidScenarioRaises(self): @@ -152,12 +158,14 @@ def testInvalidScenarioRaises(self): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) def testMixedValidInvalidRaises(self): - with flagsaver.flagsaver(k8s_mgmt_scenarios=['A', 'Z']): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['concurrent_node_pool_ops', 'Z'] + ): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) def testNonKubernetesClusterTypeRaises(self): - with flagsaver.flagsaver(k8s_mgmt_scenarios=['A']): + with flagsaver.flagsaver(k8s_mgmt_scenarios=['concurrent_node_pool_ops']): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites( _make_mock_config(cluster_type='Mesos') @@ -165,14 +173,16 @@ def testNonKubernetesClusterTypeRaises(self): def testInvalidScaleSweepRaises(self): with flagsaver.flagsaver( - k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', 'abc'] + k8s_mgmt_scenarios=['large_scale_provisioning'], + k8s_mgmt_scale_sweep=['10', 'abc'], ): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) def testValidScaleSweepPasses(self): with flagsaver.flagsaver( - k8s_mgmt_scenarios=['C'], k8s_mgmt_scale_sweep=['10', '50', '100'] + k8s_mgmt_scenarios=['large_scale_provisioning'], + k8s_mgmt_scale_sweep=['10', '50', '100'], ): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) @@ -181,6 +191,22 @@ def testLowercaseScenarioRaises(self): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + def testVersionFlagWithoutConcurrentRaises(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['large_scale_provisioning'], + k8s_mgmt_target_version='1.34', + ): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + + def testScaleSweepWithoutLargeScaleRaises(self): + with flagsaver.flagsaver( + k8s_mgmt_scenarios=['concurrent_node_pool_ops'], + k8s_mgmt_scale_sweep=['10', '50'], + ): + with self.assertRaises(errors.Config.InvalidValue): + kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) + class PrepareTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the Prepare benchmark lifecycle function.""" @@ -674,7 +700,11 @@ class RunTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the Run benchmark entry-point function.""" @flagsaver.flagsaver( - k8s_mgmt_scenarios=['A', 'B', 'C'], + k8s_mgmt_scenarios=[ + 'concurrent_node_pool_ops', + 'overlapping_cluster_update', + 'large_scale_provisioning', + ], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) @@ -696,7 +726,7 @@ def testRunCallsCleanStartSweep(self): mock_clean.assert_called_with(cluster) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['A'], + k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) @@ -719,7 +749,7 @@ def testRunOnlyScenarioACallsOnlyA(self): mock_c.assert_not_called() @flagsaver.flagsaver( - k8s_mgmt_scenarios=['B'], + k8s_mgmt_scenarios=['overlapping_cluster_update'], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) @@ -742,7 +772,7 @@ def testRunOnlyScenarioBCallsOnlyB(self): mock_c.assert_not_called() @flagsaver.flagsaver( - k8s_mgmt_scenarios=['C'], + k8s_mgmt_scenarios=['large_scale_provisioning'], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=42, ) @@ -765,7 +795,7 @@ def testRunScenarioCPassesLargeScaleFlag(self): self.assertEqual(42, scale) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['C'], + k8s_mgmt_scenarios=['large_scale_provisioning'], k8s_mgmt_scale_sweep=['10', '50'], k8s_mgmt_large_scale_nodepools=100, ) @@ -791,7 +821,7 @@ def testRunScenarioCScaleSweepRunsTwice(self): self.assertIn(50, scales) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['C'], + k8s_mgmt_scenarios=['large_scale_provisioning'], k8s_mgmt_scale_sweep=['10'], k8s_mgmt_large_scale_nodepools=10, ) @@ -816,7 +846,7 @@ def testRunTagsScenarioCScaleInMetadata(self): self.assertEqual('10', samples[0].metadata['scenario_c_scale']) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['A'], + k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) @@ -848,7 +878,7 @@ def testRunTagsAllSamplesWithRunMetadata(self): self.assertIn(key, meta) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['A'], + k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_initial_version='1.30', k8s_mgmt_target_version='1.31', k8s_mgmt_scale_sweep=[], @@ -875,7 +905,7 @@ def testRunUsesExplicitVersionFlags(self): self.assertEqual('1.31', samples[0].metadata['target_version']) @flagsaver.flagsaver( - k8s_mgmt_scenarios=['A'], + k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) From bf8c1d1f63f8e1c64770dc4ac6713992a5a482f4 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 9 Jun 2026 13:28:03 +0000 Subject: [PATCH 04/15] =?UTF-8?q?kubernetes=5Fmanagement:=20split=20out=20?= =?UTF-8?q?upgrade=20path=20=E2=80=94=20create/delete=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kubernetes_management_benchmark.py | 144 +++--------------- .../kubernetes_management_benchmark_test.py | 90 ++--------- 2 files changed, 26 insertions(+), 208 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 164c18d9e9..6c60ae6c0f 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -14,13 +14,12 @@ """Benchmark for Kubernetes management plane operations. Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: - concurrent_node_pool_ops: concurrent node-pool create/upgrade/delete. + concurrent_node_pool_ops: concurrent node-pool create/delete. overlapping_cluster_update: node-pool create overlapping a cluster update. large_scale_provisioning: large-scale node-pool provisioning (scale/sweep). Optimizations for minimum run time: - Streaming concurrency in large_scale_provisioning (no batch barriers) - - Optional pipelined concurrent_node_pool_ops (create/upgrade/delete) - Reduced poll_interval in provider WaitForOperation (5s vs 10s) - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits - Accurate delete success rate via attempted_ops denominator @@ -55,7 +54,7 @@ kubernetes_management: description: > Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool - create/upgrade/delete, overlapping cluster + node-pool ops, and large-scale + create/delete, overlapping cluster + node-pool ops, and large-scale provisioning. Focused on control-plane API responsiveness. container_cluster: type: Kubernetes @@ -64,8 +63,8 @@ """ # Scenarios measured by this benchmark (select via --k8s_mgmt_scenarios): -# concurrent_node_pool_ops: concurrently create, upgrade, and delete N -# node pools; measures control-plane throughput under parallel ops. +# concurrent_node_pool_ops: concurrently create and delete N node pools; +# measures control-plane throughput under parallel ops. # overlapping_cluster_update: run a cluster update and a node-pool create # simultaneously; measures behaviour when a cluster-scoped op overlaps a # node-pool-scoped one. @@ -106,7 +105,7 @@ _CONCURRENT_NODEPOOLS = flags.DEFINE_integer( "k8s_mgmt_concurrent_nodepools", 5, - "Number of node pools to create/upgrade/delete concurrently in the " + "Number of node pools to create and delete concurrently in the " + "concurrent_node_pool_ops scenario.", ) _INITIAL_VERSION = flags.DEFINE_string( @@ -114,18 +113,6 @@ None, "Kubernetes version for newly-created node pools (N-1). None = auto.", ) -_TARGET_VERSION = flags.DEFINE_string( - "k8s_mgmt_target_version", - None, - "Kubernetes version to upgrade node pools to (N). None = cluster version.", -) -_PIPELINE_SCENARIO_A = flags.DEFINE_boolean( - "k8s_mgmt_pipeline_scenario_a", - True, - "If True, run concurrent_node_pool_ops as a per-pool pipeline " - + "(create->upgrade->delete back-to-back per thread). Minimizes wall time. " - + "Default False for spec-strict phase-by-phase.", -) # ── large_scale_provisioning flags ── _LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( @@ -191,13 +178,6 @@ def CheckPrerequisites( + f"Valid options: {sorted(_VALID_SCENARIOS)}." ) selected = {s.strip() for s in _SCENARIOS.value} - if ( - _INITIAL_VERSION.value or _TARGET_VERSION.value - ) and "concurrent_node_pool_ops" not in selected: - raise errors.Config.InvalidValue( - "--k8s_mgmt_initial_version / --k8s_mgmt_target_version apply only to " - + "the concurrent_node_pool_ops scenario, which is not selected." - ) if _SCALE_SWEEP.value and "large_scale_provisioning" not in selected: raise errors.Config.InvalidValue( "--k8s_mgmt_scale_sweep applies only to the large_scale_provisioning " @@ -261,28 +241,19 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: # Spec C.2: start clean. _CleanStartSweep(cluster) - # Resolve versions once; log clearly; tag every sample. - # Google spec: initial=N-1, target=N (adjacent minor upgrade). + # Resolve the initial node-pool version once; log clearly; tag every sample. flag_initial = _INITIAL_VERSION.value - flag_target = _TARGET_VERSION.value - if not (flag_initial and flag_target): - resolved_initial, resolved_target = cluster.ResolveNodePoolVersions() - flag_initial = flag_initial or resolved_initial - flag_target = flag_target or resolved_target - initial, target = flag_initial, flag_target - if _INITIAL_VERSION.value and _TARGET_VERSION.value: - source = "flags" - elif not (_INITIAL_VERSION.value or _TARGET_VERSION.value): - source = "auto-resolved" - else: - source = "mixed" + if not flag_initial: + resolved_initial, _ = cluster.ResolveNodePoolVersions() + flag_initial = resolved_initial + initial = flag_initial + source = "flag" if _INITIAL_VERSION.value else "auto-resolved" logging.info( - "NodePool versions (%s): initial=%s -> target=%s " + "NodePool version (%s): initial=%s " + "(cluster k8s_version=%s) | nodes_per_pool=%d | machine_type=%s", source, initial, - target, cluster.k8s_version, _NODES_PER_NODEPOOL.value, cluster.default_nodepool.machine_type @@ -294,7 +265,7 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: samples: list[sample.Sample] = [] if "concurrent_node_pool_ops" in scenarios: - samples += _RunScenarioA(cluster, initial, target) + samples += _RunScenarioA(cluster, initial) if "overlapping_cluster_update" in scenarios: samples += _RunScenarioB(cluster, initial) if "large_scale_provisioning" in scenarios: @@ -317,7 +288,6 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: # Tag all samples with version path and run config for published results. run_meta = { "initial_version": str(initial), - "target_version": str(target), "cluster_k8s_version": str(cluster.k8s_version), "nodes_per_nodepool": str(_NODES_PER_NODEPOOL.value), "concurrent_nodepools": str(_CONCURRENT_NODEPOOLS.value), @@ -352,25 +322,10 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: def _RunScenarioA( cluster: kubernetes_cluster.KubernetesCluster, initial: str, - target: str, ) -> list[sample.Sample]: - """Concurrent CreateNodePool, UpgradeNodePool, DeleteNodePool.""" + """Concurrent CreateNodePool then DeleteNodePool.""" n = _CONCURRENT_NODEPOOLS.value - if _PIPELINE_SCENARIO_A.value: - logging.info( - "Scenario A (pipelined): %d pools, initial=%s, target=%s", - n, - initial, - target, - ) - return _RunScenarioAPipelined(cluster, n, initial, target) - - logging.info( - "Scenario A (phase-by-phase): %d pools, initial=%s, target=%s", - n, - initial, - target, - ) + logging.info("concurrent_node_pool_ops: %d pools, initial=%s", n, initial) pool_names = [_ScenarioAName(i) for i in range(n)] configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] @@ -388,25 +343,10 @@ def _RunScenarioA( "ScenarioA_Create", create_results, attempted_ops=len(pool_names) ) - # ── Phase 2: concurrent upgrades (only successfully created pools) ─────── - created = [r.name for r in create_results if r.error is None] - logging.info( - "Scenario A: %d/%d pools created — proceeding to upgrade", len(created), n - ) - upgrade_results = _RunAsync( - kickoff=lambda name: cluster.UpgradeNodePoolAsync(name, target), - wait_fn=cluster.WaitForOperation, - items=created, - get_name=str, - ) - samples += _OpSamples( - "ScenarioA_Upgrade", upgrade_results, attempted_ops=len(created) - ) - - # ── Phase 3: concurrent deletes (live-list to catch EKS rollbacks) ────── + # ── Phase 2: concurrent deletes (live-list to catch EKS rollbacks) ────── alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}a")] logging.info( - "Scenario A: %d live pools found for delete (originally %d)", + "concurrent_node_pool_ops: %d live pools for delete (originally %d)", len(alive), n, ) @@ -422,56 +362,6 @@ def _RunScenarioA( return samples -def _RunScenarioAPipelined( - cluster: kubernetes_cluster.KubernetesCluster, - n: int, - initial: str, - target: str, -) -> list[sample.Sample]: - """Per-pool pipeline: create->upgrade->delete back-to-back per thread. - - Minimizes wall time: max_i(create_i + upgrade_i + delete_i) vs - max(creates)+max(upgrades)+max(deletes) in phase-by-phase mode. - Trade-off: ops run under mixed-type concurrent load. - """ - pool_names = [_ScenarioAName(i) for i in range(n)] - creates = _Results() - upgrades = _Results() - deletes = _Results() - - def DoPool(pool_name: str): - """Runs timed create/upgrade/delete for one pool.""" - cfg = _MakeNodePoolConfig(cluster, pool_name) - init, e2e, err = _TimedAsync( - lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), - cluster.WaitForOperation, - ) - creates.add(pool_name, init, e2e, err) - if err is not None: - return - init, e2e, err = _TimedAsync( - lambda: cluster.UpgradeNodePoolAsync(pool_name, target), - cluster.WaitForOperation, - ) - upgrades.add(pool_name, init, e2e, err) - init, e2e, err = _TimedAsync( - lambda: cluster.DeleteNodePoolAsync(pool_name), - cluster.WaitForOperation, - ) - deletes.add(pool_name, init, e2e, err) - - background_tasks.RunThreaded( - DoPool, - pool_names, - max_concurrent_threads=min(n, _MAX_CONCURRENT.value), - ) - samples: list[sample.Sample] = [] - samples += _OpSamples("ScenarioA_Create", creates.entries, attempted_ops=n) - samples += _OpSamples("ScenarioA_Upgrade", upgrades.entries, attempted_ops=n) - samples += _OpSamples("ScenarioA_Delete", deletes.entries, attempted_ops=n) - return samples - - # --------------------------------------------------------------------------- # Scenario B # --------------------------------------------------------------------------- diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index 13cdf4e56c..97aef64124 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -191,14 +191,6 @@ def testLowercaseScenarioRaises(self): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - def testVersionFlagWithoutConcurrentRaises(self): - with flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_target_version='1.34', - ): - with self.assertRaises(errors.Config.InvalidValue): - kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - def testScaleSweepWithoutLargeScaleRaises(self): with flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], @@ -870,7 +862,6 @@ def testRunTagsAllSamplesWithRunMetadata(self): meta = samples[0].metadata for key in ( 'initial_version', - 'target_version', 'cluster_k8s_version', 'nodes_per_nodepool', 'concurrent_nodepools', @@ -880,7 +871,6 @@ def testRunTagsAllSamplesWithRunMetadata(self): @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_initial_version='1.30', - k8s_mgmt_target_version='1.31', k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) @@ -902,7 +892,6 @@ def testRunUsesExplicitVersionFlags(self): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_not_called() self.assertEqual('1.30', samples[0].metadata['initial_version']) - self.assertEqual('1.31', samples[0].metadata['target_version']) @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], @@ -928,39 +917,34 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_called_once() self.assertEqual('1.33', samples[0].metadata['initial_version']) - self.assertEqual('1.34', samples[0].metadata['target_version']) class RunScenarioATest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunScenarioA phase-by-phase and pipelined modes.""" + """Tests for the _RunScenarioA phase-by-phase create/delete path.""" @flagsaver.flagsaver( k8s_mgmt_concurrent_nodepools=2, k8s_mgmt_nodes_per_nodepool=1, k8s_mgmt_max_concurrent=50, - k8s_mgmt_pipeline_scenario_a=False, ) - def testPhaseByPhaseProducesCreateUpgradeDeleteSamples(self): - """Tests Scenario A produces Create, Upgrade, and Delete samples.""" + def testProducesCreateAndDeleteSamples(self): + """Tests Scenario A produces Create and Delete samples.""" cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) - samples = kubernetes_management_benchmark._RunScenarioA( - cluster, '1.33', '1.34' - ) + samples = kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') metrics = {s.metric for s in samples} self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) + self.assertFalse(any('ScenarioA_Upgrade' in m for m in metrics)) @flagsaver.flagsaver( k8s_mgmt_concurrent_nodepools=2, k8s_mgmt_nodes_per_nodepool=1, k8s_mgmt_max_concurrent=50, - k8s_mgmt_pipeline_scenario_a=False, ) - def testPhaseByPhasePassesInitialVersionToCreate(self): + def testPassesInitialVersionToCreate(self): """Tests _RunScenarioA passes initial_version to CreateNodePoolAsync.""" cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) - kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') for call in cluster.CreateNodePoolAsync.call_args_list: kw = call.kwargs if call.kwargs else {} pos = call.args @@ -973,69 +957,13 @@ def testPhaseByPhasePassesInitialVersionToCreate(self): k8s_mgmt_concurrent_nodepools=2, k8s_mgmt_nodes_per_nodepool=1, k8s_mgmt_max_concurrent=50, - k8s_mgmt_pipeline_scenario_a=False, ) - def testPhaseByPhaseDeleteUsesLivePoolList(self): + def testDeleteUsesLivePoolList(self): """Tests that _RunScenarioA deletes only the pools it finds at runtime.""" cluster = _make_mock_cluster(pool_names=['pkbma000']) - kubernetes_management_benchmark._RunScenarioA(cluster, '1.33', '1.34') + kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') self.assertEqual(1, cluster.DeleteNodePoolAsync.call_count) - @flagsaver.flagsaver( - k8s_mgmt_concurrent_nodepools=2, - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - k8s_mgmt_pipeline_scenario_a=True, - ) - def testPipelinedModeActivatedByFlag(self): - """Tests pipelined mode is activated by the pipeline_scenario_a flag.""" - cluster = _make_mock_cluster(pool_names=[]) - samples = kubernetes_management_benchmark._RunScenarioA( - cluster, '1.33', '1.34' - ) - metrics = {s.metric for s in samples} - self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) - - -class RunScenarioAPipelinedTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunScenarioAPipelined pipelined execution path.""" - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testPipelinedProducesAllThreePhases(self): - """Tests pipelined Scenario A produces Create/Upgrade/Delete samples.""" - cluster = _make_mock_cluster(pool_names=[]) - samples = kubernetes_management_benchmark._RunScenarioAPipelined( - cluster, n=2, initial='1.33', target='1.34' - ) - metrics = {s.metric for s in samples} - self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Upgrade' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testPipelinedSkipsUpgradeAfterCreateFailure(self): - """Tests pipelined mode skips upgrade when create fails.""" - cluster = _make_mock_cluster(pool_names=[]) - cluster.CreateNodePoolAsync.side_effect = RuntimeError('create failed') - samples = kubernetes_management_benchmark._RunScenarioAPipelined( - cluster, n=1, initial='1.33', target='1.34' - ) - cluster.UpgradeNodePoolAsync.assert_not_called() - upgrade_rate = next( - (s for s in samples if s.metric == 'ScenarioA_Upgrade_SuccessRate'), - None, - ) - if upgrade_rate is not None: - self.assertEqual(0.0, upgrade_rate.value) - class RunScenarioBTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _RunScenarioB cluster-update + nodepool-create scenario.""" From b0c04ec6d6b3f92370fddb25078f83517acd4771 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 10 Jun 2026 12:44:42 +0000 Subject: [PATCH 05/15] kubernetes_management: purge A/B/C naming, extract helpers, separate count metrics, fail-fast on zero ops --- .../kubernetes_management_benchmark.py | 149 ++++++---- .../kubernetes_management_benchmark_test.py | 268 ++++++++++++------ 2 files changed, 274 insertions(+), 143 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 6c60ae6c0f..049a2b8c8c 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -136,14 +136,14 @@ _PREFIX = "pkbm" -def _ScenarioAName(i): +def _ConcurrentPoolName(i): return f"{_PREFIX}a{i:03d}" -_SCENARIO_B_NAME = f"{_PREFIX}b" +_OVERLAPPING_POOL_NAME = f"{_PREFIX}b" -def _ScenarioCName(i): +def _ScalePoolName(i): return f"{_PREFIX}c{i:04d}" @@ -265,25 +265,15 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: samples: list[sample.Sample] = [] if "concurrent_node_pool_ops" in scenarios: - samples += _RunScenarioA(cluster, initial) + samples += _RunConcurrentNodePoolOps(cluster, initial) if "overlapping_cluster_update" in scenarios: - samples += _RunScenarioB(cluster, initial) + samples += _RunOverlappingClusterUpdate(cluster, initial) if "large_scale_provisioning" in scenarios: - # fix: Scenario A/B pools may still be in Deleting state and count - # toward AKS's 100-pool cluster limit. Sweep them out before Scenario C + # Stale pools from earlier scenarios may still be in Deleting state and + # count toward AKS's 100-pool cluster limit; sweep before the scale work # so we don't hit MaxAgentPoolCountReached mid-run. _CleanStartSweep(cluster) - scales = ( - [int(x.strip()) for x in _SCALE_SWEEP.value] - if _SCALE_SWEEP.value - else [_LARGE_SCALE_NODEPOOLS.value] - ) - logging.info("Scenario C: scale sweep = %s", scales) - for scale in scales: - scenario_c_samples = _RunScenarioC(cluster, initial, scale) - for s in scenario_c_samples: - s.metadata["scenario_c_scale"] = str(scale) - samples += scenario_c_samples + samples += _SweepScales(cluster, initial) # Tag all samples with version path and run config for published results. run_meta = { @@ -314,19 +304,39 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: background_tasks.RunThreaded(cluster.DeleteNodePool, leftover) -# --------------------------------------------------------------------------- -# Scenario A -# --------------------------------------------------------------------------- +def _SweepScales( + cluster: kubernetes_cluster.KubernetesCluster, + initial: str, +) -> list[sample.Sample]: + """Runs large-scale provisioning across each requested scale. + + Scales come from --k8s_mgmt_scale_sweep when set, else the single + --k8s_mgmt_large_scale_nodepools value. Each scale's samples are tagged + with large_scale_scale so results stay distinguishable. + """ + scales = ( + [int(x.strip()) for x in _SCALE_SWEEP.value] + if _SCALE_SWEEP.value + else [_LARGE_SCALE_NODEPOOLS.value] + ) + logging.info("large_scale_provisioning: scale sweep = %s", scales) + samples: list[sample.Sample] = [] + for scale in scales: + scale_samples = _ScaleToPoolCount(cluster, initial, scale) + for s in scale_samples: + s.metadata["large_scale_scale"] = str(scale) + samples += scale_samples + return samples -def _RunScenarioA( +def _RunConcurrentNodePoolOps( cluster: kubernetes_cluster.KubernetesCluster, initial: str, ) -> list[sample.Sample]: """Concurrent CreateNodePool then DeleteNodePool.""" n = _CONCURRENT_NODEPOOLS.value logging.info("concurrent_node_pool_ops: %d pools, initial=%s", n, initial) - pool_names = [_ScenarioAName(i) for i in range(n)] + pool_names = [_ConcurrentPoolName(i) for i in range(n)] configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] @@ -340,11 +350,11 @@ def _RunScenarioA( get_name=lambda cfg: cfg.name, ) samples += _OpSamples( - "ScenarioA_Create", create_results, attempted_ops=len(pool_names) + "ConcurrentOps_Create", create_results, attempted_ops=len(pool_names) ) # ── Phase 2: concurrent deletes (live-list to catch EKS rollbacks) ────── - alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}a")] + alive = _LiveNodePoolNames(cluster, f"{_PREFIX}a") logging.info( "concurrent_node_pool_ops: %d live pools for delete (originally %d)", len(alive), @@ -358,16 +368,11 @@ def _RunScenarioA( ) # attempted_ops=n: success rate reflects original request, not just live. # EKS rolls back timed-out pools silently — without this shows 100%. - samples += _OpSamples("ScenarioA_Delete", delete_results, attempted_ops=n) + samples += _OpSamples("ConcurrentOps_Delete", delete_results, attempted_ops=n) return samples -# --------------------------------------------------------------------------- -# Scenario B -# --------------------------------------------------------------------------- - - -def _RunScenarioB( +def _RunOverlappingClusterUpdate( cluster: kubernetes_cluster.KubernetesCluster, initial: str, ) -> list[sample.Sample]: @@ -377,14 +382,14 @@ def _RunScenarioB( recorded independently. Overlap window = ClusterUpdate E2E latency. """ logging.info("Scenario B: overlapping cluster update + node-pool create") - cfg = _MakeNodePoolConfig(cluster, _SCENARIO_B_NAME) + cfg = _MakeNodePoolConfig(cluster, _OVERLAPPING_POOL_NAME) results = _Results() def DoClusterUpdate(): init, e2e, err = _TimedAsync( cluster.UpdateClusterAsync, cluster.WaitForOperation ) - results.add("ScenarioB_ClusterUpdate", init, e2e, err) + results.add("OverlappingUpdate_ClusterUpdate", init, e2e, err) logging.info( "Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s", init, @@ -397,7 +402,7 @@ def DoCreate(): lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), cluster.WaitForOperation, ) - results.add("ScenarioB_NodePoolCreate", init, e2e, err) + results.add("OverlappingUpdate_NodePoolCreate", init, e2e, err) logging.info( "Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s", init, @@ -412,16 +417,11 @@ def DoCreate(): samples += _OpSamples(entry.name, [entry], attempted_ops=1) # Remove test pool (best-effort). - cluster.DeleteNodePool(_SCENARIO_B_NAME) + cluster.DeleteNodePool(_OVERLAPPING_POOL_NAME) return samples -# --------------------------------------------------------------------------- -# Scenario C -# --------------------------------------------------------------------------- - - -def _RunScenarioC( +def _ScaleToPoolCount( cluster: kubernetes_cluster.KubernetesCluster, initial: str, scale: int, @@ -439,7 +439,7 @@ def _RunScenarioC( _MAX_CONCURRENT.value, initial, ) - pool_names = [_ScenarioCName(i) for i in range(scale)] + pool_names = [_ScalePoolName(i) for i in range(scale)] configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] @@ -456,10 +456,12 @@ def _RunScenarioC( logging.info( "Scenario C scale=%d: %d/%d creates succeeded", scale, created_ok, scale ) - samples += _OpSamples("ScenarioC_Create", create_results, attempted_ops=scale) + samples += _OpSamples( + "LargeScale_Create", create_results, attempted_ops=scale + ) # ── Deletes (live-list) ────────────────────────────────────────────────── - alive = [p for p in cluster.GetNodePoolNames() if p.startswith(f"{_PREFIX}c")] + alive = _LiveNodePoolNames(cluster, f"{_PREFIX}c") logging.info( "Scenario C scale=%d: %d live pools for delete (originally %d;" + " %d rolled back by cloud)", @@ -470,7 +472,7 @@ def _RunScenarioC( ) if not alive: logging.info("Scenario C scale=%d: all creates rolled back.", scale) - samples += _OpSamples("ScenarioC_Delete", [], attempted_ops=scale) + samples += _OpSamples("LargeScale_Delete", [], attempted_ops=scale) return samples delete_results = _RunAsync( @@ -480,7 +482,9 @@ def _RunScenarioC( get_name=str, ) # attempted_ops=scale: accurate rate against original request count. - samples += _OpSamples("ScenarioC_Delete", delete_results, attempted_ops=scale) + samples += _OpSamples( + "LargeScale_Delete", delete_results, attempted_ops=scale + ) return samples @@ -573,6 +577,13 @@ def _MakeNodePoolConfig( return cfg +def _LiveNodePoolNames( + cluster: kubernetes_cluster.KubernetesCluster, prefix: str +) -> list[str]: + """Returns current node-pool names matching the given prefix.""" + return [p for p in cluster.GetNodePoolNames() if p.startswith(prefix)] + + def _OpSamples( metric_prefix: str, results: list[_OpResult], @@ -594,8 +605,6 @@ def _OpSamples( success = 0 for r in results: - if isinstance(r, tuple): - r = _OpResult(*r) meta = {"operation_name": r.name, "success": str(r.error is None)} if r.error is not None: meta["error"] = str(r.error)[:200] @@ -617,23 +626,43 @@ def _OpSamples( ) ) - # ── Success rate ───────────────────────────────────────────────────────── + # ── Counts + success rate ────────────────────────────────────────────── total = attempted_ops if attempted_ops is not None else len(results) executed = len(results) - if total > 0: + if total == 0: + raise errors.Benchmarks.RunError( + f"{metric_prefix}: zero operations attempted — the scenario " + "produced no work, which indicates a setup or dispatch failure." + ) + # Expose each count as its own metric (not just SuccessRate metadata). + count_meta = { + "total_ops": str(total), + "executed_ops": str(executed), + "successful_ops": str(success), + "skipped_ops": str(total - executed), + } + for count_label, count_value in ( + ("TotalOps", total), + ("ExecutedOps", executed), + ("SuccessfulOps", success), + ("SkippedOps", total - executed), + ): samples.append( sample.Sample( - f"{metric_prefix}_SuccessRate", - 100.0 * success / total, - "percent", - { - "total_ops": str(total), - "executed_ops": str(executed), - "successful_ops": str(success), - "skipped_ops": str(total - executed), - }, + f"{metric_prefix}_{count_label}", + count_value, + "count", + dict(count_meta), ) ) + samples.append( + sample.Sample( + f"{metric_prefix}_SuccessRate", + 100.0 * success / total, + "percent", + dict(count_meta), + ) + ) # ── Aggregate stats (successful ops only) ──────────────────────────────── for phase_label, latencies in ( diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index 97aef64124..9518c2ede6 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -81,61 +81,61 @@ def _make_mock_config(cluster_type='Kubernetes'): class ScenarioNameTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for _SCENARIO_A_NAME, _SCENARIO_B_NAME, _SCENARIO_C_NAME.""" + """Tests for _SCENARIO_A_NAME, _OVERLAPPING_POOL_NAME, _SCENARIO_C_NAME.""" def testScenarioANameZeroPadsToThreeDigits(self): self.assertEqual( 'pkbma000', - kubernetes_management_benchmark._ScenarioAName(0), + kubernetes_management_benchmark._ConcurrentPoolName(0), ) def testScenarioANameTwoDigitIndex(self): self.assertEqual( 'pkbma042', - kubernetes_management_benchmark._ScenarioAName(42), + kubernetes_management_benchmark._ConcurrentPoolName(42), ) def testScenarioANameMaxThreeDigits(self): self.assertEqual( 'pkbma999', - kubernetes_management_benchmark._ScenarioAName(999), + kubernetes_management_benchmark._ConcurrentPoolName(999), ) def testScenarioBNameIsConstant(self): self.assertEqual( 'pkbmb', - kubernetes_management_benchmark._SCENARIO_B_NAME, + kubernetes_management_benchmark._OVERLAPPING_POOL_NAME, ) def testScenarioCNameZeroPadsToFourDigits(self): self.assertEqual( 'pkbmc0000', - kubernetes_management_benchmark._ScenarioCName(0), + kubernetes_management_benchmark._ScalePoolName(0), ) def testScenarioCNameSingleDigitIndex(self): self.assertEqual( 'pkbmc0007', - kubernetes_management_benchmark._ScenarioCName(7), + kubernetes_management_benchmark._ScalePoolName(7), ) def testScenarioCNameFourDigitIndex(self): self.assertEqual( 'pkbmc1000', - kubernetes_management_benchmark._ScenarioCName(1000), + kubernetes_management_benchmark._ScalePoolName(1000), ) def testAllNamesWithinAksLimit(self): for i in range(1000): self.assertLessEqual( - len(kubernetes_management_benchmark._ScenarioAName(i)), 12 + len(kubernetes_management_benchmark._ConcurrentPoolName(i)), 12 ) for i in range(10000): self.assertLessEqual( - len(kubernetes_management_benchmark._ScenarioCName(i)), 12 + len(kubernetes_management_benchmark._ScalePoolName(i)), 12 ) self.assertLessEqual( - len(kubernetes_management_benchmark._SCENARIO_B_NAME), 12 + len(kubernetes_management_benchmark._OVERLAPPING_POOL_NAME), 12 ) @@ -493,7 +493,10 @@ def testEmptyResultsYieldsSuccessRateOfZero(self): self.assertEqual(0.0, rate.value) def testPerOpInitiationAndE2eSamplesGenerated(self): - results = [('op1', 0.1, 1.0, None), ('op2', 0.2, 2.0, None)] + results = [ + kubernetes_management_benchmark._OpResult('op1', 0.1, 1.0, None), + kubernetes_management_benchmark._OpResult('op2', 0.2, 2.0, None), + ] samples = kubernetes_management_benchmark._OpSamples( 'MyOp', results, attempted_ops=2 ) @@ -502,7 +505,10 @@ def testPerOpInitiationAndE2eSamplesGenerated(self): self.assertIn('MyOp_EndToEndLatency', metrics) def testSuccessRateHundredPercentWhenAllSucceed(self): - results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 1.5, None)] + results = [ + kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), + kubernetes_management_benchmark._OpResult('op2', 0.5, 1.5, None), + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=2 ) @@ -511,8 +517,10 @@ def testSuccessRateHundredPercentWhenAllSucceed(self): def testSuccessRateFiftyPercentWhenHalfFail(self): results = [ - ('op1', 1.0, 2.0, None), - ('op2', 0.5, 0.5, RuntimeError('fail')), + kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), + kubernetes_management_benchmark._OpResult( + 'op2', 0.5, 0.5, RuntimeError('fail') + ), ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=2 @@ -521,15 +529,38 @@ def testSuccessRateFiftyPercentWhenHalfFail(self): self.assertAlmostEqual(50.0, rate.value) def testAttemptedOpsExceedingExecutedOpsLowersRate(self): - results = [('op1', 1.0, 2.0, None)] + results = [kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None)] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=3 ) rate = next(s for s in samples if s.metric == 'Op_SuccessRate') self.assertAlmostEqual(100.0 / 3, rate.value, places=3) + def testCountsExposedAsSeparateMetrics(self): + """630: Total/Executed/Successful/Skipped each emitted as a metric.""" + results = [ + kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), + kubernetes_management_benchmark._OpResult( + 'op2', 0.5, 0.5, Exception('e') + ), + ] + samples = kubernetes_management_benchmark._OpSamples( + 'Op', results, attempted_ops=3 + ) + metrics = {s.metric: s.value for s in samples} + self.assertEqual(3, metrics['Op_TotalOps']) + self.assertEqual(2, metrics['Op_ExecutedOps']) + self.assertEqual(1, metrics['Op_SuccessfulOps']) + self.assertEqual(1, metrics['Op_SkippedOps']) + def testSuccessRateMetadataFields(self): - results = [('op1', 1.0, 2.0, None), ('op2', 0.5, 0.5, Exception('err'))] + """SuccessRate sample carries the op-count metadata fields.""" + results = [ + kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), + kubernetes_management_benchmark._OpResult( + 'op2', 0.5, 0.5, Exception('err') + ), + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=3 ) @@ -539,8 +570,17 @@ def testSuccessRateMetadataFields(self): self.assertEqual('1', rate.metadata['successful_ops']) self.assertEqual('1', rate.metadata['skipped_ops']) + def testZeroAttemptedOpsRaisesRunError(self): + """total==0 indicates a dispatch/setup failure; fail loudly (623).""" + with self.assertRaises(errors.Benchmarks.RunError): + kubernetes_management_benchmark._OpSamples('Op', [], attempted_ops=0) + def testFailedOpIncludesErrorMessage(self): - results = [('fail-op', 0.5, 0.5, RuntimeError('oops'))] + results = [ + kubernetes_management_benchmark._OpResult( + 'fail-op', 0.5, 0.5, RuntimeError('oops') + ) + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=1 ) @@ -549,7 +589,13 @@ def testFailedOpIncludesErrorMessage(self): self.assertIn('oops', init_s.metadata['error']) def testAggregatesGeneratedForTwoOrMoreSuccesses(self): - results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 4)] + """Aggregate stat samples appear once there are >=2 successes.""" + results = [ + kubernetes_management_benchmark._OpResult( + f'op{i}', float(i), float(i) * 2, None + ) + for i in range(1, 4) + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=3 ) @@ -558,14 +604,20 @@ def testAggregatesGeneratedForTwoOrMoreSuccesses(self): self.assertIn('Op_EndToEndLatency_Mean', metrics) def testAggregatesNotGeneratedForSingleSuccess(self): - results = [('op1', 1.0, 2.0, None)] + results = [kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None)] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=1 ) self.assertNotIn('Op_InitiationLatency_Mean', [s.metric for s in samples]) def testOutliersGeneratedForFourOrMoreSuccesses(self): - results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 6)] + """Outlier-count samples appear once there are >=4 successes.""" + results = [ + kubernetes_management_benchmark._OpResult( + f'op{i}', float(i), float(i) * 2, None + ) + for i in range(1, 6) + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=5 ) @@ -574,7 +626,12 @@ def testOutliersGeneratedForFourOrMoreSuccesses(self): self.assertIn('Op_EndToEndLatency_OutlierCount', metrics) def testOutliersNotGeneratedForThreeOrFewerSuccesses(self): - results = [(f'op{i}', float(i), float(i) * 2, None) for i in range(1, 4)] + results = [ + kubernetes_management_benchmark._OpResult( + f'op{i}', float(i), float(i) * 2, None + ) + for i in range(1, 4) + ] samples = kubernetes_management_benchmark._OpSamples( 'Op', results, attempted_ops=3 ) @@ -707,11 +764,15 @@ def testRunCallsCleanStartSweep(self): with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ) as mock_clean, mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): kubernetes_management_benchmark.Run(bm_spec) self.assertEqual(mock_clean.call_count, 2) @@ -723,17 +784,21 @@ def testRunCallsCleanStartSweep(self): k8s_mgmt_large_scale_nodepools=10, ) def testRunOnlyScenarioACallsOnlyA(self): - """Tests that Run only calls _RunScenarioA when scenarios=['A'].""" + """Run dispatches only to _RunConcurrentNodePoolOps for that scenario.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ) as mock_a, mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ) as mock_b, mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ) as mock_c: kubernetes_management_benchmark.Run(bm_spec) mock_a.assert_called_once() @@ -746,17 +811,21 @@ def testRunOnlyScenarioACallsOnlyA(self): k8s_mgmt_large_scale_nodepools=10, ) def testRunOnlyScenarioBCallsOnlyB(self): - """Tests that Run only calls _RunScenarioB when scenarios=['B'].""" + """Run dispatches only to _RunOverlappingClusterUpdate for that scenario.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ) as mock_a, mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ) as mock_b, mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ) as mock_c: kubernetes_management_benchmark.Run(bm_spec) mock_a.assert_not_called() @@ -769,17 +838,21 @@ def testRunOnlyScenarioBCallsOnlyB(self): k8s_mgmt_large_scale_nodepools=42, ) def testRunScenarioCPassesLargeScaleFlag(self): - """Tests that Run passes the large-scale-nodepools flag to _RunScenarioC.""" + """Run passes the large-scale-nodepools flag down to _ScaleToPoolCount.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ) as mock_c: kubernetes_management_benchmark.Run(bm_spec) mock_c.assert_called_once() @@ -792,18 +865,22 @@ def testRunScenarioCPassesLargeScaleFlag(self): k8s_mgmt_large_scale_nodepools=100, ) def testRunScenarioCScaleSweepRunsTwice(self): - """Tests that Run calls _RunScenarioC once per scale in the sweep.""" + """Tests that Run calls _ScaleToPoolCount once per scale in the sweep.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( kubernetes_management_benchmark, - '_RunScenarioC', + '_ScaleToPoolCount', return_value=[_make_sample('m', 1.0)], ) as mock_c: kubernetes_management_benchmark.Run(bm_spec) @@ -818,24 +895,28 @@ def testRunScenarioCScaleSweepRunsTwice(self): k8s_mgmt_large_scale_nodepools=10, ) def testRunTagsScenarioCScaleInMetadata(self): - """Tests that Run adds scenario_c_scale to each sample's metadata.""" + """Tests that Run adds large_scale_scale to each sample's metadata.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('metric', 1.0) with mock.patch.object( kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioA', return_value=[] + kubernetes_management_benchmark, + '_RunConcurrentNodePoolOps', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( kubernetes_management_benchmark, - '_RunScenarioC', + '_ScaleToPoolCount', return_value=[test_sample], ): samples = kubernetes_management_benchmark.Run(bm_spec) - self.assertIn('scenario_c_scale', samples[0].metadata) - self.assertEqual('10', samples[0].metadata['scenario_c_scale']) + self.assertIn('large_scale_scale', samples[0].metadata) + self.assertEqual('10', samples[0].metadata['large_scale_scale']) @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], @@ -851,12 +932,14 @@ def testRunTagsAllSamplesWithRunMetadata(self): kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( kubernetes_management_benchmark, - '_RunScenarioA', + '_RunConcurrentNodePoolOps', return_value=[test_sample], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) meta = samples[0].metadata @@ -882,12 +965,14 @@ def testRunUsesExplicitVersionFlags(self): kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( kubernetes_management_benchmark, - '_RunScenarioA', + '_RunConcurrentNodePoolOps', return_value=[_make_sample('m', 1.0)], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_not_called() @@ -907,12 +992,14 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): kubernetes_management_benchmark, '_CleanStartSweep' ), mock.patch.object( kubernetes_management_benchmark, - '_RunScenarioA', + '_RunConcurrentNodePoolOps', return_value=[_make_sample('m', 1.0)], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioB', return_value=[] + kubernetes_management_benchmark, + '_RunOverlappingClusterUpdate', + return_value=[], ), mock.patch.object( - kubernetes_management_benchmark, '_RunScenarioC', return_value=[] + kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_called_once() @@ -920,7 +1007,7 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): class RunScenarioATest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunScenarioA phase-by-phase create/delete path.""" + """Tests the _RunConcurrentNodePoolOps create/delete path.""" @flagsaver.flagsaver( k8s_mgmt_concurrent_nodepools=2, @@ -930,11 +1017,13 @@ class RunScenarioATest(pkb_common_test_case.PkbCommonTestCase): def testProducesCreateAndDeleteSamples(self): """Tests Scenario A produces Create and Delete samples.""" cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) - samples = kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') + samples = kubernetes_management_benchmark._RunConcurrentNodePoolOps( + cluster, '1.33' + ) metrics = {s.metric for s in samples} - self.assertTrue(any('ScenarioA_Create' in m for m in metrics)) - self.assertTrue(any('ScenarioA_Delete' in m for m in metrics)) - self.assertFalse(any('ScenarioA_Upgrade' in m for m in metrics)) + self.assertTrue(any('ConcurrentOps_Create' in m for m in metrics)) + self.assertTrue(any('ConcurrentOps_Delete' in m for m in metrics)) + self.assertFalse(any('ConcurrentOps_Upgrade' in m for m in metrics)) @flagsaver.flagsaver( k8s_mgmt_concurrent_nodepools=2, @@ -942,9 +1031,9 @@ def testProducesCreateAndDeleteSamples(self): k8s_mgmt_max_concurrent=50, ) def testPassesInitialVersionToCreate(self): - """Tests _RunScenarioA passes initial_version to CreateNodePoolAsync.""" + """_RunConcurrentNodePoolOps passes initial_version to creates.""" cluster = _make_mock_cluster(pool_names=['pkbma000', 'pkbma001']) - kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') + kubernetes_management_benchmark._RunConcurrentNodePoolOps(cluster, '1.33') for call in cluster.CreateNodePoolAsync.call_args_list: kw = call.kwargs if call.kwargs else {} pos = call.args @@ -959,25 +1048,32 @@ def testPassesInitialVersionToCreate(self): k8s_mgmt_max_concurrent=50, ) def testDeleteUsesLivePoolList(self): - """Tests that _RunScenarioA deletes only the pools it finds at runtime.""" + """_RunConcurrentNodePoolOps deletes only pools found at runtime.""" cluster = _make_mock_cluster(pool_names=['pkbma000']) - kubernetes_management_benchmark._RunScenarioA(cluster, '1.33') + kubernetes_management_benchmark._RunConcurrentNodePoolOps(cluster, '1.33') self.assertEqual(1, cluster.DeleteNodePoolAsync.call_count) class RunScenarioBTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunScenarioB cluster-update + nodepool-create scenario.""" + """Tests the _RunOverlappingClusterUpdate overlap scenario.""" @flagsaver.flagsaver( k8s_mgmt_nodes_per_nodepool=1, k8s_mgmt_max_concurrent=50, ) def testProducesClusterUpdateAndNodePoolCreateSamples(self): + """Overlap scenario emits both cluster-update and create samples.""" cluster = _make_mock_cluster(pool_names=[]) - samples = kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + samples = kubernetes_management_benchmark._RunOverlappingClusterUpdate( + cluster, '1.33' + ) metrics = {s.metric for s in samples} - self.assertTrue(any('ScenarioB_ClusterUpdate' in m for m in metrics)) - self.assertTrue(any('ScenarioB_NodePoolCreate' in m for m in metrics)) + self.assertTrue( + any('OverlappingUpdate_ClusterUpdate' in m for m in metrics) + ) + self.assertTrue( + any('OverlappingUpdate_NodePoolCreate' in m for m in metrics) + ) @flagsaver.flagsaver( k8s_mgmt_nodes_per_nodepool=1, @@ -985,9 +1081,11 @@ def testProducesClusterUpdateAndNodePoolCreateSamples(self): ) def testDeletesTestPoolAfterRun(self): cluster = _make_mock_cluster(pool_names=[]) - kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + kubernetes_management_benchmark._RunOverlappingClusterUpdate( + cluster, '1.33' + ) cluster.DeleteNodePool.assert_called_once_with( - kubernetes_management_benchmark._SCENARIO_B_NAME + kubernetes_management_benchmark._OVERLAPPING_POOL_NAME ) @flagsaver.flagsaver( @@ -998,16 +1096,20 @@ def testDeleteFailureRaisesInScenarioB(self): cluster = _make_mock_cluster(pool_names=[]) cluster.DeleteNodePool.side_effect = RuntimeError('delete failed') with self.assertRaises(RuntimeError): - kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + kubernetes_management_benchmark._RunOverlappingClusterUpdate( + cluster, '1.33' + ) @flagsaver.flagsaver( k8s_mgmt_nodes_per_nodepool=1, k8s_mgmt_max_concurrent=50, ) def testPassesInitialVersionToCreate(self): - """Tests _RunScenarioB passes initial_version to CreateNodePoolAsync.""" + """_RunOverlappingClusterUpdate passes initial_version to the create.""" cluster = _make_mock_cluster(pool_names=[]) - kubernetes_management_benchmark._RunScenarioB(cluster, '1.33') + kubernetes_management_benchmark._RunOverlappingClusterUpdate( + cluster, '1.33' + ) for call in cluster.CreateNodePoolAsync.call_args_list: kw = call.kwargs if call.kwargs else {} pos = call.args @@ -1018,7 +1120,7 @@ def testPassesInitialVersionToCreate(self): class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunScenarioC large-scale create-and-delete scenario.""" + """Tests for the _ScaleToPoolCount large-scale create-and-delete scenario.""" @flagsaver.flagsaver( k8s_mgmt_nodes_per_nodepool=1, @@ -1026,12 +1128,12 @@ class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): ) def testProducesCreateAndDeleteSamples(self): cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) - samples = kubernetes_management_benchmark._RunScenarioC( + samples = kubernetes_management_benchmark._ScaleToPoolCount( cluster, '1.33', scale=2 ) metrics = {s.metric for s in samples} - self.assertTrue(any('ScenarioC_Create' in m for m in metrics)) - self.assertTrue(any('ScenarioC_Delete' in m for m in metrics)) + self.assertTrue(any('LargeScale_Create' in m for m in metrics)) + self.assertTrue(any('LargeScale_Delete' in m for m in metrics)) @flagsaver.flagsaver( k8s_mgmt_nodes_per_nodepool=1, @@ -1040,11 +1142,11 @@ def testProducesCreateAndDeleteSamples(self): def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): """Tests Scenario C records 0% delete rate when no live pools exist.""" cluster = _make_mock_cluster(pool_names=[]) - samples = kubernetes_management_benchmark._RunScenarioC( + samples = kubernetes_management_benchmark._ScaleToPoolCount( cluster, '1.33', scale=3 ) delete_rate = next( - s for s in samples if s.metric == 'ScenarioC_Delete_SuccessRate' + s for s in samples if s.metric == 'LargeScale_Delete_SuccessRate' ) self.assertEqual(0.0, delete_rate.value) cluster.DeleteNodePoolAsync.assert_not_called() @@ -1055,7 +1157,7 @@ def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): ) def testDeleteUsesLiveListNotOriginalCreateList(self): cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) - kubernetes_management_benchmark._RunScenarioC(cluster, '1.33', scale=3) + kubernetes_management_benchmark._ScaleToPoolCount(cluster, '1.33', scale=3) self.assertEqual(2, cluster.DeleteNodePoolAsync.call_count) @flagsaver.flagsaver( @@ -1065,11 +1167,11 @@ def testDeleteUsesLiveListNotOriginalCreateList(self): def testCreateSuccessRateUsesScaleAsDenominator(self): """Tests Scenario C create success rate uses scale as total_ops.""" cluster = _make_mock_cluster(pool_names=['pkbmc0000']) - samples = kubernetes_management_benchmark._RunScenarioC( + samples = kubernetes_management_benchmark._ScaleToPoolCount( cluster, '1.33', scale=3 ) create_rate = next( - s for s in samples if s.metric == 'ScenarioC_Create_SuccessRate' + s for s in samples if s.metric == 'LargeScale_Create_SuccessRate' ) self.assertLessEqual(create_rate.value, 100.0) self.assertEqual('3', create_rate.metadata['total_ops']) From 0e1dba30978d84677a4098fc36bdd1d52072fe3c Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 10 Jun 2026 12:49:36 +0000 Subject: [PATCH 06/15] kubernetes_management: sweep node pools after each scenario instead of at start; Cleanup reuses sweep --- .../kubernetes_management_benchmark.py | 36 +++++++++--------- .../kubernetes_management_benchmark_test.py | 37 ++++++++++--------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 049a2b8c8c..8bde5be40b 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -223,14 +223,18 @@ def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: ) -def _CleanStartSweep(cluster: kubernetes_cluster.KubernetesCluster) -> None: - """Deletes any stale pkbm* node pools so each run starts clean (spec C.2).""" - stale = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] - if not stale: - logging.info("CleanStart: no stale pools found — clean start confirmed.") +def _SweepNodePools(cluster: kubernetes_cluster.KubernetesCluster) -> None: + """Deletes all pkbm* node pools, blocking until each delete completes. + + Called after each scenario so the next one starts from a clean cluster, + and from Cleanup() as a final best-effort teardown. + """ + pools = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] + if not pools: + logging.info("Sweep: no pkbm* pools present — cluster is clean.") return - logging.info("CleanStart: deleting %d stale pools: %s", len(stale), stale) - background_tasks.RunThreaded(cluster.DeleteNodePool, stale) + logging.info("Sweep: deleting %d pkbm* pools: %s", len(pools), pools) + background_tasks.RunThreaded(cluster.DeleteNodePool, pools) def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: @@ -238,9 +242,6 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: cluster = benchmark_spec.container_cluster assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) - # Spec C.2: start clean. - _CleanStartSweep(cluster) - # Resolve the initial node-pool version once; log clearly; tag every sample. flag_initial = _INITIAL_VERSION.value if not flag_initial: @@ -266,14 +267,14 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: if "concurrent_node_pool_ops" in scenarios: samples += _RunConcurrentNodePoolOps(cluster, initial) + # Each scenario leaves the cluster clean for the next one. + _SweepNodePools(cluster) if "overlapping_cluster_update" in scenarios: samples += _RunOverlappingClusterUpdate(cluster, initial) + _SweepNodePools(cluster) if "large_scale_provisioning" in scenarios: - # Stale pools from earlier scenarios may still be in Deleting state and - # count toward AKS's 100-pool cluster limit; sweep before the scale work - # so we don't hit MaxAgentPoolCountReached mid-run. - _CleanStartSweep(cluster) samples += _SweepScales(cluster, initial) + _SweepNodePools(cluster) # Tag all samples with version path and run config for published results. run_meta = { @@ -297,11 +298,8 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: ["delete", "pod", _SLEEP_POD_NAME, "--ignore-not-found"], raise_on_failure=False, ) - leftover = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] - if not leftover: - return - logging.info("Cleanup: deleting %d leftover node pools", len(leftover)) - background_tasks.RunThreaded(cluster.DeleteNodePool, leftover) + # Final teardown reuses the same sweep the scenarios use. + _SweepNodePools(cluster) def _SweepScales( diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index 9518c2ede6..f3de54c591 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -282,14 +282,14 @@ def testCleanupHandlesNoneCluster(self): kubernetes_management_benchmark.Cleanup(bm_spec) -class CleanStartSweepTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _CleanStartSweep helper function.""" +class SweepNodePoolsTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _SweepNodePools helper function.""" def testDeletesStalePkbmPools(self): cluster = _make_mock_cluster( pool_names=['pkbma000', 'pkbmc0001', 'user-pool'] ) - kubernetes_management_benchmark._CleanStartSweep(cluster) + kubernetes_management_benchmark._SweepNodePools(cluster) deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} self.assertIn('pkbma000', deleted) self.assertIn('pkbmc0001', deleted) @@ -297,14 +297,14 @@ def testDeletesStalePkbmPools(self): def testDoesNothingWhenNoPkbmPools(self): cluster = _make_mock_cluster(pool_names=['user-pool', 'default-pool']) - kubernetes_management_benchmark._CleanStartSweep(cluster) + kubernetes_management_benchmark._SweepNodePools(cluster) cluster.DeleteNodePool.assert_not_called() - def testCleanStartSweepRaisesOnGetNodePoolNamesException(self): + def testSweepRaisesOnGetNodePoolNamesException(self): cluster = _make_mock_cluster() cluster.GetNodePoolNames.side_effect = RuntimeError('API error') with self.assertRaises(RuntimeError): - kubernetes_management_benchmark._CleanStartSweep(cluster) + kubernetes_management_benchmark._SweepNodePools(cluster) class ResultsTest(pkb_common_test_case.PkbCommonTestCase): @@ -757,12 +757,12 @@ class RunTest(pkb_common_test_case.PkbCommonTestCase): k8s_mgmt_scale_sweep=[], k8s_mgmt_large_scale_nodepools=10, ) - def testRunCallsCleanStartSweep(self): - """Tests that Run invokes _CleanStartSweep before executing scenarios.""" + def testRunSweepsAfterEachScenario(self): + """Run sweeps node pools after each scenario that executes.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ) as mock_clean, mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -775,7 +775,8 @@ def testRunCallsCleanStartSweep(self): kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): kubernetes_management_benchmark.Run(bm_spec) - self.assertEqual(mock_clean.call_count, 2) + # All three scenarios run by default -> one sweep after each. + self.assertEqual(mock_clean.call_count, 3) mock_clean.assert_called_with(cluster) @flagsaver.flagsaver( @@ -788,7 +789,7 @@ def testRunOnlyScenarioACallsOnlyA(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -815,7 +816,7 @@ def testRunOnlyScenarioBCallsOnlyB(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -842,7 +843,7 @@ def testRunScenarioCPassesLargeScaleFlag(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -869,7 +870,7 @@ def testRunScenarioCScaleSweepRunsTwice(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -900,7 +901,7 @@ def testRunTagsScenarioCScaleInMetadata(self): bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('metric', 1.0) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -929,7 +930,7 @@ def testRunTagsAllSamplesWithRunMetadata(self): bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('m', 1.0) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -962,7 +963,7 @@ def testRunUsesExplicitVersionFlags(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -989,7 +990,7 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_CleanStartSweep' + kubernetes_management_benchmark, '_SweepNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', From 620fd568f7465365a7b53bc051179c6184936e33 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Wed, 10 Jun 2026 12:52:45 +0000 Subject: [PATCH 07/15] kubernetes_management: condense changelog, document _OpResult fields, parameterize name tests --- CHANGES.next.md | 5 +- .../kubernetes_management_benchmark.py | 30 +++++++---- .../kubernetes_management_benchmark_test.py | 54 +++++++------------ 3 files changed, 41 insertions(+), 48 deletions(-) diff --git a/CHANGES.next.md b/CHANGES.next.md index 6c1461a725..9440b7739f 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -1,9 +1,6 @@ ### New features: - Add kubernetes_management benchmark for measuring GKE/EKS/AKS management - plane API responsiveness. (from @ashishsuneja) -- Add KubernetesCluster base class management plane abstract methods: - CreateNodePool, DeleteNodePool, UpgradeNodePool, UpdateCluster and - their async counterparts. (from @ashishsuneja) + plane API responsiveness. ### Breaking changes: diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 8bde5be40b..c4a36517c2 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -149,17 +149,26 @@ def _ScalePoolName(i): @dataclasses.dataclass class _OpResult: - """Holds timing and outcome for a single async management-plane operation.""" + """Timing and outcome for a single async management-plane operation. + + Attributes: + name: Node-pool (or operation) name the result is for. + initiation_latency: Seconds from issuing the async API call until it is + accepted and an operation handle is returned (time to *start*). + end_to_end_latency: Seconds from issuing the call until the operation + fully completes (initiation plus server-side execution). + error: The exception raised if the operation failed, else None. + """ name: str - init_dur: float - e2e_dur: float + initiation_latency: float + end_to_end_latency: float error: Exception | None = None def __iter__(self): yield self.name - yield self.init_dur - yield self.e2e_dur + yield self.initiation_latency + yield self.end_to_end_latency yield self.error @@ -608,19 +617,22 @@ def _OpSamples( meta["error"] = str(r.error)[:200] else: success += 1 - init_latencies.append(r.init_dur) - e2e_latencies.append(r.e2e_dur) + init_latencies.append(r.initiation_latency) + e2e_latencies.append(r.end_to_end_latency) samples.append( sample.Sample( f"{metric_prefix}_InitiationLatency", - r.init_dur, + r.initiation_latency, "seconds", dict(meta), ) ) samples.append( sample.Sample( - f"{metric_prefix}_EndToEndLatency", r.e2e_dur, "seconds", dict(meta) + f"{metric_prefix}_EndToEndLatency", + r.end_to_end_latency, + "seconds", + dict(meta), ) ) diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index f3de54c591..e916be9cb5 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -22,6 +22,7 @@ from absl import flags from absl.testing import flagsaver +from absl.testing import parameterized from perfkitbenchmarker import errors from perfkitbenchmarker import sample from perfkitbenchmarker.linux_benchmarks import kubernetes_management_benchmark @@ -80,49 +81,32 @@ def _make_mock_config(cluster_type='Kubernetes'): return cfg -class ScenarioNameTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for _SCENARIO_A_NAME, _OVERLAPPING_POOL_NAME, _SCENARIO_C_NAME.""" +class NodePoolNameTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the node-pool name-generation helpers.""" - def testScenarioANameZeroPadsToThreeDigits(self): - self.assertEqual( - 'pkbma000', - kubernetes_management_benchmark._ConcurrentPoolName(0), - ) - - def testScenarioANameTwoDigitIndex(self): - self.assertEqual( - 'pkbma042', - kubernetes_management_benchmark._ConcurrentPoolName(42), - ) - - def testScenarioANameMaxThreeDigits(self): - self.assertEqual( - 'pkbma999', - kubernetes_management_benchmark._ConcurrentPoolName(999), - ) - - def testScenarioBNameIsConstant(self): - self.assertEqual( - 'pkbmb', - kubernetes_management_benchmark._OVERLAPPING_POOL_NAME, - ) - - def testScenarioCNameZeroPadsToFourDigits(self): + @parameterized.named_parameters( + ('zero', 0, 'pkbma000'), + ('two_digit', 42, 'pkbma042'), + ('max_three_digit', 999, 'pkbma999'), + ) + def testConcurrentPoolNameZeroPadsToThreeDigits(self, index, expected): self.assertEqual( - 'pkbmc0000', - kubernetes_management_benchmark._ScalePoolName(0), + expected, kubernetes_management_benchmark._ConcurrentPoolName(index) ) - def testScenarioCNameSingleDigitIndex(self): + @parameterized.named_parameters( + ('zero', 0, 'pkbmc0000'), + ('single_digit', 7, 'pkbmc0007'), + ('four_digit', 1000, 'pkbmc1000'), + ) + def testScalePoolNameZeroPadsToFourDigits(self, index, expected): self.assertEqual( - 'pkbmc0007', - kubernetes_management_benchmark._ScalePoolName(7), + expected, kubernetes_management_benchmark._ScalePoolName(index) ) - def testScenarioCNameFourDigitIndex(self): + def testOverlappingPoolNameIsConstant(self): self.assertEqual( - 'pkbmc1000', - kubernetes_management_benchmark._ScalePoolName(1000), + 'pkbmb', kubernetes_management_benchmark._OVERLAPPING_POOL_NAME ) def testAllNamesWithinAksLimit(self): From e2a106fa4f932a357b1d7f021abc00276a5786dc Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 12 Jun 2026 09:38:00 +0000 Subject: [PATCH 08/15] kubernetes_management: remove BaseEksCluster AddNodepool no-op stub (folds #6750) --- perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index a57a43057a..753e1ec30f 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -375,9 +375,6 @@ def GetNodePoolNames(self) -> list[str]: nodegroups = json.loads(stdout) return [ng['Name'] for ng in nodegroups] - def AddNodepool(self, batch_name, pool_id): - pass - class EksCluster(BaseEksCluster): """Class representing an Elastic Kubernetes Service cluster.""" From 17df7f8b2ecb92ab6e0e8c6cc1c9d7c8eb907163 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 12 Jun 2026 11:29:13 +0000 Subject: [PATCH 09/15] kubernetes_management: fail-hard by default, tolerant large_scale; OpTiming/ThreadSafeResults redesign --- .../kubernetes_management_benchmark.py | 349 ++++++++++------- .../kubernetes_management_benchmark_test.py | 366 +++++++++--------- 2 files changed, 391 insertions(+), 324 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index c4a36517c2..4d8bbefe91 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -137,6 +137,10 @@ def _ConcurrentPoolName(i): + """Returns the i-th concurrent-ops pool name. + + Three-digit zero-padded so names stay within AKS's 12-char node-pool limit. + """ return f"{_PREFIX}a{i:03d}" @@ -148,28 +152,23 @@ def _ScalePoolName(i): @dataclasses.dataclass -class _OpResult: - """Timing and outcome for a single async management-plane operation. +class OpTiming: + """Latency of a single async management-plane operation. + + Pure timing data — the metric name is supplied by the sample builder, and + failures abort the run rather than being recorded here (so there is no + error field). large_scale_provisioning, which tolerates partial failure, + tracks failed pool names separately. Attributes: - name: Node-pool (or operation) name the result is for. initiation_latency: Seconds from issuing the async API call until it is accepted and an operation handle is returned (time to *start*). end_to_end_latency: Seconds from issuing the call until the operation fully completes (initiation plus server-side execution). - error: The exception raised if the operation failed, else None. """ - name: str initiation_latency: float end_to_end_latency: float - error: Exception | None = None - - def __iter__(self): - yield self.name - yield self.initiation_latency - yield self.end_to_end_latency - yield self.error def GetConfig(user_config): @@ -232,17 +231,17 @@ def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: ) -def _SweepNodePools(cluster: kubernetes_cluster.KubernetesCluster) -> None: - """Deletes all pkbm* node pools, blocking until each delete completes. +def _ClearNodePools(cluster: kubernetes_cluster.KubernetesCluster) -> None: + """Clears all pkbm* node pools, blocking until each delete completes. Called after each scenario so the next one starts from a clean cluster, and from Cleanup() as a final best-effort teardown. """ pools = [n for n in cluster.GetNodePoolNames() if n.startswith(_PREFIX)] if not pools: - logging.info("Sweep: no pkbm* pools present — cluster is clean.") + logging.info("Clear: no pkbm* pools present — cluster is clean.") return - logging.info("Sweep: deleting %d pkbm* pools: %s", len(pools), pools) + logging.info("Clear: deleting %d pkbm* pools: %s", len(pools), pools) background_tasks.RunThreaded(cluster.DeleteNodePool, pools) @@ -277,13 +276,13 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: if "concurrent_node_pool_ops" in scenarios: samples += _RunConcurrentNodePoolOps(cluster, initial) # Each scenario leaves the cluster clean for the next one. - _SweepNodePools(cluster) + _ClearNodePools(cluster) if "overlapping_cluster_update" in scenarios: samples += _RunOverlappingClusterUpdate(cluster, initial) - _SweepNodePools(cluster) + _ClearNodePools(cluster) if "large_scale_provisioning" in scenarios: samples += _SweepScales(cluster, initial) - _SweepNodePools(cluster) + _ClearNodePools(cluster) # Tag all samples with version path and run config for published results. run_meta = { @@ -308,7 +307,7 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: raise_on_failure=False, ) # Final teardown reuses the same sweep the scenarios use. - _SweepNodePools(cluster) + _ClearNodePools(cluster) def _SweepScales( @@ -347,7 +346,7 @@ def _RunConcurrentNodePoolOps( configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] - # ── Phase 1: concurrent creates ───────────────────────────────────────── + # ── Phase 1: concurrent creates (fail-hard — any failure aborts) ──────── create_results = _RunAsync( kickoff=lambda cfg: cluster.CreateNodePoolAsync( cfg, node_version=initial @@ -356,26 +355,18 @@ def _RunConcurrentNodePoolOps( items=configs_, get_name=lambda cfg: cfg.name, ) - samples += _OpSamples( - "ConcurrentOps_Create", create_results, attempted_ops=len(pool_names) - ) + samples += _OpSamples("ConcurrentOps_Create", create_results) - # ── Phase 2: concurrent deletes (live-list to catch EKS rollbacks) ────── + # ── Phase 2: concurrent deletes (live-list; all creates succeeded) ────── alive = _LiveNodePoolNames(cluster, f"{_PREFIX}a") - logging.info( - "concurrent_node_pool_ops: %d live pools for delete (originally %d)", - len(alive), - n, - ) + logging.info("concurrent_node_pool_ops: deleting %d pools", len(alive)) delete_results = _RunAsync( kickoff=cluster.DeleteNodePoolAsync, wait_fn=cluster.WaitForOperation, items=alive, get_name=str, ) - # attempted_ops=n: success rate reflects original request, not just live. - # EKS rolls back timed-out pools silently — without this shows 100%. - samples += _OpSamples("ConcurrentOps_Delete", delete_results, attempted_ops=n) + samples += _OpSamples("ConcurrentOps_Delete", delete_results) return samples @@ -390,38 +381,34 @@ def _RunOverlappingClusterUpdate( """ logging.info("Scenario B: overlapping cluster update + node-pool create") cfg = _MakeNodePoolConfig(cluster, _OVERLAPPING_POOL_NAME) - results = _Results() + results = ThreadSafeResults() def DoClusterUpdate(): - init, e2e, err = _TimedAsync( - cluster.UpdateClusterAsync, cluster.WaitForOperation - ) - results.add("OverlappingUpdate_ClusterUpdate", init, e2e, err) + timing = _TimedAsync(cluster.UpdateClusterAsync, cluster.WaitForOperation) + results.add("OverlappingUpdate_ClusterUpdate", timing) logging.info( - "Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs ok=%s", - init, - e2e, - err is None, + "Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs", + timing.initiation_latency, + timing.end_to_end_latency, ) def DoCreate(): - init, e2e, err = _TimedAsync( + timing = _TimedAsync( lambda: cluster.CreateNodePoolAsync(cfg, node_version=initial), cluster.WaitForOperation, ) - results.add("OverlappingUpdate_NodePoolCreate", init, e2e, err) + results.add("OverlappingUpdate_NodePoolCreate", timing) logging.info( - "Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs ok=%s", - init, - e2e, - err is None, + "Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs", + timing.initiation_latency, + timing.end_to_end_latency, ) background_tasks.RunThreaded(lambda fn: fn(), [DoClusterUpdate, DoCreate]) samples: list[sample.Sample] = [] - for entry in results.entries: - samples += _OpSamples(entry.name, [entry], attempted_ops=1) + for name, timing in results.entries: + samples += _OpSamples(name, [(name, timing)]) # Remove test pool (best-effort). cluster.DeleteNodePool(_OVERLAPPING_POOL_NAME) @@ -450,8 +437,8 @@ def _ScaleToPoolCount( configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] - # ── Creates ────────────────────────────────────────────────────────────── - create_results = _RunAsync( + # ── Creates (tolerant — partial failure expected at scale) ─────────────── + create_results, create_failed = _RunAsyncTolerant( kickoff=lambda cfg: cluster.CreateNodePoolAsync( cfg, node_version=initial ), @@ -459,12 +446,15 @@ def _ScaleToPoolCount( items=configs_, get_name=lambda cfg: cfg.name, ) - created_ok = sum(1 for r in create_results if r.error is None) logging.info( - "Scenario C scale=%d: %d/%d creates succeeded", scale, created_ok, scale + "Scenario C scale=%d: %d/%d creates succeeded (%d failed)", + scale, + len(create_results), + scale, + len(create_failed), ) - samples += _OpSamples( - "LargeScale_Create", create_results, attempted_ops=scale + samples += _LargeScaleSamples( + "LargeScale_Create", create_results, create_failed, attempted_ops=scale ) # ── Deletes (live-list) ────────────────────────────────────────────────── @@ -479,18 +469,20 @@ def _ScaleToPoolCount( ) if not alive: logging.info("Scenario C scale=%d: all creates rolled back.", scale) - samples += _OpSamples("LargeScale_Delete", [], attempted_ops=scale) + samples += _LargeScaleSamples( + "LargeScale_Delete", [], [], attempted_ops=scale + ) return samples - delete_results = _RunAsync( + delete_results, delete_failed = _RunAsyncTolerant( kickoff=cluster.DeleteNodePoolAsync, wait_fn=cluster.WaitForOperation, items=alive, get_name=str, ) # attempted_ops=scale: accurate rate against original request count. - samples += _OpSamples( - "LargeScale_Delete", delete_results, attempted_ops=scale + samples += _LargeScaleSamples( + "LargeScale_Delete", delete_results, delete_failed, attempted_ops=scale ) return samples @@ -500,43 +492,40 @@ def _ScaleToPoolCount( # --------------------------------------------------------------------------- -class _Results: - """Thread-safe collector for (name, init_latency, e2e_latency, error).""" +class ThreadSafeResults: + """Thread-safe collector of (name, OpTiming) pairs from concurrent ops.""" def __init__(self): self._lock = threading.Lock() - self.entries: list[_OpResult] = [] + self.entries: list[tuple[str, OpTiming]] = [] + self.failed: list[str] = [] - def add( - self, name: str, init_dur: float, e2e_dur: float, err: Exception | None - ) -> None: - result = _OpResult(name, init_dur, e2e_dur, err) + def add(self, name: str, timing: OpTiming) -> None: with self._lock: - self.entries.append(result) + self.entries.append((name, timing)) + + def add_failure(self, name: str) -> None: + with self._lock: + self.failed.append(name) def _TimedAsync( kickoff: Callable[[], str], wait_fn: Callable[[str], None], -) -> tuple[float, float, Exception | None]: - """Runs kickoff() then wait_fn(handle); returns (init_lat, e2e_lat, err). +) -> OpTiming: + """Runs kickoff() then wait_fn(handle); returns the OpTiming. - init_lat = time for kickoff() to return (API accepted). - e2e_lat = total wall time including wait. On kickoff failure both are set - to elapsed time at failure point. + Lets exceptions propagate — a failed management-plane op aborts the + benchmark rather than being silently absorbed. initiation_latency is the + time for kickoff() to return (API accepted); end_to_end_latency is total + wall time including the wait. """ init_start = time.monotonic() - try: - handle = kickoff() - except Exception as exc: # pylint: disable=broad-except - elapsed = time.monotonic() - init_start - return elapsed, elapsed, exc - init_dur = time.monotonic() - init_start - try: - wait_fn(handle) - return init_dur, time.monotonic() - init_start, None - except Exception as exc: # pylint: disable=broad-except - return init_dur, time.monotonic() - init_start, exc + handle = kickoff() + initiation_latency = time.monotonic() - init_start + wait_fn(handle) + end_to_end_latency = time.monotonic() - init_start + return OpTiming(initiation_latency, end_to_end_latency) def _RunAsync( @@ -544,33 +533,72 @@ def _RunAsync( wait_fn: Callable[[str], None], items: list, get_name: Callable[[object], str], -) -> list[tuple[str, float, float, Exception | None]]: - """Fires kickoff(item) concurrently for all items; returns timed results. +) -> list[tuple[str, OpTiming]]: + """Fires kickoff(item) concurrently; returns (name, OpTiming) per item. - Uses background_tasks.RunThreaded with a concurrency cap for streaming + Fail-hard: any op that raises aborts the run (RunThreaded propagates the + exception). Used by the create/upgrade/delete scenarios where a single + failure is a benchmark failure. Uses a concurrency cap for streaming execution — completed ops free their slot immediately for the next one. """ if not items: return [] - results = _Results() + results = ThreadSafeResults() cap = min(len(items), _MAX_CONCURRENT.value) def DoWrap(item): - init_dur, e2e_dur, err = _TimedAsync(lambda: kickoff(item), wait_fn) + timing = _TimedAsync(lambda: kickoff(item), wait_fn) name = get_name(item) - results.add(name, init_dur, e2e_dur, err) + results.add(name, timing) logging.info( - "%s ok=%s initiation=%.2fs end_to_end=%.2fs", + "%s initiation=%.2fs end_to_end=%.2fs", name, - err is None, - init_dur, - e2e_dur, + timing.initiation_latency, + timing.end_to_end_latency, ) background_tasks.RunThreaded(DoWrap, items, max_concurrent_threads=cap) return results.entries +def _RunAsyncTolerant( + kickoff: Callable, + wait_fn: Callable[[str], None], + items: list, + get_name: Callable[[object], str], +) -> tuple[list[tuple[str, OpTiming]], list[str]]: + """Like _RunAsync but tolerates per-op failures (large_scale only). + + Returns (successful (name, OpTiming) pairs, failed names). A failing op is + caught, its name recorded, and execution continues — appropriate only for + large-scale provisioning where overshooting quota is an expected scenario, + not a benchmark failure. + """ + if not items: + return [], [] + results = ThreadSafeResults() + cap = min(len(items), _MAX_CONCURRENT.value) + + def DoWrap(item): + name = get_name(item) + try: + timing = _TimedAsync(lambda: kickoff(item), wait_fn) + except Exception as exc: # pylint: disable=broad-except + results.add_failure(name) + logging.warning("%s FAILED: %s", name, str(exc)[:200]) + return + results.add(name, timing) + logging.info( + "%s initiation=%.2fs end_to_end=%.2fs", + name, + timing.initiation_latency, + timing.end_to_end_latency, + ) + + background_tasks.RunThreaded(DoWrap, items, max_concurrent_threads=cap) + return results.entries, results.failed + + def _MakeNodePoolConfig( cluster: kubernetes_cluster.KubernetesCluster, name: str, @@ -593,36 +621,30 @@ def _LiveNodePoolNames( def _OpSamples( metric_prefix: str, - results: list[_OpResult], - attempted_ops: int | None = None, + results: list[tuple[str, OpTiming]], ) -> list[sample.Sample]: - """Per-op + aggregate samples for initiation and end-to-end latency. + """Per-op + aggregate latency samples for fail-hard scenarios. + + Every op in `results` succeeded (a failure would have aborted the run), so + there is no success-rate or error accounting here — just initiation and + end-to-end latency per op, plus aggregate stats. Args: metric_prefix: prefix for all metric names. - results: list of (operation_name, init_lat, e2e_lat, err). - attempted_ops: total ops originally requested. Used as the denominator - for SuccessRate so EKS-rolled-back pools (which never - appear in results) are counted as failures, not ignored. - If None, len(results) is used (original behavior). + results: (name, OpTiming) pairs from _RunAsync. """ samples: list[sample.Sample] = [] init_latencies: list[float] = [] e2e_latencies: list[float] = [] - success = 0 - - for r in results: - meta = {"operation_name": r.name, "success": str(r.error is None)} - if r.error is not None: - meta["error"] = str(r.error)[:200] - else: - success += 1 - init_latencies.append(r.initiation_latency) - e2e_latencies.append(r.end_to_end_latency) + + for name, timing in results: + meta = {"operation_name": name} + init_latencies.append(timing.initiation_latency) + e2e_latencies.append(timing.end_to_end_latency) samples.append( sample.Sample( f"{metric_prefix}_InitiationLatency", - r.initiation_latency, + timing.initiation_latency, "seconds", dict(meta), ) @@ -630,51 +652,105 @@ def _OpSamples( samples.append( sample.Sample( f"{metric_prefix}_EndToEndLatency", - r.end_to_end_latency, + timing.end_to_end_latency, "seconds", dict(meta), ) ) - # ── Counts + success rate ────────────────────────────────────────────── - total = attempted_ops if attempted_ops is not None else len(results) - executed = len(results) - if total == 0: + samples += _AggregateAndOutlierSamples( + metric_prefix, init_latencies, e2e_latencies + ) + return samples + + +def _LargeScaleSamples( + metric_prefix: str, + results: list[tuple[str, OpTiming]], + failed: list[str], + attempted_ops: int, +) -> list[sample.Sample]: + """Latency + success/failure accounting for the tolerant large-scale path. + + Unlike _OpSamples, large_scale_provisioning tolerates partial failure, so + this reports how many ops succeeded/failed (against the originally-attempted + count) and lists the failed pool names in metadata. + + Args: + metric_prefix: prefix for all metric names. + results: successful (name, OpTiming) pairs. + failed: names of ops that failed. + attempted_ops: total ops originally requested (the denominator). + """ + samples: list[sample.Sample] = [] + init_latencies: list[float] = [] + e2e_latencies: list[float] = [] + + for name, timing in results: + meta = {"operation_name": name} + init_latencies.append(timing.initiation_latency) + e2e_latencies.append(timing.end_to_end_latency) + samples.append( + sample.Sample( + f"{metric_prefix}_InitiationLatency", + timing.initiation_latency, + "seconds", + dict(meta), + ) + ) + samples.append( + sample.Sample( + f"{metric_prefix}_EndToEndLatency", + timing.end_to_end_latency, + "seconds", + dict(meta), + ) + ) + + if attempted_ops == 0: raise errors.Benchmarks.RunError( - f"{metric_prefix}: zero operations attempted — the scenario " - "produced no work, which indicates a setup or dispatch failure." + f"{metric_prefix}: zero operations attempted — the scenario produced " + "no work, which indicates a setup or dispatch failure." ) - # Expose each count as its own metric (not just SuccessRate metadata). + succeeded = len(results) count_meta = { - "total_ops": str(total), - "executed_ops": str(executed), - "successful_ops": str(success), - "skipped_ops": str(total - executed), + "total_ops": str(attempted_ops), + "succeeded_ops": str(succeeded), + "failed_ops": str(attempted_ops - succeeded), + "failed_pools": ",".join(failed) if failed else "none", } - for count_label, count_value in ( - ("TotalOps", total), - ("ExecutedOps", executed), - ("SuccessfulOps", success), - ("SkippedOps", total - executed), + for label, value in ( + ("TotalOps", attempted_ops), + ("SucceededOps", succeeded), + ("FailedOps", attempted_ops - succeeded), ): samples.append( sample.Sample( - f"{metric_prefix}_{count_label}", - count_value, - "count", - dict(count_meta), + f"{metric_prefix}_{label}", value, "count", dict(count_meta) ) ) samples.append( sample.Sample( f"{metric_prefix}_SuccessRate", - 100.0 * success / total, + 100.0 * succeeded / attempted_ops, "percent", dict(count_meta), ) ) - # ── Aggregate stats (successful ops only) ──────────────────────────────── + samples += _AggregateAndOutlierSamples( + metric_prefix, init_latencies, e2e_latencies + ) + return samples + + +def _AggregateAndOutlierSamples( + metric_prefix: str, + init_latencies: list[float], + e2e_latencies: list[float], +) -> list[sample.Sample]: + """Emits aggregate stats (>=2 samples) and outlier counts (>=4 samples).""" + samples: list[sample.Sample] = [] for phase_label, latencies in ( ("InitiationLatency", init_latencies), ("EndToEndLatency", e2e_latencies), @@ -683,7 +759,6 @@ def _OpSamples( samples += _AggregateSamples(metric_prefix, phase_label, latencies) if len(latencies) >= 4: samples += _OutlierSamples(metric_prefix, phase_label, latencies) - return samples diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index e916be9cb5..8998242c6c 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -266,14 +266,14 @@ def testCleanupHandlesNoneCluster(self): kubernetes_management_benchmark.Cleanup(bm_spec) -class SweepNodePoolsTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _SweepNodePools helper function.""" +class ClearNodePoolsTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _ClearNodePools helper function.""" def testDeletesStalePkbmPools(self): cluster = _make_mock_cluster( pool_names=['pkbma000', 'pkbmc0001', 'user-pool'] ) - kubernetes_management_benchmark._SweepNodePools(cluster) + kubernetes_management_benchmark._ClearNodePools(cluster) deleted = {c.args[0] for c in cluster.DeleteNodePool.call_args_list} self.assertIn('pkbma000', deleted) self.assertIn('pkbmc0001', deleted) @@ -281,42 +281,44 @@ def testDeletesStalePkbmPools(self): def testDoesNothingWhenNoPkbmPools(self): cluster = _make_mock_cluster(pool_names=['user-pool', 'default-pool']) - kubernetes_management_benchmark._SweepNodePools(cluster) + kubernetes_management_benchmark._ClearNodePools(cluster) cluster.DeleteNodePool.assert_not_called() - def testSweepRaisesOnGetNodePoolNamesException(self): + def testClearRaisesOnGetNodePoolNamesException(self): cluster = _make_mock_cluster() cluster.GetNodePoolNames.side_effect = RuntimeError('API error') with self.assertRaises(RuntimeError): - kubernetes_management_benchmark._SweepNodePools(cluster) + kubernetes_management_benchmark._ClearNodePools(cluster) -class ResultsTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _Results result-accumulator helper.""" +class ThreadSafeResultsTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the ThreadSafeResults collector.""" def testAddSingleEntry(self): - r = kubernetes_management_benchmark._Results() - r.add('op1', 0.1, 1.0, None) + r = kubernetes_management_benchmark.ThreadSafeResults() + r.add('op1', kubernetes_management_benchmark.OpTiming(0.1, 1.0)) self.assertLen(r.entries, 1) - name, init, e2e, err = r.entries[0] + name, timing = r.entries[0] self.assertEqual('op1', name) - self.assertAlmostEqual(0.1, init, places=5) - self.assertAlmostEqual(1.0, e2e, places=5) - self.assertIsNone(err) + self.assertAlmostEqual(0.1, timing.initiation_latency, places=5) + self.assertAlmostEqual(1.0, timing.end_to_end_latency, places=5) - def testAddMultipleEntries(self): - r = kubernetes_management_benchmark._Results() - r.add('op1', 0.1, 1.0, None) - r.add('op2', 0.2, 2.0, ValueError('fail')) - self.assertLen(r.entries, 2) + def testAddFailureRecordsName(self): + r = kubernetes_management_benchmark.ThreadSafeResults() + r.add_failure('bad-op') + self.assertEqual(['bad-op'], r.failed) + self.assertEmpty(r.entries) def testAddIsThreadSafe(self): """Tests that concurrent add() calls from multiple threads are safe.""" - r = kubernetes_management_benchmark._Results() + r = kubernetes_management_benchmark.ThreadSafeResults() n = 100 def _add(i): - r.add(f'op{i}', float(i), float(i) * 2, None) + r.add( + f'op{i}', + kubernetes_management_benchmark.OpTiming(float(i), float(i) * 2), + ) threads = [threading.Thread(target=_add, args=(i,)) for i in range(n)] for t in threads: @@ -325,13 +327,6 @@ def _add(i): t.join() self.assertLen(r.entries, n) - def testAddPreservesError(self): - r = kubernetes_management_benchmark._Results() - exc = RuntimeError('test error') - r.add('failing-op', 0.5, 0.5, exc) - _, _, _, err = r.entries[0] - self.assertIs(exc, err) - class TimedAsyncTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _TimedAsync timing helper.""" @@ -339,44 +334,34 @@ class TimedAsyncTest(pkb_common_test_case.PkbCommonTestCase): def testSuccessfulKickoffAndWait(self): kickoff = mock.Mock(return_value='op-handle') wait_fn = mock.Mock(return_value=None) - init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( - kickoff, wait_fn - ) + timing = kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) kickoff.assert_called_once() wait_fn.assert_called_once_with('op-handle') - self.assertIsNone(err) - self.assertGreaterEqual(init_lat, 0.0) - self.assertGreaterEqual(e2e_lat, init_lat) + self.assertGreaterEqual(timing.initiation_latency, 0.0) + self.assertGreaterEqual( + timing.end_to_end_latency, timing.initiation_latency + ) - def testKickoffFailureReturnsError(self): + def testKickoffFailurePropagates(self): exc = RuntimeError('kickoff failed') kickoff = mock.Mock(side_effect=exc) wait_fn = mock.Mock() - init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( - kickoff, wait_fn - ) - self.assertIs(exc, err) + with self.assertRaises(RuntimeError): + kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) wait_fn.assert_not_called() - self.assertAlmostEqual(init_lat, e2e_lat, places=2) - def testWaitFailureReturnsError(self): + def testWaitFailurePropagates(self): exc = RuntimeError('wait failed') kickoff = mock.Mock(return_value='op-handle') wait_fn = mock.Mock(side_effect=exc) - _, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( - kickoff, wait_fn - ) - self.assertIs(exc, err) - self.assertGreater(e2e_lat, 0.0) + with self.assertRaises(RuntimeError): + kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) def testInitLatencyNotGreaterThanE2eLatency(self): kickoff = mock.Mock(return_value='handle') wait_fn = mock.Mock(side_effect=lambda _: time.sleep(0.01)) - init_lat, e2e_lat, err = kubernetes_management_benchmark._TimedAsync( - kickoff, wait_fn - ) - self.assertIsNone(err) - self.assertLessEqual(init_lat, e2e_lat) + timing = kubernetes_management_benchmark._TimedAsync(kickoff, wait_fn) + self.assertLessEqual(timing.initiation_latency, timing.end_to_end_latency) def testHandlePassedToWaitFn(self): kickoff = mock.Mock(return_value='my-op-handle') @@ -386,7 +371,7 @@ def testHandlePassedToWaitFn(self): class RunAsyncTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunAsync concurrent execution helper.""" + """Tests for the _RunAsync fail-hard concurrent execution helper.""" def testEmptyItemsReturnsEmptyList(self): results = kubernetes_management_benchmark._RunAsync( @@ -405,17 +390,16 @@ def testReturnsOneResultPerItem(self): kickoff=kickoff, wait_fn=wait_fn, items=['a', 'b', 'c'], get_name=str ) self.assertLen(results, 3) - self.assertEqual({'a', 'b', 'c'}, {name for name, _, _, _ in results}) + self.assertEqual({'a', 'b', 'c'}, {name for name, _ in results}) @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) - def testKickoffErrorCapturedInResults(self): + def testKickoffErrorPropagates(self): + """Fail-hard: a failing op raises rather than being captured.""" kickoff = mock.Mock(side_effect=RuntimeError('kaboom')) - results = kubernetes_management_benchmark._RunAsync( - kickoff=kickoff, wait_fn=mock.Mock(), items=['x'], get_name=str - ) - self.assertLen(results, 1) - _, _, _, err = results[0] - self.assertIsNotNone(err) + with self.assertRaises(Exception): + kubernetes_management_benchmark._RunAsync( + kickoff=kickoff, wait_fn=mock.Mock(), items=['x'], get_name=str + ) @flagsaver.flagsaver(k8s_mgmt_max_concurrent=2) def testConcurrencyCapDoesNotDropItems(self): @@ -437,10 +421,50 @@ def testGetNameCallableApplied(self): items=[cfg], get_name=lambda c: c.name, ) - name, _, _, _ = results[0] + name, _ = results[0] self.assertEqual('poolname', name) +class RunAsyncTolerantTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _RunAsyncTolerant helper (large_scale path).""" + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testAllSucceed(self): + results, failed = kubernetes_management_benchmark._RunAsyncTolerant( + kickoff=mock.Mock(return_value='op'), + wait_fn=mock.Mock(return_value=None), + items=['a', 'b'], + get_name=str, + ) + self.assertLen(results, 2) + self.assertEmpty(failed) + + @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) + def testFailuresRecordedNotRaised(self): + """Tolerant path catches failures and records their names.""" + + def _kickoff(item): + if item == 'b': + raise RuntimeError('b failed') + return 'op' + + results, failed = kubernetes_management_benchmark._RunAsyncTolerant( + kickoff=_kickoff, + wait_fn=mock.Mock(return_value=None), + items=['a', 'b', 'c'], + get_name=str, + ) + self.assertLen(results, 2) + self.assertEqual(['b'], failed) + + def testEmptyItemsReturnsEmpty(self): + results, failed = kubernetes_management_benchmark._RunAsyncTolerant( + kickoff=mock.Mock(), wait_fn=mock.Mock(), items=[], get_name=str + ) + self.assertEmpty(results) + self.assertEmpty(failed) + + class MakeNodePoolConfigTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _MakeNodePoolConfig factory.""" @@ -467,163 +491,131 @@ def testDoesNotMutateDefaultNodepool(self): class OpSamplesTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _OpSamples sample-generation helper.""" + """Tests for the _OpSamples latency-only sample helper (fail-hard path).""" - def testEmptyResultsYieldsSuccessRateOfZero(self): - samples = kubernetes_management_benchmark._OpSamples( - 'PrefixOp', [], attempted_ops=5 - ) - rate = next(s for s in samples if s.metric == 'PrefixOp_SuccessRate') - self.assertEqual(0.0, rate.value) + def testEmptyResultsYieldsNoSamples(self): + samples = kubernetes_management_benchmark._OpSamples('PrefixOp', []) + self.assertEmpty(samples) def testPerOpInitiationAndE2eSamplesGenerated(self): results = [ - kubernetes_management_benchmark._OpResult('op1', 0.1, 1.0, None), - kubernetes_management_benchmark._OpResult('op2', 0.2, 2.0, None), + ('op1', kubernetes_management_benchmark.OpTiming(0.1, 1.0)), + ('op2', kubernetes_management_benchmark.OpTiming(0.2, 2.0)), ] - samples = kubernetes_management_benchmark._OpSamples( - 'MyOp', results, attempted_ops=2 - ) + samples = kubernetes_management_benchmark._OpSamples('MyOp', results) metrics = [s.metric for s in samples] self.assertIn('MyOp_InitiationLatency', metrics) self.assertIn('MyOp_EndToEndLatency', metrics) - def testSuccessRateHundredPercentWhenAllSucceed(self): - results = [ - kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), - kubernetes_management_benchmark._OpResult('op2', 0.5, 1.5, None), - ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=2 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertAlmostEqual(100.0, rate.value) - - def testSuccessRateFiftyPercentWhenHalfFail(self): - results = [ - kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), - kubernetes_management_benchmark._OpResult( - 'op2', 0.5, 0.5, RuntimeError('fail') - ), - ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=2 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertAlmostEqual(50.0, rate.value) - - def testAttemptedOpsExceedingExecutedOpsLowersRate(self): - results = [kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None)] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=3 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertAlmostEqual(100.0 / 3, rate.value, places=3) - - def testCountsExposedAsSeparateMetrics(self): - """630: Total/Executed/Successful/Skipped each emitted as a metric.""" + def testNoSuccessRateOrCountMetrics(self): + """Fail-hard path emits no SuccessRate/count metrics (B1).""" results = [ - kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), - kubernetes_management_benchmark._OpResult( - 'op2', 0.5, 0.5, Exception('e') - ), + ('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0)), + ('op2', kubernetes_management_benchmark.OpTiming(0.5, 1.5)), ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=3 - ) - metrics = {s.metric: s.value for s in samples} - self.assertEqual(3, metrics['Op_TotalOps']) - self.assertEqual(2, metrics['Op_ExecutedOps']) - self.assertEqual(1, metrics['Op_SuccessfulOps']) - self.assertEqual(1, metrics['Op_SkippedOps']) - - def testSuccessRateMetadataFields(self): - """SuccessRate sample carries the op-count metadata fields.""" - results = [ - kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None), - kubernetes_management_benchmark._OpResult( - 'op2', 0.5, 0.5, Exception('err') - ), - ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=3 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertEqual('3', rate.metadata['total_ops']) - self.assertEqual('2', rate.metadata['executed_ops']) - self.assertEqual('1', rate.metadata['successful_ops']) - self.assertEqual('1', rate.metadata['skipped_ops']) - - def testZeroAttemptedOpsRaisesRunError(self): - """total==0 indicates a dispatch/setup failure; fail loudly (623).""" - with self.assertRaises(errors.Benchmarks.RunError): - kubernetes_management_benchmark._OpSamples('Op', [], attempted_ops=0) + samples = kubernetes_management_benchmark._OpSamples('Op', results) + metrics = {s.metric for s in samples} + self.assertNotIn('Op_SuccessRate', metrics) + self.assertNotIn('Op_TotalOps', metrics) - def testFailedOpIncludesErrorMessage(self): - results = [ - kubernetes_management_benchmark._OpResult( - 'fail-op', 0.5, 0.5, RuntimeError('oops') - ) - ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=1 - ) + def testOperationNameInMetadata(self): + results = [('mypool', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] + samples = kubernetes_management_benchmark._OpSamples('Op', results) init_s = next(s for s in samples if s.metric == 'Op_InitiationLatency') - self.assertIn('error', init_s.metadata) - self.assertIn('oops', init_s.metadata['error']) + self.assertEqual('mypool', init_s.metadata['operation_name']) - def testAggregatesGeneratedForTwoOrMoreSuccesses(self): - """Aggregate stat samples appear once there are >=2 successes.""" + def testAggregatesGeneratedForTwoOrMore(self): results = [ - kubernetes_management_benchmark._OpResult( - f'op{i}', float(i), float(i) * 2, None + ( + f'op{i}', + kubernetes_management_benchmark.OpTiming(float(i), float(i) * 2), ) for i in range(1, 4) ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=3 - ) + samples = kubernetes_management_benchmark._OpSamples('Op', results) metrics = [s.metric for s in samples] self.assertIn('Op_InitiationLatency_Mean', metrics) self.assertIn('Op_EndToEndLatency_Mean', metrics) - def testAggregatesNotGeneratedForSingleSuccess(self): - results = [kubernetes_management_benchmark._OpResult('op1', 1.0, 2.0, None)] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=1 - ) + def testAggregatesNotGeneratedForSingle(self): + results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] + samples = kubernetes_management_benchmark._OpSamples('Op', results) self.assertNotIn('Op_InitiationLatency_Mean', [s.metric for s in samples]) - def testOutliersGeneratedForFourOrMoreSuccesses(self): - """Outlier-count samples appear once there are >=4 successes.""" + def testOutliersGeneratedForFourOrMore(self): results = [ - kubernetes_management_benchmark._OpResult( - f'op{i}', float(i), float(i) * 2, None + ( + f'op{i}', + kubernetes_management_benchmark.OpTiming(float(i), float(i) * 2), ) for i in range(1, 6) ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=5 - ) + samples = kubernetes_management_benchmark._OpSamples('Op', results) metrics = [s.metric for s in samples] self.assertIn('Op_InitiationLatency_OutlierCount', metrics) - self.assertIn('Op_EndToEndLatency_OutlierCount', metrics) - def testOutliersNotGeneratedForThreeOrFewerSuccesses(self): + def testOutliersNotGeneratedForThreeOrFewer(self): results = [ - kubernetes_management_benchmark._OpResult( - f'op{i}', float(i), float(i) * 2, None + ( + f'op{i}', + kubernetes_management_benchmark.OpTiming(float(i), float(i) * 2), ) for i in range(1, 4) ] - samples = kubernetes_management_benchmark._OpSamples( - 'Op', results, attempted_ops=3 - ) + samples = kubernetes_management_benchmark._OpSamples('Op', results) self.assertNotIn( 'Op_InitiationLatency_OutlierCount', [s.metric for s in samples] ) +class LargeScaleSamplesTest(pkb_common_test_case.PkbCommonTestCase): + """Tests for the _LargeScaleSamples tolerant-path helper.""" + + def testSuccessRateHundredWhenAllSucceed(self): + results = [ + ('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0)), + ('op2', kubernetes_management_benchmark.OpTiming(0.5, 1.5)), + ] + samples = kubernetes_management_benchmark._LargeScaleSamples( + 'Op', results, [], attempted_ops=2 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0, rate.value) + + def testSuccessRateReflectsFailures(self): + results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] + samples = kubernetes_management_benchmark._LargeScaleSamples( + 'Op', results, ['op2', 'op3'], attempted_ops=3 + ) + rate = next(s for s in samples if s.metric == 'Op_SuccessRate') + self.assertAlmostEqual(100.0 / 3, rate.value, places=3) + + def testFailedPoolsListedInMetadata(self): + results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] + samples = kubernetes_management_benchmark._LargeScaleSamples( + 'Op', results, ['op2', 'op3'], attempted_ops=3 + ) + failed = next(s for s in samples if s.metric == 'Op_FailedOps') + self.assertEqual(2, failed.value) + self.assertEqual('op2,op3', failed.metadata['failed_pools']) + + def testCountMetricsExposed(self): + results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] + samples = kubernetes_management_benchmark._LargeScaleSamples( + 'Op', results, ['op2'], attempted_ops=2 + ) + metrics = {s.metric: s.value for s in samples} + self.assertEqual(2, metrics['Op_TotalOps']) + self.assertEqual(1, metrics['Op_SucceededOps']) + self.assertEqual(1, metrics['Op_FailedOps']) + + def testZeroAttemptedRaisesRunError(self): + with self.assertRaises(errors.Benchmarks.RunError): + kubernetes_management_benchmark._LargeScaleSamples( + 'Op', [], [], attempted_ops=0 + ) + + class AggregateSamplesTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _AggregateSamples statistics helper.""" @@ -746,7 +738,7 @@ def testRunSweepsAfterEachScenario(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ) as mock_clean, mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -773,7 +765,7 @@ def testRunOnlyScenarioACallsOnlyA(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -800,7 +792,7 @@ def testRunOnlyScenarioBCallsOnlyB(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -827,7 +819,7 @@ def testRunScenarioCPassesLargeScaleFlag(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -854,7 +846,7 @@ def testRunScenarioCScaleSweepRunsTwice(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -885,7 +877,7 @@ def testRunTagsScenarioCScaleInMetadata(self): bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('metric', 1.0) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -914,7 +906,7 @@ def testRunTagsAllSamplesWithRunMetadata(self): bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('m', 1.0) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -947,7 +939,7 @@ def testRunUsesExplicitVersionFlags(self): cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', @@ -974,7 +966,7 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): cluster.ResolveNodePoolVersions.return_value = ('1.33', '1.34') bm_spec = _make_mock_benchmark_spec(cluster) with mock.patch.object( - kubernetes_management_benchmark, '_SweepNodePools' + kubernetes_management_benchmark, '_ClearNodePools' ), mock.patch.object( kubernetes_management_benchmark, '_RunConcurrentNodePoolOps', From f431c3f1ece312233f52c37cd1de2cbc9d57e88b Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Fri, 12 Jun 2026 11:43:58 +0000 Subject: [PATCH 10/15] kubernetes_management: rename scale->goal_nodepools (nit 334) --- .../kubernetes_management_benchmark.py | 70 +++++++++++-------- .../kubernetes_management_benchmark_test.py | 18 ++--- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 4d8bbefe91..c4d2b262a5 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -314,24 +314,26 @@ def _SweepScales( cluster: kubernetes_cluster.KubernetesCluster, initial: str, ) -> list[sample.Sample]: - """Runs large-scale provisioning across each requested scale. + """Runs large-scale provisioning at each requested goal node-pool count. - Scales come from --k8s_mgmt_scale_sweep when set, else the single - --k8s_mgmt_large_scale_nodepools value. Each scale's samples are tagged - with large_scale_scale so results stay distinguishable. + Goal counts come from --k8s_mgmt_scale_sweep when set, else the single + --k8s_mgmt_large_scale_nodepools value. Each count's samples are tagged + with goal_nodepools so results stay distinguishable. """ - scales = ( + goal_counts = ( [int(x.strip()) for x in _SCALE_SWEEP.value] if _SCALE_SWEEP.value else [_LARGE_SCALE_NODEPOOLS.value] ) - logging.info("large_scale_provisioning: scale sweep = %s", scales) + logging.info( + "large_scale_provisioning: goal node-pool counts = %s", goal_counts + ) samples: list[sample.Sample] = [] - for scale in scales: - scale_samples = _ScaleToPoolCount(cluster, initial, scale) - for s in scale_samples: - s.metadata["large_scale_scale"] = str(scale) - samples += scale_samples + for goal_nodepools in goal_counts: + goal_samples = _ScaleToPoolCount(cluster, initial, goal_nodepools) + for s in goal_samples: + s.metadata["goal_nodepools"] = str(goal_nodepools) + samples += goal_samples return samples @@ -418,26 +420,26 @@ def DoCreate(): def _ScaleToPoolCount( cluster: kubernetes_cluster.KubernetesCluster, initial: str, - scale: int, + goal_nodepools: int, ) -> list[sample.Sample]: - """Large-scale node-pool provisioning at a given scale. + """Large-goal_nodepools node-pool provisioning to a goal node-pool count. - Streams all `scale` creates through a single executor capped at + Streams all `goal_nodepools` creates through a single executor capped at _MAX_CONCURRENT workers — as each op completes the next starts immediately (no batch barriers). Delete uses a live-list so EKS-rolled-back pools are excluded from the denominator correctly. """ logging.info( - "Scenario C: scale=%d, max_concurrent=%d, initial_version=%s", - scale, + "large_scale goal=%d, max_concurrent=%d, initial_version=%s", + goal_nodepools, _MAX_CONCURRENT.value, initial, ) - pool_names = [_ScalePoolName(i) for i in range(scale)] + pool_names = [_ScalePoolName(i) for i in range(goal_nodepools)] configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] samples: list[sample.Sample] = [] - # ── Creates (tolerant — partial failure expected at scale) ─────────────── + # ── Creates (tolerant — partial failure expected at scale) ────────────── create_results, create_failed = _RunAsyncTolerant( kickoff=lambda cfg: cluster.CreateNodePoolAsync( cfg, node_version=initial @@ -447,30 +449,35 @@ def _ScaleToPoolCount( get_name=lambda cfg: cfg.name, ) logging.info( - "Scenario C scale=%d: %d/%d creates succeeded (%d failed)", - scale, + "large_scale goal=%d: %d/%d creates succeeded (%d failed)", + goal_nodepools, len(create_results), - scale, + goal_nodepools, len(create_failed), ) samples += _LargeScaleSamples( - "LargeScale_Create", create_results, create_failed, attempted_ops=scale + "LargeScale_Create", + create_results, + create_failed, + attempted_ops=goal_nodepools, ) # ── Deletes (live-list) ────────────────────────────────────────────────── alive = _LiveNodePoolNames(cluster, f"{_PREFIX}c") logging.info( - "Scenario C scale=%d: %d live pools for delete (originally %d;" + "large_scale goal=%d: %d live pools for delete (originally %d;" + " %d rolled back by cloud)", - scale, + goal_nodepools, len(alive), - scale, - scale - len(alive), + goal_nodepools, + goal_nodepools - len(alive), ) if not alive: - logging.info("Scenario C scale=%d: all creates rolled back.", scale) + logging.info( + "large_scale goal=%d: all creates rolled back.", goal_nodepools + ) samples += _LargeScaleSamples( - "LargeScale_Delete", [], [], attempted_ops=scale + "LargeScale_Delete", [], [], attempted_ops=goal_nodepools ) return samples @@ -480,9 +487,12 @@ def _ScaleToPoolCount( items=alive, get_name=str, ) - # attempted_ops=scale: accurate rate against original request count. + # attempted_ops=goal_nodepools: accurate rate against original request count. samples += _LargeScaleSamples( - "LargeScale_Delete", delete_results, delete_failed, attempted_ops=scale + "LargeScale_Delete", + delete_results, + delete_failed, + attempted_ops=goal_nodepools, ) return samples diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index 8998242c6c..c9fa2b90ea 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -871,8 +871,8 @@ def testRunScenarioCScaleSweepRunsTwice(self): k8s_mgmt_scale_sweep=['10'], k8s_mgmt_large_scale_nodepools=10, ) - def testRunTagsScenarioCScaleInMetadata(self): - """Tests that Run adds large_scale_scale to each sample's metadata.""" + def testRunTagsScenarioCGoalInMetadata(self): + """Tests that Run adds goal_nodepools to each sample's metadata.""" cluster = _make_mock_cluster() bm_spec = _make_mock_benchmark_spec(cluster) test_sample = _make_sample('metric', 1.0) @@ -892,8 +892,8 @@ def testRunTagsScenarioCScaleInMetadata(self): return_value=[test_sample], ): samples = kubernetes_management_benchmark.Run(bm_spec) - self.assertIn('large_scale_scale', samples[0].metadata) - self.assertEqual('10', samples[0].metadata['large_scale_scale']) + self.assertIn('goal_nodepools', samples[0].metadata) + self.assertEqual('10', samples[0].metadata['goal_nodepools']) @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], @@ -1106,7 +1106,7 @@ class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): def testProducesCreateAndDeleteSamples(self): cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', scale=2 + cluster, '1.33', goal_nodepools=2 ) metrics = {s.metric for s in samples} self.assertTrue(any('LargeScale_Create' in m for m in metrics)) @@ -1120,7 +1120,7 @@ def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): """Tests Scenario C records 0% delete rate when no live pools exist.""" cluster = _make_mock_cluster(pool_names=[]) samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', scale=3 + cluster, '1.33', goal_nodepools=3 ) delete_rate = next( s for s in samples if s.metric == 'LargeScale_Delete_SuccessRate' @@ -1134,7 +1134,9 @@ def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): ) def testDeleteUsesLiveListNotOriginalCreateList(self): cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) - kubernetes_management_benchmark._ScaleToPoolCount(cluster, '1.33', scale=3) + kubernetes_management_benchmark._ScaleToPoolCount( + cluster, '1.33', goal_nodepools=3 + ) self.assertEqual(2, cluster.DeleteNodePoolAsync.call_count) @flagsaver.flagsaver( @@ -1145,7 +1147,7 @@ def testCreateSuccessRateUsesScaleAsDenominator(self): """Tests Scenario C create success rate uses scale as total_ops.""" cluster = _make_mock_cluster(pool_names=['pkbmc0000']) samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', scale=3 + cluster, '1.33', goal_nodepools=3 ) create_rate = next( s for s in samples if s.metric == 'LargeScale_Create_SuccessRate' From 6e38531db41b439ebeb43ac6499703764f4b8e5e Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Thu, 18 Jun 2026 19:34:49 +0000 Subject: [PATCH 11/15] kubernetes_management: split large_scale_provisioning into a follow-up PR --- .../kubernetes_management_benchmark.py | 278 +-------------- .../kubernetes_management_benchmark_test.py | 329 +----------------- 2 files changed, 5 insertions(+), 602 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index c4d2b262a5..3db5a4839c 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -13,13 +13,11 @@ # limitations under the License. """Benchmark for Kubernetes management plane operations. -Measures GKE/EKS/AKS control-plane API responsiveness via three scenarios: +Measures GKE/EKS/AKS control-plane API responsiveness via two scenarios: concurrent_node_pool_ops: concurrent node-pool create/delete. overlapping_cluster_update: node-pool create overlapping a cluster update. - large_scale_provisioning: large-scale node-pool provisioning (scale/sweep). Optimizations for minimum run time: - - Streaming concurrency in large_scale_provisioning (no batch barriers) - Reduced poll_interval in provider WaitForOperation (5s vs 10s) - Per-op threads capped at _MAX_CONCURRENT to avoid OS limits - Accurate delete success rate via attempted_ops denominator @@ -68,13 +66,9 @@ # overlapping_cluster_update: run a cluster update and a node-pool create # simultaneously; measures behaviour when a cluster-scoped op overlaps a # node-pool-scoped one. -# large_scale_provisioning: create then delete a large number of node pools -# (optionally swept via --k8s_mgmt_scale_sweep); measures scaling limits -# and large-batch provisioning latency. _VALID_SCENARIOS = frozenset({ "concurrent_node_pool_ops", "overlapping_cluster_update", - "large_scale_provisioning", }) # ── Shared flags (apply across all scenarios) ── @@ -83,11 +77,9 @@ [ "concurrent_node_pool_ops", "overlapping_cluster_update", - "large_scale_provisioning", ], "Comma-separated subset of scenarios to run. Valid values: " - + "concurrent_node_pool_ops, overlapping_cluster_update, " - + "large_scale_provisioning.", + + "concurrent_node_pool_ops, overlapping_cluster_update.", ) _NODES_PER_NODEPOOL = flags.DEFINE_integer( "k8s_mgmt_nodes_per_nodepool", @@ -114,24 +106,6 @@ "Kubernetes version for newly-created node pools (N-1). None = auto.", ) -# ── large_scale_provisioning flags ── -_LARGE_SCALE_NODEPOOLS = flags.DEFINE_integer( - "k8s_mgmt_large_scale_nodepools", - 1000, - "Number of node pools to provision in the large_scale_provisioning " - + "scenario. Spec target is 1000; ensure VPC/quota is available before " - + "running.", -) -_SCALE_SWEEP = flags.DEFINE_list( - "k8s_mgmt_scale_sweep", - [], - "Comma-separated list of node-pool counts for the large_scale_provisioning " - + "scale sweep. Each scale runs as a separate sub-run with full " - + "create/delete cycle. Example:" - " --k8s_mgmt_scale_sweep=10,50,100,500,1000. " - + "If empty, uses --k8s_mgmt_large_scale_nodepools.", -) - # AKS caps node-pool names at 12 chars — keep all names within that limit. _PREFIX = "pkbm" @@ -147,18 +121,13 @@ def _ConcurrentPoolName(i): _OVERLAPPING_POOL_NAME = f"{_PREFIX}b" -def _ScalePoolName(i): - return f"{_PREFIX}c{i:04d}" - - @dataclasses.dataclass class OpTiming: """Latency of a single async management-plane operation. Pure timing data — the metric name is supplied by the sample builder, and failures abort the run rather than being recorded here (so there is no - error field). large_scale_provisioning, which tolerates partial failure, - tracks failed pool names separately. + error field). Attributes: initiation_latency: Seconds from issuing the async API call until it is @@ -185,19 +154,6 @@ def CheckPrerequisites( f"Invalid value(s) for --k8s_mgmt_scenarios: {invalid}. " + f"Valid options: {sorted(_VALID_SCENARIOS)}." ) - selected = {s.strip() for s in _SCENARIOS.value} - if _SCALE_SWEEP.value and "large_scale_provisioning" not in selected: - raise errors.Config.InvalidValue( - "--k8s_mgmt_scale_sweep applies only to the large_scale_provisioning " - + "scenario, which is not selected." - ) - for s in _SCALE_SWEEP.value: - try: - int(s.strip()) - except ValueError as e: - raise errors.Config.InvalidValue( - f"Non-integer value in --k8s_mgmt_scale_sweep: {s!r}" - ) from e if benchmark_config.container_cluster.type != "Kubernetes": raise errors.Config.InvalidValue( "kubernetes_management benchmark requires a Kubernetes" @@ -280,9 +236,6 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: if "overlapping_cluster_update" in scenarios: samples += _RunOverlappingClusterUpdate(cluster, initial) _ClearNodePools(cluster) - if "large_scale_provisioning" in scenarios: - samples += _SweepScales(cluster, initial) - _ClearNodePools(cluster) # Tag all samples with version path and run config for published results. run_meta = { @@ -310,33 +263,6 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: _ClearNodePools(cluster) -def _SweepScales( - cluster: kubernetes_cluster.KubernetesCluster, - initial: str, -) -> list[sample.Sample]: - """Runs large-scale provisioning at each requested goal node-pool count. - - Goal counts come from --k8s_mgmt_scale_sweep when set, else the single - --k8s_mgmt_large_scale_nodepools value. Each count's samples are tagged - with goal_nodepools so results stay distinguishable. - """ - goal_counts = ( - [int(x.strip()) for x in _SCALE_SWEEP.value] - if _SCALE_SWEEP.value - else [_LARGE_SCALE_NODEPOOLS.value] - ) - logging.info( - "large_scale_provisioning: goal node-pool counts = %s", goal_counts - ) - samples: list[sample.Sample] = [] - for goal_nodepools in goal_counts: - goal_samples = _ScaleToPoolCount(cluster, initial, goal_nodepools) - for s in goal_samples: - s.metadata["goal_nodepools"] = str(goal_nodepools) - samples += goal_samples - return samples - - def _RunConcurrentNodePoolOps( cluster: kubernetes_cluster.KubernetesCluster, initial: str, @@ -417,86 +343,6 @@ def DoCreate(): return samples -def _ScaleToPoolCount( - cluster: kubernetes_cluster.KubernetesCluster, - initial: str, - goal_nodepools: int, -) -> list[sample.Sample]: - """Large-goal_nodepools node-pool provisioning to a goal node-pool count. - - Streams all `goal_nodepools` creates through a single executor capped at - _MAX_CONCURRENT workers — as each op completes the next starts immediately - (no batch barriers). Delete uses a live-list so EKS-rolled-back pools are - excluded from the denominator correctly. - """ - logging.info( - "large_scale goal=%d, max_concurrent=%d, initial_version=%s", - goal_nodepools, - _MAX_CONCURRENT.value, - initial, - ) - pool_names = [_ScalePoolName(i) for i in range(goal_nodepools)] - configs_ = [_MakeNodePoolConfig(cluster, name) for name in pool_names] - samples: list[sample.Sample] = [] - - # ── Creates (tolerant — partial failure expected at scale) ────────────── - create_results, create_failed = _RunAsyncTolerant( - kickoff=lambda cfg: cluster.CreateNodePoolAsync( - cfg, node_version=initial - ), - wait_fn=cluster.WaitForOperation, - items=configs_, - get_name=lambda cfg: cfg.name, - ) - logging.info( - "large_scale goal=%d: %d/%d creates succeeded (%d failed)", - goal_nodepools, - len(create_results), - goal_nodepools, - len(create_failed), - ) - samples += _LargeScaleSamples( - "LargeScale_Create", - create_results, - create_failed, - attempted_ops=goal_nodepools, - ) - - # ── Deletes (live-list) ────────────────────────────────────────────────── - alive = _LiveNodePoolNames(cluster, f"{_PREFIX}c") - logging.info( - "large_scale goal=%d: %d live pools for delete (originally %d;" - + " %d rolled back by cloud)", - goal_nodepools, - len(alive), - goal_nodepools, - goal_nodepools - len(alive), - ) - if not alive: - logging.info( - "large_scale goal=%d: all creates rolled back.", goal_nodepools - ) - samples += _LargeScaleSamples( - "LargeScale_Delete", [], [], attempted_ops=goal_nodepools - ) - return samples - - delete_results, delete_failed = _RunAsyncTolerant( - kickoff=cluster.DeleteNodePoolAsync, - wait_fn=cluster.WaitForOperation, - items=alive, - get_name=str, - ) - # attempted_ops=goal_nodepools: accurate rate against original request count. - samples += _LargeScaleSamples( - "LargeScale_Delete", - delete_results, - delete_failed, - attempted_ops=goal_nodepools, - ) - return samples - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -571,44 +417,6 @@ def DoWrap(item): return results.entries -def _RunAsyncTolerant( - kickoff: Callable, - wait_fn: Callable[[str], None], - items: list, - get_name: Callable[[object], str], -) -> tuple[list[tuple[str, OpTiming]], list[str]]: - """Like _RunAsync but tolerates per-op failures (large_scale only). - - Returns (successful (name, OpTiming) pairs, failed names). A failing op is - caught, its name recorded, and execution continues — appropriate only for - large-scale provisioning where overshooting quota is an expected scenario, - not a benchmark failure. - """ - if not items: - return [], [] - results = ThreadSafeResults() - cap = min(len(items), _MAX_CONCURRENT.value) - - def DoWrap(item): - name = get_name(item) - try: - timing = _TimedAsync(lambda: kickoff(item), wait_fn) - except Exception as exc: # pylint: disable=broad-except - results.add_failure(name) - logging.warning("%s FAILED: %s", name, str(exc)[:200]) - return - results.add(name, timing) - logging.info( - "%s initiation=%.2fs end_to_end=%.2fs", - name, - timing.initiation_latency, - timing.end_to_end_latency, - ) - - background_tasks.RunThreaded(DoWrap, items, max_concurrent_threads=cap) - return results.entries, results.failed - - def _MakeNodePoolConfig( cluster: kubernetes_cluster.KubernetesCluster, name: str, @@ -674,86 +482,6 @@ def _OpSamples( return samples -def _LargeScaleSamples( - metric_prefix: str, - results: list[tuple[str, OpTiming]], - failed: list[str], - attempted_ops: int, -) -> list[sample.Sample]: - """Latency + success/failure accounting for the tolerant large-scale path. - - Unlike _OpSamples, large_scale_provisioning tolerates partial failure, so - this reports how many ops succeeded/failed (against the originally-attempted - count) and lists the failed pool names in metadata. - - Args: - metric_prefix: prefix for all metric names. - results: successful (name, OpTiming) pairs. - failed: names of ops that failed. - attempted_ops: total ops originally requested (the denominator). - """ - samples: list[sample.Sample] = [] - init_latencies: list[float] = [] - e2e_latencies: list[float] = [] - - for name, timing in results: - meta = {"operation_name": name} - init_latencies.append(timing.initiation_latency) - e2e_latencies.append(timing.end_to_end_latency) - samples.append( - sample.Sample( - f"{metric_prefix}_InitiationLatency", - timing.initiation_latency, - "seconds", - dict(meta), - ) - ) - samples.append( - sample.Sample( - f"{metric_prefix}_EndToEndLatency", - timing.end_to_end_latency, - "seconds", - dict(meta), - ) - ) - - if attempted_ops == 0: - raise errors.Benchmarks.RunError( - f"{metric_prefix}: zero operations attempted — the scenario produced " - "no work, which indicates a setup or dispatch failure." - ) - succeeded = len(results) - count_meta = { - "total_ops": str(attempted_ops), - "succeeded_ops": str(succeeded), - "failed_ops": str(attempted_ops - succeeded), - "failed_pools": ",".join(failed) if failed else "none", - } - for label, value in ( - ("TotalOps", attempted_ops), - ("SucceededOps", succeeded), - ("FailedOps", attempted_ops - succeeded), - ): - samples.append( - sample.Sample( - f"{metric_prefix}_{label}", value, "count", dict(count_meta) - ) - ) - samples.append( - sample.Sample( - f"{metric_prefix}_SuccessRate", - 100.0 * succeeded / attempted_ops, - "percent", - dict(count_meta), - ) - ) - - samples += _AggregateAndOutlierSamples( - metric_prefix, init_latencies, e2e_latencies - ) - return samples - - def _AggregateAndOutlierSamples( metric_prefix: str, init_latencies: list[float], diff --git a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py index c9fa2b90ea..d2afdd72f1 100644 --- a/tests/linux_benchmarks/kubernetes_management_benchmark_test.py +++ b/tests/linux_benchmarks/kubernetes_management_benchmark_test.py @@ -94,16 +94,6 @@ def testConcurrentPoolNameZeroPadsToThreeDigits(self, index, expected): expected, kubernetes_management_benchmark._ConcurrentPoolName(index) ) - @parameterized.named_parameters( - ('zero', 0, 'pkbmc0000'), - ('single_digit', 7, 'pkbmc0007'), - ('four_digit', 1000, 'pkbmc1000'), - ) - def testScalePoolNameZeroPadsToFourDigits(self, index, expected): - self.assertEqual( - expected, kubernetes_management_benchmark._ScalePoolName(index) - ) - def testOverlappingPoolNameIsConstant(self): self.assertEqual( 'pkbmb', kubernetes_management_benchmark._OVERLAPPING_POOL_NAME @@ -114,10 +104,6 @@ def testAllNamesWithinAksLimit(self): self.assertLessEqual( len(kubernetes_management_benchmark._ConcurrentPoolName(i)), 12 ) - for i in range(10000): - self.assertLessEqual( - len(kubernetes_management_benchmark._ScalePoolName(i)), 12 - ) self.assertLessEqual( len(kubernetes_management_benchmark._OVERLAPPING_POOL_NAME), 12 ) @@ -131,7 +117,6 @@ def testValidScenariosPass(self): k8s_mgmt_scenarios=[ 'concurrent_node_pool_ops', 'overlapping_cluster_update', - 'large_scale_provisioning', ] ): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) @@ -155,34 +140,11 @@ def testNonKubernetesClusterTypeRaises(self): _make_mock_config(cluster_type='Mesos') ) - def testInvalidScaleSweepRaises(self): - with flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_scale_sweep=['10', 'abc'], - ): - with self.assertRaises(errors.Config.InvalidValue): - kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - - def testValidScaleSweepPasses(self): - with flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_scale_sweep=['10', '50', '100'], - ): - kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - def testLowercaseScenarioRaises(self): with flagsaver.flagsaver(k8s_mgmt_scenarios=['a']): with self.assertRaises(errors.Config.InvalidValue): kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - def testScaleSweepWithoutLargeScaleRaises(self): - with flagsaver.flagsaver( - k8s_mgmt_scenarios=['concurrent_node_pool_ops'], - k8s_mgmt_scale_sweep=['10', '50'], - ): - with self.assertRaises(errors.Config.InvalidValue): - kubernetes_management_benchmark.CheckPrerequisites(_make_mock_config()) - class PrepareTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the Prepare benchmark lifecycle function.""" @@ -425,46 +387,6 @@ def testGetNameCallableApplied(self): self.assertEqual('poolname', name) -class RunAsyncTolerantTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _RunAsyncTolerant helper (large_scale path).""" - - @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) - def testAllSucceed(self): - results, failed = kubernetes_management_benchmark._RunAsyncTolerant( - kickoff=mock.Mock(return_value='op'), - wait_fn=mock.Mock(return_value=None), - items=['a', 'b'], - get_name=str, - ) - self.assertLen(results, 2) - self.assertEmpty(failed) - - @flagsaver.flagsaver(k8s_mgmt_max_concurrent=50) - def testFailuresRecordedNotRaised(self): - """Tolerant path catches failures and records their names.""" - - def _kickoff(item): - if item == 'b': - raise RuntimeError('b failed') - return 'op' - - results, failed = kubernetes_management_benchmark._RunAsyncTolerant( - kickoff=_kickoff, - wait_fn=mock.Mock(return_value=None), - items=['a', 'b', 'c'], - get_name=str, - ) - self.assertLen(results, 2) - self.assertEqual(['b'], failed) - - def testEmptyItemsReturnsEmpty(self): - results, failed = kubernetes_management_benchmark._RunAsyncTolerant( - kickoff=mock.Mock(), wait_fn=mock.Mock(), items=[], get_name=str - ) - self.assertEmpty(results) - self.assertEmpty(failed) - - class MakeNodePoolConfigTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _MakeNodePoolConfig factory.""" @@ -568,54 +490,6 @@ def testOutliersNotGeneratedForThreeOrFewer(self): ) -class LargeScaleSamplesTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _LargeScaleSamples tolerant-path helper.""" - - def testSuccessRateHundredWhenAllSucceed(self): - results = [ - ('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0)), - ('op2', kubernetes_management_benchmark.OpTiming(0.5, 1.5)), - ] - samples = kubernetes_management_benchmark._LargeScaleSamples( - 'Op', results, [], attempted_ops=2 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertAlmostEqual(100.0, rate.value) - - def testSuccessRateReflectsFailures(self): - results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] - samples = kubernetes_management_benchmark._LargeScaleSamples( - 'Op', results, ['op2', 'op3'], attempted_ops=3 - ) - rate = next(s for s in samples if s.metric == 'Op_SuccessRate') - self.assertAlmostEqual(100.0 / 3, rate.value, places=3) - - def testFailedPoolsListedInMetadata(self): - results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] - samples = kubernetes_management_benchmark._LargeScaleSamples( - 'Op', results, ['op2', 'op3'], attempted_ops=3 - ) - failed = next(s for s in samples if s.metric == 'Op_FailedOps') - self.assertEqual(2, failed.value) - self.assertEqual('op2,op3', failed.metadata['failed_pools']) - - def testCountMetricsExposed(self): - results = [('op1', kubernetes_management_benchmark.OpTiming(1.0, 2.0))] - samples = kubernetes_management_benchmark._LargeScaleSamples( - 'Op', results, ['op2'], attempted_ops=2 - ) - metrics = {s.metric: s.value for s in samples} - self.assertEqual(2, metrics['Op_TotalOps']) - self.assertEqual(1, metrics['Op_SucceededOps']) - self.assertEqual(1, metrics['Op_FailedOps']) - - def testZeroAttemptedRaisesRunError(self): - with self.assertRaises(errors.Benchmarks.RunError): - kubernetes_management_benchmark._LargeScaleSamples( - 'Op', [], [], attempted_ops=0 - ) - - class AggregateSamplesTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the _AggregateSamples statistics helper.""" @@ -724,41 +598,8 @@ def testReturnsSingleSample(self): class RunTest(pkb_common_test_case.PkbCommonTestCase): """Tests for the Run benchmark entry-point function.""" - @flagsaver.flagsaver( - k8s_mgmt_scenarios=[ - 'concurrent_node_pool_ops', - 'overlapping_cluster_update', - 'large_scale_provisioning', - ], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, - ) - def testRunSweepsAfterEachScenario(self): - """Run sweeps node pools after each scenario that executes.""" - cluster = _make_mock_cluster() - bm_spec = _make_mock_benchmark_spec(cluster) - with mock.patch.object( - kubernetes_management_benchmark, '_ClearNodePools' - ) as mock_clean, mock.patch.object( - kubernetes_management_benchmark, - '_RunConcurrentNodePoolOps', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunOverlappingClusterUpdate', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] - ): - kubernetes_management_benchmark.Run(bm_spec) - # All three scenarios run by default -> one sweep after each. - self.assertEqual(mock_clean.call_count, 3) - mock_clean.assert_called_with(cluster) - @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, ) def testRunOnlyScenarioACallsOnlyA(self): """Run dispatches only to _RunConcurrentNodePoolOps for that scenario.""" @@ -774,18 +615,13 @@ def testRunOnlyScenarioACallsOnlyA(self): kubernetes_management_benchmark, '_RunOverlappingClusterUpdate', return_value=[], - ) as mock_b, mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] - ) as mock_c: + ) as mock_b: kubernetes_management_benchmark.Run(bm_spec) mock_a.assert_called_once() mock_b.assert_not_called() - mock_c.assert_not_called() @flagsaver.flagsaver( k8s_mgmt_scenarios=['overlapping_cluster_update'], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, ) def testRunOnlyScenarioBCallsOnlyB(self): """Run dispatches only to _RunOverlappingClusterUpdate for that scenario.""" @@ -801,104 +637,13 @@ def testRunOnlyScenarioBCallsOnlyB(self): kubernetes_management_benchmark, '_RunOverlappingClusterUpdate', return_value=[], - ) as mock_b, mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] - ) as mock_c: + ) as mock_b: kubernetes_management_benchmark.Run(bm_spec) mock_a.assert_not_called() mock_b.assert_called_once() - mock_c.assert_not_called() - - @flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=42, - ) - def testRunScenarioCPassesLargeScaleFlag(self): - """Run passes the large-scale-nodepools flag down to _ScaleToPoolCount.""" - cluster = _make_mock_cluster() - bm_spec = _make_mock_benchmark_spec(cluster) - with mock.patch.object( - kubernetes_management_benchmark, '_ClearNodePools' - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunConcurrentNodePoolOps', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunOverlappingClusterUpdate', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] - ) as mock_c: - kubernetes_management_benchmark.Run(bm_spec) - mock_c.assert_called_once() - _, _, scale = mock_c.call_args.args - self.assertEqual(42, scale) - - @flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_scale_sweep=['10', '50'], - k8s_mgmt_large_scale_nodepools=100, - ) - def testRunScenarioCScaleSweepRunsTwice(self): - """Tests that Run calls _ScaleToPoolCount once per scale in the sweep.""" - cluster = _make_mock_cluster() - bm_spec = _make_mock_benchmark_spec(cluster) - with mock.patch.object( - kubernetes_management_benchmark, '_ClearNodePools' - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunConcurrentNodePoolOps', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunOverlappingClusterUpdate', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_ScaleToPoolCount', - return_value=[_make_sample('m', 1.0)], - ) as mock_c: - kubernetes_management_benchmark.Run(bm_spec) - self.assertEqual(2, mock_c.call_count) - scales = [call.args[2] for call in mock_c.call_args_list] - self.assertIn(10, scales) - self.assertIn(50, scales) - - @flagsaver.flagsaver( - k8s_mgmt_scenarios=['large_scale_provisioning'], - k8s_mgmt_scale_sweep=['10'], - k8s_mgmt_large_scale_nodepools=10, - ) - def testRunTagsScenarioCGoalInMetadata(self): - """Tests that Run adds goal_nodepools to each sample's metadata.""" - cluster = _make_mock_cluster() - bm_spec = _make_mock_benchmark_spec(cluster) - test_sample = _make_sample('metric', 1.0) - with mock.patch.object( - kubernetes_management_benchmark, '_ClearNodePools' - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunConcurrentNodePoolOps', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_RunOverlappingClusterUpdate', - return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, - '_ScaleToPoolCount', - return_value=[test_sample], - ): - samples = kubernetes_management_benchmark.Run(bm_spec) - self.assertIn('goal_nodepools', samples[0].metadata) - self.assertEqual('10', samples[0].metadata['goal_nodepools']) @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, ) def testRunTagsAllSamplesWithRunMetadata(self): """Tests that Run adds version and config keys to all sample metadata.""" @@ -915,8 +660,6 @@ def testRunTagsAllSamplesWithRunMetadata(self): kubernetes_management_benchmark, '_RunOverlappingClusterUpdate', return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) meta = samples[0].metadata @@ -931,8 +674,6 @@ def testRunTagsAllSamplesWithRunMetadata(self): @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], k8s_mgmt_initial_version='1.30', - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, ) def testRunUsesExplicitVersionFlags(self): """Tests that Run uses explicit version flags over auto-resolved ones.""" @@ -948,8 +689,6 @@ def testRunUsesExplicitVersionFlags(self): kubernetes_management_benchmark, '_RunOverlappingClusterUpdate', return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_not_called() @@ -957,8 +696,6 @@ def testRunUsesExplicitVersionFlags(self): @flagsaver.flagsaver( k8s_mgmt_scenarios=['concurrent_node_pool_ops'], - k8s_mgmt_scale_sweep=[], - k8s_mgmt_large_scale_nodepools=10, ) def testRunAutoResolvesVersionsWhenFlagsAbsent(self): """Tests Run calls ResolveNodePoolVersions when version flags absent.""" @@ -975,8 +712,6 @@ def testRunAutoResolvesVersionsWhenFlagsAbsent(self): kubernetes_management_benchmark, '_RunOverlappingClusterUpdate', return_value=[], - ), mock.patch.object( - kubernetes_management_benchmark, '_ScaleToPoolCount', return_value=[] ): samples = kubernetes_management_benchmark.Run(bm_spec) cluster.ResolveNodePoolVersions.assert_called_once() @@ -1096,65 +831,5 @@ def testPassesInitialVersionToCreate(self): self.assertEqual('1.33', node_version) -class RunScenarioCTest(pkb_common_test_case.PkbCommonTestCase): - """Tests for the _ScaleToPoolCount large-scale create-and-delete scenario.""" - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testProducesCreateAndDeleteSamples(self): - cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) - samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', goal_nodepools=2 - ) - metrics = {s.metric for s in samples} - self.assertTrue(any('LargeScale_Create' in m for m in metrics)) - self.assertTrue(any('LargeScale_Delete' in m for m in metrics)) - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testZeroLivePoolsRecordsZeroDeleteSuccessRate(self): - """Tests Scenario C records 0% delete rate when no live pools exist.""" - cluster = _make_mock_cluster(pool_names=[]) - samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', goal_nodepools=3 - ) - delete_rate = next( - s for s in samples if s.metric == 'LargeScale_Delete_SuccessRate' - ) - self.assertEqual(0.0, delete_rate.value) - cluster.DeleteNodePoolAsync.assert_not_called() - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testDeleteUsesLiveListNotOriginalCreateList(self): - cluster = _make_mock_cluster(pool_names=['pkbmc0000', 'pkbmc0001']) - kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', goal_nodepools=3 - ) - self.assertEqual(2, cluster.DeleteNodePoolAsync.call_count) - - @flagsaver.flagsaver( - k8s_mgmt_nodes_per_nodepool=1, - k8s_mgmt_max_concurrent=50, - ) - def testCreateSuccessRateUsesScaleAsDenominator(self): - """Tests Scenario C create success rate uses scale as total_ops.""" - cluster = _make_mock_cluster(pool_names=['pkbmc0000']) - samples = kubernetes_management_benchmark._ScaleToPoolCount( - cluster, '1.33', goal_nodepools=3 - ) - create_rate = next( - s for s in samples if s.metric == 'LargeScale_Create_SuccessRate' - ) - self.assertLessEqual(create_rate.value, 100.0) - self.assertEqual('3', create_rate.metadata['total_ops']) - - if __name__ == '__main__': unittest.main() From 95624987c990bc78a7857607afd8618e30ff0b84 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 23 Jun 2026 09:22:44 +0000 Subject: [PATCH 12/15] kubernetes_management: address review feedback (round 3) --- .../kubernetes_management_benchmark.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py index 3db5a4839c..596a324d04 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_management_benchmark.py @@ -52,8 +52,8 @@ kubernetes_management: description: > Benchmarks GKE/EKS/AKS management plane operations: concurrent node pool - create/delete, overlapping cluster + node-pool ops, and large-scale - provisioning. Focused on control-plane API responsiveness. + create/delete, and overlapping cluster + node-pool ops. Focused on + control-plane API responsiveness. container_cluster: type: Kubernetes vm_count: 1 @@ -164,7 +164,6 @@ def CheckPrerequisites( def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None: """Deploys a sleep pod to confirm data-plane reachability.""" cluster = benchmark_spec.container_cluster - # Type narrowing for pytype; reachability is confirmed by the sleep pod below. assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) benchmark_spec.always_call_cleanup = True logging.info( @@ -207,12 +206,11 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]: assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) # Resolve the initial node-pool version once; log clearly; tag every sample. - flag_initial = _INITIAL_VERSION.value - if not flag_initial: - resolved_initial, _ = cluster.ResolveNodePoolVersions() - flag_initial = resolved_initial - initial = flag_initial - source = "flag" if _INITIAL_VERSION.value else "auto-resolved" + initial = _INITIAL_VERSION.value + source = "flag" if initial else "auto-resolved" + if not initial: + initial, _ = cluster.ResolveNodePoolVersions() + assert initial is not None logging.info( "NodePool version (%s): initial=%s " @@ -255,6 +253,7 @@ def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None: cluster = benchmark_spec.container_cluster if cluster is None: return + assert isinstance(cluster, kubernetes_cluster.KubernetesCluster) kubectl.RunKubectlCommand( ["delete", "pod", _SLEEP_POD_NAME, "--ignore-not-found"], raise_on_failure=False, @@ -307,7 +306,9 @@ def _RunOverlappingClusterUpdate( Both ops kick off async on separate threads; initiation + E2E latency recorded independently. Overlap window = ClusterUpdate E2E latency. """ - logging.info("Scenario B: overlapping cluster update + node-pool create") + logging.info( + "overlapping_cluster_update: cluster update + node-pool create" + ) cfg = _MakeNodePoolConfig(cluster, _OVERLAPPING_POOL_NAME) results = ThreadSafeResults() @@ -315,7 +316,7 @@ def DoClusterUpdate(): timing = _TimedAsync(cluster.UpdateClusterAsync, cluster.WaitForOperation) results.add("OverlappingUpdate_ClusterUpdate", timing) logging.info( - "Scenario B ClusterUpdate: init=%.2fs e2e=%.2fs", + "overlapping_cluster_update ClusterUpdate: init=%.2fs e2e=%.2fs", timing.initiation_latency, timing.end_to_end_latency, ) @@ -327,7 +328,7 @@ def DoCreate(): ) results.add("OverlappingUpdate_NodePoolCreate", timing) logging.info( - "Scenario B NodePoolCreate: init=%.2fs e2e=%.2fs", + "overlapping_cluster_update NodePoolCreate: init=%.2fs e2e=%.2fs", timing.initiation_latency, timing.end_to_end_latency, ) From 566291839995d4165cb3242a1992c1e80cb8a725 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Mon, 29 Jun 2026 11:13:49 +0000 Subject: [PATCH 13/15] kubernetes_management: move changelog entry under New features section --- CHANGES.next.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGES.next.md b/CHANGES.next.md index 9440b7739f..a2e329a92e 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -1,7 +1,3 @@ -### New features: -- Add kubernetes_management benchmark for measuring GKE/EKS/AKS management - plane API responsiveness. - ### Breaking changes: - Added --accept_licenses flag. User have to turn this flag on to acknowledge @@ -288,6 +284,8 @@ - Re-enable support for Rocky Linux 8, 9, and 10 for the Azure provider. - Add Ubuntu 26.04 support for GCP, AWS, and Azure Providers. - Add a kubernetes-native benchmark for MySQL using sysbench +- Add kubernetes_management benchmark for measuring GKE/EKS/AKS management + plane API responsiveness. ### Enhancements: From 1f3622529bd1c1a368b0b616cf845e52818f6497 Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Mon, 29 Jun 2026 17:54:05 +0000 Subject: [PATCH 14/15] container_service_mock: implement GetNodePoolNames on TestKubernetesCluster The new GetNodePoolNames abstract method (added to KubernetesCluster base class in this PR) broke 38 tests in container_service_test.py -- TestKubernetesCluster couldn't be instantiated without an implementation. Adds a trivial stub matching the existing _Create/_Delete style. --- tests/container_service_mock.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/container_service_mock.py b/tests/container_service_mock.py index 5fb9829c38..98960c848d 100644 --- a/tests/container_service_mock.py +++ b/tests/container_service_mock.py @@ -26,6 +26,9 @@ def _Create(self): def _Delete(self): pass + def GetNodePoolNames(self) -> list[str]: + return [] + def CreateTestKubernetesCluster( container_cluster_spec: container_spec.ContainerClusterSpec | None = None, From 15ee9b3cd6b352628505bf78a00fd3fd9ad953ef Mon Sep 17 00:00:00 2001 From: Ashish Suneja Date: Tue, 30 Jun 2026 10:52:47 +0000 Subject: [PATCH 15/15] provision_container_cluster_benchmark_test: implement GetNodePoolNames stub Same fix as container_service_mock.py -- a second TestKubernetesCluster mock in this file also needs the new abstract method stub. --- .../provision_container_cluster_benchmark_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/linux_benchmarks/provision_container_cluster_benchmark_test.py b/tests/linux_benchmarks/provision_container_cluster_benchmark_test.py index 7b64d89124..e0ab7ccf31 100644 --- a/tests/linux_benchmarks/provision_container_cluster_benchmark_test.py +++ b/tests/linux_benchmarks/provision_container_cluster_benchmark_test.py @@ -26,6 +26,9 @@ def __init__(self): kubernetes_cluster.kubernetes_commands.GetEvents ) + def GetNodePoolNames(self) -> list[str]: + return [] + _CONTAINER_START_YAML = """ - apiVersion: v1