From 3956dcde4d3caf14905f2e578685459f74bc3efd Mon Sep 17 00:00:00 2001 From: Eric Hole Date: Tue, 26 May 2026 16:58:26 -0700 Subject: [PATCH 1/2] aws/key: use fileb:// for ec2 import-key-pair --public-key-material `aws ec2 import-key-pair --public-key-material` rejects raw OpenSSH public keys as "Invalid base64" when the value is passed as a CLI string. The CLI expects either base64-encoded bytes via the string form, or raw bytes via a fileb:// URI. PKB was passing the raw key string via cat -> --public-key-material=$keyfile, which produces the rejected case. Switch to fileb:// so the CLI reads the bytes directly. Without this fix, AwsKeyFileManager.ImportKeyfile raises PrepareException on every AWS provisioning, blocking EksCluster (and AwsVm) Create(). --- perfkitbenchmarker/providers/aws/aws_virtual_machine.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/perfkitbenchmarker/providers/aws/aws_virtual_machine.py b/perfkitbenchmarker/providers/aws/aws_virtual_machine.py index 4c26a23fcd..854d91ee2a 100644 --- a/perfkitbenchmarker/providers/aws/aws_virtual_machine.py +++ b/perfkitbenchmarker/providers/aws/aws_virtual_machine.py @@ -441,8 +441,10 @@ def ImportKeyfile(cls, region): with cls._lock: if _GetKeyfileSetKey(region) in cls.imported_keyfile_set: return - cat_cmd = ['cat', vm_util.GetPublicKeyPath()] - keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + # `aws ec2 import-key-pair --public-key-material` rejects a raw + # OpenSSH-formatted key with "Invalid base64" when the value comes + # in as a CLI string. The `fileb://` URI tells the CLI to read the + # file as bytes and send them through, which AWS accepts. formatted_tags = util.FormatTagSpecifications( 'key-pair', util.MakeDefaultTags() ) @@ -451,7 +453,7 @@ def ImportKeyfile(cls, region): '--region=%s' % region, 'import-key-pair', '--key-name=%s' % cls.GetKeyNameForRun(), - '--public-key-material=%s' % keyfile, + '--public-key-material=fileb://%s' % vm_util.GetPublicKeyPath(), '--tag-specifications=%s' % formatted_tags, ] _, stderr, retcode = vm_util.IssueCommand( From c20ce1b497eacf2450e90a55ee2449c971280f31 Mon Sep 17 00:00:00 2001 From: Eric Hole Date: Fri, 5 Jun 2026 16:07:52 +0000 Subject: [PATCH 2/2] eks: support nodepool labels and taints Add _ParseTaint helper and update _RenderNodeGroupJson in BaseEksCluster to pass node_labels and node_taints through to eksctl's node group JSON. Taints are parsed from 'key=value:Effect' or 'key:Effect' strings into the dict format eksctl expects. --- perfkitbenchmarker/configs/container_spec.py | 10 +++++ .../aws/elastic_kubernetes_service.py | 25 +++++++++-- .../resources/container_service/container.py | 2 + .../container_service/container_cluster.py | 2 + tests/providers/aws/aws_cluster_test.py | 1 - .../aws/elastic_kubernetes_service_test.py | 41 +++++++++++++++++++ 6 files changed, 77 insertions(+), 4 deletions(-) diff --git a/perfkitbenchmarker/configs/container_spec.py b/perfkitbenchmarker/configs/container_spec.py index 1f808ad066..5071eba87d 100644 --- a/perfkitbenchmarker/configs/container_spec.py +++ b/perfkitbenchmarker/configs/container_spec.py @@ -243,6 +243,8 @@ def __init__( self.vm_spec: virtual_machine_spec.BaseVmSpec self.machine_families: list[str] | None self.sandbox_config: SandboxSpec | None + self.node_labels: dict[str, str] | None + self.node_taints: list[str] | None @classmethod def _GetOptionDecoderConstructions(cls): @@ -273,6 +275,14 @@ def _GetOptionDecoderConstructions(cls): ), 'vm_spec': (spec.PerCloudConfigDecoder, {}), 'sandbox_config': (_SandboxDecoder, {'default': None}), + 'node_labels': ( + option_decoders.TypeVerifier, + {'valid_types': (dict,), 'default': None, 'none_ok': True}, + ), + 'node_taints': ( + option_decoders.TypeVerifier, + {'valid_types': (list,), 'default': None, 'none_ok': True}, + ), }) return result diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py index a57a43057a..d628a8fcc8 100644 --- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py +++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -101,6 +101,22 @@ def ApplyInferenceS3PvAndPvc() -> None: logging.info('Successfully applied S3 PVC.') +def _ParseTaint(taint_str: str) -> dict[str, str]: + """Parses a taint string into eksctl's dict format. + + Args: + taint_str: Taint in 'key=value:Effect' or 'key:Effect' format. + + Returns: + Dict with 'key', 'value' (optional), and 'effect' keys for eksctl. + """ + key_value, effect = taint_str.rsplit(':', 1) + if '=' in key_value: + key, value = key_value.split('=', 1) + return {'key': key, 'value': value, 'effect': effect} + return {'key': key_value, 'effect': effect} + + class BaseEksCluster(kubernetes_cluster.KubernetesCluster): """Shared base class for Elastic Kubernetes Service cluster auto mode & not.""" @@ -195,16 +211,19 @@ def _RenderNodeGroupJson( self, nodepool: container.BaseNodePoolConfig ) -> dict[str, Any]: """Constructs the node group json dictionary.""" + labels = {'pkb_nodepool': nodepool.name} + if nodepool.node_labels: + labels.update(nodepool.node_labels) group_json = { 'name': nodepool.name, 'instanceType': nodepool.machine_type, 'desiredCapacity': nodepool.num_nodes, 'amiFamily': 'AmazonLinux2023', 'tags': util.MakeDefaultTags(), - 'labels': { - 'pkb_nodepool': nodepool.name, - }, + 'labels': labels, } + if nodepool.node_taints: + group_json['taints'] = [_ParseTaint(t) for t in nodepool.node_taints] if nodepool.min_nodes != nodepool.max_nodes: group_json['minSize'] = nodepool.min_nodes group_json['maxSize'] = nodepool.max_nodes diff --git a/perfkitbenchmarker/resources/container_service/container.py b/perfkitbenchmarker/resources/container_service/container.py index 3e05a1ec2b..2850ded4ce 100644 --- a/perfkitbenchmarker/resources/container_service/container.py +++ b/perfkitbenchmarker/resources/container_service/container.py @@ -184,6 +184,8 @@ def __init__( self.disk_size: int = vm_spec.boot_disk_size self.gpu_type: str | None = vm_spec.gpu_type self.gpu_count: int | None = vm_spec.gpu_count + self.node_labels: dict[str, str] | None = None + self.node_taints: list[str] | None = None # Defined by GceVirtualMachineConfig. Used by google_kubernetes_engine # pylint: disable=g-missing-from-attributes self.sandbox_config: container_spec_lib.SandboxSpec | None = None diff --git a/perfkitbenchmarker/resources/container_service/container_cluster.py b/perfkitbenchmarker/resources/container_service/container_cluster.py index 9458662c98..176d4ce25c 100644 --- a/perfkitbenchmarker/resources/container_service/container_cluster.py +++ b/perfkitbenchmarker/resources/container_service/container_cluster.py @@ -116,6 +116,8 @@ def _InitializeNodePool( nodepool_spec.machine_families, ) nodepool_config.sandbox_config = nodepool_spec.sandbox_config + nodepool_config.node_labels = nodepool_spec.node_labels + nodepool_config.node_taints = nodepool_spec.node_taints nodepool_config.zone = zone nodepool_config.num_nodes = nodepool_spec.vm_count if nodepool_spec.min_vm_count is None: diff --git a/tests/providers/aws/aws_cluster_test.py b/tests/providers/aws/aws_cluster_test.py index 53df198301..c17a3ce08d 100644 --- a/tests/providers/aws/aws_cluster_test.py +++ b/tests/providers/aws/aws_cluster_test.py @@ -58,7 +58,6 @@ def setUp(self): def testCreateDependencies(self): self.mock_issue.side_effect = [ - ('fake_key', None, None), # Mock 'cat key' from __init__ ('', None, None), # Mock import key from ImportKeyfile # Mock vpc creation ('''Creating CloudFormation stack... diff --git a/tests/providers/aws/elastic_kubernetes_service_test.py b/tests/providers/aws/elastic_kubernetes_service_test.py index 42450bd63c..771d231d13 100644 --- a/tests/providers/aws/elastic_kubernetes_service_test.py +++ b/tests/providers/aws/elastic_kubernetes_service_test.py @@ -237,6 +237,47 @@ def testEksClusterNodepoolsAutoscaling(self): self.assertEqual(node_groups[1]['maxSize'], 10) self.assertEqual(node_groups[1]['desiredCapacity'], 3) + def testEksClusterNodepoolLabels(self): + cluster = elastic_kubernetes_service.EksCluster(EKS_SPEC) + nodepool = cluster.default_nodepool + nodepool.node_labels = {'env': 'prod', 'team': 'ml'} + actual = cluster._RenderNodeGroupJson(nodepool) + self.assertEqual( + actual['labels'], + {'pkb_nodepool': nodepool.name, 'env': 'prod', 'team': 'ml'}, + ) + + def testEksClusterNodepoolTaints(self): + cluster = elastic_kubernetes_service.EksCluster(EKS_SPEC) + nodepool = cluster.default_nodepool + nodepool.node_taints = [ + 'sandbox.gke.io/runtime=runsc:NoSchedule', + 'dedicated:NoExecute', + ] + actual = cluster._RenderNodeGroupJson(nodepool) + self.assertEqual( + actual['taints'], + [ + { + 'key': 'sandbox.gke.io/runtime', + 'value': 'runsc', + 'effect': 'NoSchedule', + }, + {'key': 'dedicated', 'effect': 'NoExecute'}, + ], + ) + + + def testParseTaint(self): + self.assertEqual( + elastic_kubernetes_service._ParseTaint('key=value:NoSchedule'), + {'key': 'key', 'value': 'value', 'effect': 'NoSchedule'}, + ) + self.assertEqual( + elastic_kubernetes_service._ParseTaint('dedicated:NoExecute'), + {'key': 'dedicated', 'effect': 'NoExecute'}, + ) + def testGetNodePoolNames(self): # Mock the output of the aws cli command cluster = elastic_kubernetes_service.EksCluster(EKS_SPEC)