diff --git a/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md b/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md index aeacf1786..f5fa3584e 100644 --- a/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md +++ b/docs/user-guide/02_pod-and-resource-naming-conventions/02_naming-conventions.md @@ -158,6 +158,43 @@ To deploy a PodCliqueSet with this structure and explore the naming hierarchy th **You control:** PodCliqueSet name, PodClique template names, PCSG template names **Grove generates:** All resource instances with hierarchical naming +### Using Generated Names in User-Managed HPAs + +Grove can create HPAs for PodCliques and PodCliqueScalingGroups when the target has autoscaling configured. If you prefer to create an HPA yourself, leave autoscaling unset for that target and use the resource name in `scaleTargetRef`. + +List the PodCliqueSet and generated targets: + +```bash +kubectl get pcs +kubectl get pcsg -l app.kubernetes.io/part-of= +kubectl get pclq -l app.kubernetes.io/part-of= +``` + +Then use the selected target name in the HPA: + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: example-pcsg-hpa +spec: + scaleTargetRef: + apiVersion: grove.io/v1alpha1 + kind: PodCliqueScalingGroup + name: + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 +``` + +For the whole workload, use `kind: PodCliqueSet` and the PodCliqueSet name. For standalone PodCliques, use `kind: PodClique` and the selected PodClique name. PodCliques that belong to a PodCliqueScalingGroup should be scaled through the PodCliqueScalingGroup instead. + ## Key Takeaways 1. **Self-Documenting Hierarchy**: Pod names encode the complete hierarchy from PodCliqueSet → PCSG (if applicable) → PodClique → Pod, making `kubectl get pods` output immediately understandable. @@ -177,4 +214,3 @@ Now that you understand Grove's naming scheme and best practices: - **See it in action**: Continue to the [Hands-On Example](./03_hands-on-example.md) to deploy an example system and observe the naming hierarchy firsthand. - **Learn programmatic discovery**: Head to the [Environment Variables guide](../03_environment-variables-for-pod-discovery/01_overview.md) to learn how to use these names programmatically for pod discovery, including how Grove injects environment variables and how to construct FQDNs for pod-to-pod communication. - diff --git a/operator/internal/controller/podclique/reconcilestatus.go b/operator/internal/controller/podclique/reconcilestatus.go index 74ded2305..dc663f76c 100644 --- a/operator/internal/controller/podclique/reconcilestatus.go +++ b/operator/internal/controller/podclique/reconcilestatus.go @@ -165,9 +165,11 @@ func mutateUpdatedReplica(pclq *grovecorev1alpha1.PodClique, existingPods []*cor } } -// mutateSelector creates and sets the label selector for autoscaler use when scaling is configured +// mutateSelector publishes the label selector on the PodClique /scale subresource so HPAs can +// target the PodClique. PodCliques that belong to a PodCliqueScalingGroup are scaled via the PCSG +// and must not advertise their own selector. func mutateSelector(pcsName string, pclq *grovecorev1alpha1.PodClique) error { - if pclq.Spec.ScaleConfig == nil { + if _, isPCSGMember := pclq.Labels[apicommon.LabelPodCliqueScalingGroup]; isPCSGMember { return nil } labels := lo.Assign( diff --git a/operator/internal/controller/podclique/reconcilestatus_test.go b/operator/internal/controller/podclique/reconcilestatus_test.go index 132ef8c57..f2a48f4a5 100644 --- a/operator/internal/controller/podclique/reconcilestatus_test.go +++ b/operator/internal/controller/podclique/reconcilestatus_test.go @@ -26,6 +26,7 @@ import ( internalconstants "github.com/ai-dynamo/grove/operator/internal/constants" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" @@ -350,3 +351,53 @@ func TestComputeMinAvailableBreachedConditionPartialScheduleRegression(t *testin }) } } + +// TestMutateSelector verifies the /scale selector is published for standalone PodCliques (with or +// without ScaleConfig) and suppressed for PodCliques that belong to a PodCliqueScalingGroup, +// regardless of whether the PodClique itself has ScaleConfig set. +func TestMutateSelector(t *testing.T) { + const ( + pcsName = "test-pcs" + standaloneFQN = "test-pcs-0-frontend" + pcsgMemberFQN = "test-pcs-0-prefill-0-worker" + pcsgName = "test-pcs-0-prefill" + ) + withScale := &grovecorev1alpha1.AutoScalingConfig{MaxReplicas: 5} + + tests := []struct { + name string + pclqName string + pcsgMemberLabel string // empty == standalone + scaleConfig *grovecorev1alpha1.AutoScalingConfig + expectSelectorOK bool + }{ + {name: "standalone, no ScaleConfig", pclqName: standaloneFQN, expectSelectorOK: true}, + {name: "standalone, with ScaleConfig", pclqName: standaloneFQN, scaleConfig: withScale, expectSelectorOK: true}, + {name: "PCSG member, no ScaleConfig", pclqName: pcsgMemberFQN, pcsgMemberLabel: pcsgName, expectSelectorOK: false}, + // PCSG members are scaled through the PCSG; their own ScaleConfig (if any) must not + // resurrect the selector and re-expose them as an HPA target. + {name: "PCSG member, with ScaleConfig", pclqName: pcsgMemberFQN, pcsgMemberLabel: pcsgName, scaleConfig: withScale, expectSelectorOK: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + labels := map[string]string{} + if tt.pcsgMemberLabel != "" { + labels[apicommon.LabelPodCliqueScalingGroup] = tt.pcsgMemberLabel + } + pclq := &grovecorev1alpha1.PodClique{ + ObjectMeta: metav1.ObjectMeta{Name: tt.pclqName, Labels: labels}, + Spec: grovecorev1alpha1.PodCliqueSpec{ScaleConfig: tt.scaleConfig}, + } + + err := mutateSelector(pcsName, pclq) + assert.NoError(t, err) + if tt.expectSelectorOK { + require.NotNil(t, pclq.Status.Selector, "selector should be populated") + assert.Contains(t, *pclq.Status.Selector, apicommon.LabelPodClique+"="+pclq.Name) + } else { + assert.Nil(t, pclq.Status.Selector, "selector should not be populated for PCSG-member PCLQ") + } + }) + } +} diff --git a/operator/internal/controller/podcliquescalinggroup/reconcilestatus.go b/operator/internal/controller/podcliquescalinggroup/reconcilestatus.go index 4682478be..ffbfe95c1 100644 --- a/operator/internal/controller/podcliquescalinggroup/reconcilestatus.go +++ b/operator/internal/controller/podcliquescalinggroup/reconcilestatus.go @@ -312,13 +312,14 @@ func (r *Reconciler) getPodCliquesPerPCSGReplica(ctx context.Context, pcsName st return pclqsPerPCSGReplica, nil } -// mutateSelector creates and sets the label selector for autoscaler use when scaling is configured +// mutateSelector publishes the label selector on the PodCliqueScalingGroup /scale subresource so +// HPAs can target the PCSG. func mutateSelector(pcs *grovecorev1alpha1.PodCliqueSet, pcsg *grovecorev1alpha1.PodCliqueScalingGroup) error { pcsReplicaIndex, err := k8sutils.GetPodCliqueSetReplicaIndex(pcsg.ObjectMeta) if err != nil { return err } - matchingPCSGConfig, ok := lo.Find(pcs.Spec.Template.PodCliqueScalingGroupConfigs, func(pcsgConfig grovecorev1alpha1.PodCliqueScalingGroupConfig) bool { + _, ok := lo.Find(pcs.Spec.Template.PodCliqueScalingGroupConfigs, func(pcsgConfig grovecorev1alpha1.PodCliqueScalingGroupConfig) bool { pcsgFQN := apicommon.GeneratePodCliqueScalingGroupName(apicommon.ResourceNameReplica{Name: pcs.Name, Replica: pcsReplicaIndex}, pcsgConfig.Name) return pcsgFQN == pcsg.Name }) @@ -326,10 +327,6 @@ func mutateSelector(pcs *grovecorev1alpha1.PodCliqueSet, pcsg *grovecorev1alpha1 // This should ideally never happen but if you find a PCSG that is not defined in PCS then just ignore it. return nil } - // No ScaleConfig has been defined of this PCSG, therefore there is no need to add a selector in the status. - if matchingPCSGConfig.ScaleConfig == nil { - return nil - } labels := lo.Assign( apicommon.GetDefaultLabelsForPodCliqueSetManagedResources(pcs.Name), map[string]string{ diff --git a/operator/internal/controller/podcliquescalinggroup/reconcilestatus_test.go b/operator/internal/controller/podcliquescalinggroup/reconcilestatus_test.go index 7c6a01497..9e8335a15 100644 --- a/operator/internal/controller/podcliquescalinggroup/reconcilestatus_test.go +++ b/operator/internal/controller/podcliquescalinggroup/reconcilestatus_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + apicommon "github.com/ai-dynamo/grove/operator/api/common" "github.com/ai-dynamo/grove/operator/api/common/constants" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" internalconstants "github.com/ai-dynamo/grove/operator/internal/constants" @@ -859,3 +860,62 @@ func assertCondition(t *testing.T, pcsg *grovecorev1alpha1.PodCliqueScalingGroup isBreached := condition.Status == metav1.ConditionTrue assert.Equal(t, expectBreached, isBreached, "condition breach status mismatch") } + +// TestMutateSelector verifies the /scale selector is published for PCSGs regardless of whether +// ScaleConfig is set in the parent PodCliqueSet template, and is suppressed for PCSGs whose name +// does not match any config in the PodCliqueSet template. +func TestMutateSelector(t *testing.T) { + const ( + pcsName = "test-pcs" + pcsgConfigName = "prefill" + ) + pcsgFQN := apicommon.GeneratePodCliqueScalingGroupName(apicommon.ResourceNameReplica{Name: pcsName, Replica: 0}, pcsgConfigName) + withScale := &grovecorev1alpha1.AutoScalingConfig{MaxReplicas: 5} + + tests := []struct { + name string + pcsgName string // FQN to put on the PCSG; defaults to pcsgFQN + pcsgConfigScaleConfig *grovecorev1alpha1.AutoScalingConfig + expectSelectorPopulated bool + }{ + {name: "no ScaleConfig still publishes selector", expectSelectorPopulated: true}, + {name: "ScaleConfig present publishes selector", pcsgConfigScaleConfig: withScale, expectSelectorPopulated: true}, + // PCSG whose name does not match any config in the PCS template (e.g. stale object + // during a rename) must not publish a selector. + {name: "unknown PCSG name does not publish selector", pcsgName: "test-pcs-0-stale", expectSelectorPopulated: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pcs := &grovecorev1alpha1.PodCliqueSet{ + ObjectMeta: metav1.ObjectMeta{Name: pcsName}, + Spec: grovecorev1alpha1.PodCliqueSetSpec{ + Template: grovecorev1alpha1.PodCliqueSetTemplateSpec{ + PodCliqueScalingGroupConfigs: []grovecorev1alpha1.PodCliqueScalingGroupConfig{ + {Name: pcsgConfigName, ScaleConfig: tt.pcsgConfigScaleConfig}, + }, + }, + }, + } + name := tt.pcsgName + if name == "" { + name = pcsgFQN + } + pcsg := &grovecorev1alpha1.PodCliqueScalingGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{apicommon.LabelPodCliqueSetReplicaIndex: "0"}, + }, + } + + err := mutateSelector(pcs, pcsg) + require.NoError(t, err) + if tt.expectSelectorPopulated { + require.NotNil(t, pcsg.Status.Selector) + assert.Contains(t, *pcsg.Status.Selector, apicommon.LabelPodCliqueScalingGroup+"="+pcsg.Name) + } else { + assert.Nil(t, pcsg.Status.Selector) + } + }) + } +} diff --git a/operator/internal/controller/podcliqueset/reconcilestatus.go b/operator/internal/controller/podcliqueset/reconcilestatus.go index ca16fc02c..6c38c8d01 100644 --- a/operator/internal/controller/podcliqueset/reconcilestatus.go +++ b/operator/internal/controller/podcliqueset/reconcilestatus.go @@ -23,6 +23,7 @@ import ( "slices" "strconv" + apicommon "github.com/ai-dynamo/grove/operator/api/common" apicommonconstants "github.com/ai-dynamo/grove/operator/api/common/constants" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" "github.com/ai-dynamo/grove/operator/internal/clustertopology" @@ -36,6 +37,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -55,6 +57,10 @@ func (r *Reconciler) reconcileStatus(ctx context.Context, logger logr.Logger, pc return ctrlcommon.ReconcileWithErrors("failed to mutate TopologyLevelsUnavailable condition", err) } + if err = mutateSelector(pcs); err != nil { + return ctrlcommon.ReconcileWithErrors("failed to update selector for PodCliqueSet", err) + } + // Skip the status update when every mutate* above left status byte-identical to what // the previous reconcile already persisted. The mutators are the only code writing // pcs.Status here, so equality means there is nothing for the apiserver to store. @@ -91,6 +97,21 @@ func (r *Reconciler) mutateReplicas(ctx context.Context, logger logr.Logger, pcs return nil } +// mutateSelector publishes the label selector on the PodCliqueSet /scale subresource so HPAs can +// target the whole PodCliqueSet. PCS is the top-level scope, so the default labels are already +// the complete selector; no per-resource narrowing label like PodClique/PodCliqueScalingGroup is +// needed. +func mutateSelector(pcs *grovecorev1alpha1.PodCliqueSet) error { + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ + MatchLabels: apicommon.GetDefaultLabelsForPodCliqueSetManagedResources(pcs.Name), + }) + if err != nil { + return fmt.Errorf("%w: failed to create label selector for PodCliqueSet %v", err, client.ObjectKeyFromObject(pcs)) + } + pcs.Status.Selector = ptr.To(selector.String()) + return nil +} + // pcsReplicaStats aggregates replica- and child-level update progress derived from a single // pass over the informer cache. type pcsReplicaStats struct { diff --git a/operator/internal/controller/podcliqueset/reconcilestatus_test.go b/operator/internal/controller/podcliqueset/reconcilestatus_test.go index 00a56b9cc..0fcc9e434 100644 --- a/operator/internal/controller/podcliqueset/reconcilestatus_test.go +++ b/operator/internal/controller/podcliqueset/reconcilestatus_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + apicommon "github.com/ai-dynamo/grove/operator/api/common" apicommonconstants "github.com/ai-dynamo/grove/operator/api/common/constants" configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1" grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1" @@ -32,6 +33,7 @@ import ( "github.com/stretchr/testify/require" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -882,3 +884,43 @@ func pcsgName(replicaIndex int) string { return "" } } + +// TestMutateSelector verifies the /scale selector is always published for PodCliqueSet, scoped to +// resources managed by Grove for this PodCliqueSet (matched by `app.kubernetes.io/managed-by` and +// `app.kubernetes.io/part-of`). It also asserts the rendered selector parses back into a usable +// label selector that matches a Pod carrying the PCS-managed default labels. +func TestMutateSelector(t *testing.T) { + tests := []struct { + name string + existingSelector *string + }{ + {name: "publishes selector for fresh PodCliqueSet"}, + // PCS always rewrites Status.Selector, so a stale value from a previous reconcile must + // not survive into the new status. + {name: "overwrites a stale selector", existingSelector: ptr.To("app.kubernetes.io/part-of=stale-pcs")}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pcs := &grovecorev1alpha1.PodCliqueSet{ + ObjectMeta: metav1.ObjectMeta{Name: testPCSName}, + Status: grovecorev1alpha1.PodCliqueSetStatus{Selector: tt.existingSelector}, + } + + err := mutateSelector(pcs) + require.NoError(t, err) + require.NotNil(t, pcs.Status.Selector) + + assert.Contains(t, *pcs.Status.Selector, apicommon.LabelManagedByKey+"="+apicommon.LabelManagedByValue) + assert.Contains(t, *pcs.Status.Selector, apicommon.LabelPartOfKey+"="+testPCSName) + + parsed, err := labels.Parse(*pcs.Status.Selector) + require.NoError(t, err, "rendered selector must parse as a valid label selector") + podLabels := labels.Set{ + apicommon.LabelManagedByKey: apicommon.LabelManagedByValue, + apicommon.LabelPartOfKey: testPCSName, + } + assert.True(t, parsed.Matches(podLabels), "selector should match a Pod carrying the PCS-managed default labels") + }) + } +}