diff --git a/operator/internal/controller/podclique/components/resourceclaim/resourceclaim.go b/operator/internal/controller/podclique/components/resourceclaim/resourceclaim.go index 2bb362709..25a6bce6b 100644 --- a/operator/internal/controller/podclique/components/resourceclaim/resourceclaim.go +++ b/operator/internal/controller/podclique/components/resourceclaim/resourceclaim.go @@ -31,6 +31,7 @@ import ( "github.com/go-logr/logr" resourcev1 "k8s.io/api/resource/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -64,6 +65,9 @@ func (r _resource) GetExistingResourceNames(ctx context.Context, _ logr.Logger, client.InNamespace(pclqObjMeta.Namespace), client.MatchingLabels(pclqResourceClaimLabels(pclqObjMeta)), ); err != nil { + if meta.IsNoMatchError(err) { + return nil, nil + } return nil, groveerr.WrapError(err, errSyncPCLQLevelRC, component.OperationGetExistingResourceNames, @@ -188,12 +192,16 @@ func pclqResourceClaimLabels(pclqObjMeta metav1.ObjectMeta) map[string]string { // This is required because the PCLQ finalizer's verifyNoResourcesAwaitsCleanup // blocks finalizer removal until all owned resources are gone; relying solely on // GC would create a deadlock since GC only fires after the PCLQ is fully deleted. -func (r _resource) Delete(ctx context.Context, _ logr.Logger, pclqObjMeta metav1.ObjectMeta) error { +func (r _resource) Delete(ctx context.Context, logger logr.Logger, pclqObjMeta metav1.ObjectMeta) error { labels := pclqResourceClaimLabels(pclqObjMeta) if err := r.client.DeleteAllOf(ctx, &resourcev1.ResourceClaim{}, client.InNamespace(pclqObjMeta.Namespace), client.MatchingLabels(labels), ); err != nil { + if meta.IsNoMatchError(err) { + logger.V(1).Info("ResourceClaim API not served by cluster, skipping delete", "namespace", pclqObjMeta.Namespace, "podClique", pclqObjMeta.Name, "err", err.Error()) + return nil + } return groveerr.WrapError(err, errDeletePCLQLevelRC, component.OperationDelete, diff --git a/operator/internal/controller/podclique/components/resourceclaim/resourceclaim_test.go b/operator/internal/controller/podclique/components/resourceclaim/resourceclaim_test.go index d06336b40..70e52d065 100644 --- a/operator/internal/controller/podclique/components/resourceclaim/resourceclaim_test.go +++ b/operator/internal/controller/podclique/components/resourceclaim/resourceclaim_test.go @@ -26,12 +26,27 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" resourcev1 "k8s.io/api/resource/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" ) +// noResourceClaimAPIInterceptor simulates a cluster whose apiserver does not +// serve resource.k8s.io ResourceClaim by returning a NoKindMatchError. +var noResourceClaimAPIInterceptor = interceptor.Funcs{ + List: func(_ context.Context, _ client.WithWatch, _ client.ObjectList, _ ...client.ListOption) error { + return &meta.NoKindMatchError{GroupKind: schema.GroupKind{Group: resourcev1.GroupName, Kind: "ResourceClaim"}} + }, + DeleteAllOf: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.DeleteAllOfOption) error { + return &meta.NoKindMatchError{GroupKind: schema.GroupKind{Group: resourcev1.GroupName, Kind: "ResourceClaim"}} + }, +} + func newTestScheme() *runtime.Scheme { scheme := runtime.NewScheme() _ = grovecorev1alpha1.AddToScheme(scheme) @@ -191,6 +206,15 @@ func TestGetExistingResourceNames(t *testing.T) { require.NoError(t, err) assert.Equal(t, []string{pclqName + "-all-gpu-mps"}, names) }) + + t.Run("returns empty when ResourceClaim API is not served", func(t *testing.T) { + cl := fake.NewClientBuilder().WithScheme(scheme).WithInterceptorFuncs(noResourceClaimAPIInterceptor).Build() + r := _resource{client: cl, scheme: scheme} + + names, err := r.GetExistingResourceNames(context.Background(), logr.Discard(), pclqMeta) + require.NoError(t, err) + assert.Empty(t, names) + }) } // --- Sync --- @@ -387,4 +411,17 @@ func TestDelete(t *testing.T) { err := r.Delete(context.Background(), logr.Discard(), pclqMeta) require.NoError(t, err) }) + + t.Run("no-op when ResourceClaim API is not served", func(t *testing.T) { + pclqMeta := metav1.ObjectMeta{ + Name: pclqName, + Namespace: namespace, + Labels: map[string]string{apicommon.LabelPartOfKey: pcsName}, + } + cl := fake.NewClientBuilder().WithScheme(scheme).WithInterceptorFuncs(noResourceClaimAPIInterceptor).Build() + r := _resource{client: cl, scheme: scheme} + + err := r.Delete(context.Background(), logr.Discard(), pclqMeta) + require.NoError(t, err) + }) }