Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ type ProfileProjectionConfig struct {
// StrictValidation rejects rules with profileDependency>0 but no profileDataRequired.
// Defaults to false (soft mode: log + metric only).
StrictValidation bool `mapstructure:"strictValidation"`
// ServerSideUserManagedMerge, when true, trusts the storage backend to merge
// user-managed (ug-) ApplicationProfile / NetworkNeighborhood into the
// consolidated ContainerProfile server-side (kubescape/storage#319 serves the
// already-merged CP on GET). node-agent then skips its redundant client-side
// ug- fetch + projection in the ContainerProfile cache.
//
// Default false preserves the client-side merge, so this flag must only be
// enabled once the storage floor that ships #319 is confirmed deployed:
// enabling it against pre-#319 storage would silently drop ug- exception
// enforcement (the merged CP isn't served, and node-agent no longer overlays
// ug- itself).
//
// Load-bearing assumption: storage#319 merges on GET only, NOT on List/Watch.
// node-agent's CP reads are GET-driven (storage.ProfileClient.GetContainerProfile),
// so the merged view is always observed. If CP reads ever move to a list-watch
// informer, the server-side merge is bypassed and ug- overlays disappear —
// re-evaluate this flag before doing so.
ServerSideUserManagedMerge bool `mapstructure:"serverSideUserManagedMergeEnabled"`
}

type EventDedupConfig struct {
Expand Down
15 changes: 14 additions & 1 deletion pkg/objectcache/containerprofilecache/containerprofilecache.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,13 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry(
// name returned by GetSlug(false). Until that aggregation runs the Get
// returns 404 — we record pending and the reconciler retries on each
// tick.
//
// When ServerSideUserManagedMerge is enabled, this GET also transparently
// returns the user-managed (ug-) overlay already merged in: storage#319's
// merged-first read path serves the merged CP when an overlay exists and
// falls back to the observed CP otherwise. That merge is GET-only (not
// List/Watch), and this cache is GET-driven, so the merged view is always
// observed. See config.ProfileProjectionConfig.ServerSideUserManagedMerge.
var (
cp *v1beta1.ContainerProfile
cpErr error
Expand All @@ -333,9 +340,15 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry(
// annotation and merged them on top of the base profile; we read them
// directly by their well-known name instead, avoiding a List and an
// annotation filter. Both are optional: nil on 404.
//
// Skipped entirely when ServerSideUserManagedMerge is enabled: the CP fetched
// above already has the ug- overlay merged in server-side (storage#319), so
// re-fetching and re-projecting it here would be redundant work. Leaving
// userManagedAP/NN nil makes the projection pass and RV bookkeeping below
// no-op naturally.
var userManagedAP *v1beta1.ApplicationProfile
var userManagedNN *v1beta1.NetworkNeighborhood
if workloadName != "" {
if !c.cfg.ProfileProjection.ServerSideUserManagedMerge && workloadName != "" {
ugName := helpersv1.UserApplicationProfilePrefix + workloadName
var ugAPErr error
_ = c.refreshRPC(ctx, func(rctx context.Context) error {
Expand Down
175 changes: 173 additions & 2 deletions pkg/objectcache/containerprofilecache/containerprofilecache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ type fakeProfileClient struct {
overlayOnly string

getCPCalls int
// ugAPCalls / ugNNCalls count GETs for "ug-"-prefixed names so tests can
// assert that the client-side user-managed fetch is (or isn't) issued.
ugAPCalls int
ugNNCalls int
}

var _ storage.ProfileClient = (*fakeProfileClient)(nil)
Expand All @@ -62,6 +66,7 @@ func TestShouldLogOptionalUserManagedFetchError(t *testing.T) {

func (f *fakeProfileClient) GetApplicationProfile(_ context.Context, _, name string) (*v1beta1.ApplicationProfile, error) {
if len(name) >= 3 && name[:3] == helpersv1.UserApplicationProfilePrefix {
f.ugAPCalls++
return f.userManagedAP, nil
}
if f.overlayOnly != "" && name != f.overlayOnly {
Expand All @@ -71,6 +76,7 @@ func (f *fakeProfileClient) GetApplicationProfile(_ context.Context, _, name str
}
func (f *fakeProfileClient) GetNetworkNeighborhood(_ context.Context, _, name string) (*v1beta1.NetworkNeighborhood, error) {
if len(name) >= 3 && name[:3] == helpersv1.UserNetworkNeighborhoodPrefix {
f.ugNNCalls++
return f.userManagedNN, nil
}
if f.overlayOnly != "" && name != f.overlayOnly {
Expand All @@ -89,14 +95,28 @@ func (f *fakeProfileClient) ListNetworkNeighborhoods(_ context.Context, _ string
return &v1beta1.NetworkNeighborhoodList{}, nil
}

// newTestCache returns a cache wired with an in-memory K8sObjectCacheMock.
// newTestCache returns a cache wired with an in-memory K8sObjectCacheMock and
// the default config (client-side ug- merge enabled).
func newTestCache(t *testing.T, client storage.ProfileClient) (*ContainerProfileCacheImpl, *objectcache.K8sObjectCacheMock) {
t.Helper()
return newTestCacheWithConfig(t, client, config.Config{ProfilesCacheRefreshRate: 30 * time.Second})
}

// newTestCacheWithConfig is newTestCache with a caller-supplied config, used by
// the ServerSideUserManagedMerge tests to flip ProfileProjection settings.
func newTestCacheWithConfig(t *testing.T, client storage.ProfileClient, cfg config.Config) (*ContainerProfileCacheImpl, *objectcache.K8sObjectCacheMock) {
t.Helper()
k8s := &objectcache.K8sObjectCacheMock{}
cfg := config.Config{ProfilesCacheRefreshRate: 30 * time.Second}
return NewContainerProfileCache(cfg, client, k8s, nil), k8s
}

// serverSideMergeConfig returns a config with ServerSideUserManagedMerge on.
func serverSideMergeConfig() config.Config {
cfg := config.Config{ProfilesCacheRefreshRate: 30 * time.Second}
cfg.ProfileProjection.ServerSideUserManagedMerge = true
return cfg
}

// primeSharedData stashes a WatchedContainerData so waitForSharedContainerData
// resolves instantly. It builds a real InstanceID from a pod because the cache
// code calls .GetOneTimeSlug and .GetTemplateHash on it.
Expand Down Expand Up @@ -331,6 +351,157 @@ func TestGetContainerProfile_Miss(t *testing.T) {
require.Error(t, state.Error)
}

// execSpec is the projection spec used by the ServerSideUserManagedMerge tests:
// project all execs so merged paths surface in Projected.Execs.Values.
func execSpec() objectcache.RuleProjectionSpec {
return objectcache.RuleProjectionSpec{
Execs: objectcache.FieldSpec{InUse: true, All: true},
Hash: "server-side-merge-test",
}
}

// TestServerSideMerge_SkipsUgFetch verifies that with ServerSideUserManagedMerge
// enabled, addContainer does NOT issue the client-side "ug-" AP/NN GETs and does
// NOT client-merge them. The ug- exception must instead arrive via the
// server-merged CP returned by GetContainerProfile.
func TestServerSideMerge_SkipsUgFetch(t *testing.T) {
// CP simulates storage#319's merged-first GET: /bin/base (observed) plus
// /bin/server-ug (already merged from the ug- overlay server-side).
cp := &v1beta1.ContainerProfile{
ObjectMeta: metav1.ObjectMeta{
Name: "cp-base",
Namespace: "default",
ResourceVersion: "1",
Annotations: map[string]string{
helpersv1.CompletionMetadataKey: helpersv1.Full,
helpersv1.StatusMetadataKey: helpersv1.Completed,
},
},
Spec: v1beta1.ContainerProfileSpec{
Execs: []v1beta1.ExecCalls{{Path: "/bin/base"}, {Path: "/bin/server-ug"}},
},
}
// This ug- AP must NEVER be fetched/merged client-side under the flag. Its
// distinctive exec (/bin/client-only) is the canary: if it shows up in the
// projection, the client-side merge wrongly ran.
userManagedAP := &v1beta1.ApplicationProfile{
ObjectMeta: metav1.ObjectMeta{Name: "ug-nginx", Namespace: "default", ResourceVersion: "9"},
Spec: v1beta1.ApplicationProfileSpec{
Containers: []v1beta1.ApplicationProfileContainer{{
Name: "nginx",
Execs: []v1beta1.ExecCalls{{Path: "/bin/client-only"}},
}},
},
}
client := &fakeProfileClient{cp: cp, userManagedAP: userManagedAP}
c, k8s := newTestCacheWithConfig(t, client, serverSideMergeConfig())
c.SetProjectionSpec(execSpec())

id := "container-server-side"
primeSharedData(t, k8s, id, "wlid://cluster-a/namespace-default/deployment-nginx")
require.NoError(t, c.addContainer(eventContainer(id), context.Background()))

cached := c.GetProjectedContainerProfile(id)
require.NotNil(t, cached)
_, hasBase := cached.Execs.Values["/bin/base"]
_, hasServerUg := cached.Execs.Values["/bin/server-ug"]
_, hasClientOnly := cached.Execs.Values["/bin/client-only"]
assert.True(t, hasBase, "base CP exec must be present")
assert.True(t, hasServerUg, "server-merged ug- exec must be present (came via the CP GET)")
assert.False(t, hasClientOnly, "client-side ug- merge must NOT run under ServerSideUserManagedMerge")

assert.Equal(t, 0, client.ugAPCalls, "no client-side ug- AP fetch under the flag")
assert.Equal(t, 0, client.ugNNCalls, "no client-side ug- NN fetch under the flag")

entry, ok := c.entries.Load(id)
require.True(t, ok)
assert.Empty(t, entry.UserManagedAPRV, "UserManagedAPRV must stay empty under the flag")
assert.Empty(t, entry.UserManagedNNRV, "UserManagedNNRV must stay empty under the flag")
}

// TestServerSideMerge_LabelOverlayStillApplies verifies that the label-driven
// user-defined overlay (pass 2) is unaffected by ServerSideUserManagedMerge:
// it is a separate, still-client-side feature.
func TestServerSideMerge_LabelOverlayStillApplies(t *testing.T) {
cp := &v1beta1.ContainerProfile{
ObjectMeta: metav1.ObjectMeta{
Name: "cp-1", Namespace: "default", ResourceVersion: "1",
Annotations: map[string]string{helpersv1.StatusMetadataKey: helpersv1.Completed},
},
Spec: v1beta1.ContainerProfileSpec{Execs: []v1beta1.ExecCalls{{Path: "/bin/base"}}},
}
userAP := &v1beta1.ApplicationProfile{
ObjectMeta: metav1.ObjectMeta{Name: "override", Namespace: "default", ResourceVersion: "u1"},
Spec: v1beta1.ApplicationProfileSpec{
Containers: []v1beta1.ApplicationProfileContainer{{
Name: "nginx",
Execs: []v1beta1.ExecCalls{{Path: "/bin/overlay"}},
}},
},
}
client := &fakeProfileClient{cp: cp, ap: userAP, overlayOnly: "override"}
c, k8s := newTestCacheWithConfig(t, client, serverSideMergeConfig())
c.SetProjectionSpec(execSpec())

id := "container-overlay-flagged"
primeSharedData(t, k8s, id, "wlid://cluster-a/namespace-default/deployment-nginx")
ev := eventContainer(id)
ev.K8s.PodLabels = map[string]string{helpersv1.UserDefinedProfileMetadataKey: "override"}
require.NoError(t, c.addContainer(ev, context.Background()))

cached := c.GetProjectedContainerProfile(id)
require.NotNil(t, cached)
_, hasBase := cached.Execs.Values["/bin/base"]
_, hasOverlay := cached.Execs.Values["/bin/overlay"]
assert.True(t, hasBase, "base CP exec must be present")
assert.True(t, hasOverlay, "label-driven user-defined overlay must still merge under the flag")

assert.Equal(t, 0, client.ugAPCalls, "label overlay must not trigger ug- fetches")
entry, ok := c.entries.Load(id)
require.True(t, ok)
require.NotNil(t, entry.UserAPRef, "user-defined overlay ref must be recorded")
assert.Equal(t, "override", entry.UserAPRef.Name)
}

// TestClientSideMerge_DefaultFetchesUg pins the default (flag-off) behavior:
// the client-side ug- fetch IS issued and merged. This is the safety baseline
// that ServerSideUserManagedMerge opts out of.
func TestClientSideMerge_DefaultFetchesUg(t *testing.T) {
cp := &v1beta1.ContainerProfile{
ObjectMeta: metav1.ObjectMeta{
Name: "cp-base", Namespace: "default", ResourceVersion: "1",
Annotations: map[string]string{helpersv1.StatusMetadataKey: helpersv1.Completed},
},
Spec: v1beta1.ContainerProfileSpec{Execs: []v1beta1.ExecCalls{{Path: "/bin/base"}}},
}
userManagedAP := &v1beta1.ApplicationProfile{
ObjectMeta: metav1.ObjectMeta{Name: "ug-nginx", Namespace: "default", ResourceVersion: "9"},
Spec: v1beta1.ApplicationProfileSpec{
Containers: []v1beta1.ApplicationProfileContainer{{
Name: "nginx",
Execs: []v1beta1.ExecCalls{{Path: "/bin/client-merged"}},
}},
},
}
client := &fakeProfileClient{cp: cp, userManagedAP: userManagedAP}
c, k8s := newTestCache(t, client) // default config: flag OFF
c.SetProjectionSpec(execSpec())

id := "container-default"
primeSharedData(t, k8s, id, "wlid://cluster-a/namespace-default/deployment-nginx")
require.NoError(t, c.addContainer(eventContainer(id), context.Background()))

cached := c.GetProjectedContainerProfile(id)
require.NotNil(t, cached)
_, hasClientMerged := cached.Execs.Values["/bin/client-merged"]
assert.True(t, hasClientMerged, "default behavior must client-merge the ug- AP")
assert.Greater(t, client.ugAPCalls, 0, "default behavior must fetch the ug- AP")

entry, ok := c.entries.Load(id)
require.True(t, ok)
assert.Equal(t, "9", entry.UserManagedAPRV, "default behavior records UserManagedAPRV")
}

// TestStorageError_NoEntry ensures storage errors don't panic and don't
// populate a cache entry.
func TestStorageError_NoEntry(t *testing.T) {
Expand Down
8 changes: 7 additions & 1 deletion pkg/objectcache/containerprofilecache/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,9 +330,15 @@ func (c *ContainerProfileCacheImpl) refreshOneEntry(ctx context.Context, id stri
helpers.String("status", cp.Annotations[helpersv1.StatusMetadataKey]))
return
}
// User-managed (ug-) re-fetch. Skipped when ServerSideUserManagedMerge is
// enabled: the CP re-fetched above already carries the merged ug- overlay
// (storage#319), so leaving these nil makes ladder pass #1 in
// rebuildEntryFromSources a no-op and the RV fast-skip below treats them as
// "still absent" (rvsMatchAP(nil, "") == true) since UserManagedAPRV/NNRV are
// never set under this flag.
var userManagedAP *v1beta1.ApplicationProfile
var userManagedNN *v1beta1.NetworkNeighborhood
if e.WorkloadName != "" {
if !c.cfg.ProfileProjection.ServerSideUserManagedMerge && e.WorkloadName != "" {
ugAPName := helpersv1.UserApplicationProfilePrefix + e.WorkloadName
var userManagedAPErr error
_ = c.refreshRPC(ctx, func(rctx context.Context) error {
Expand Down
58 changes: 58 additions & 0 deletions pkg/objectcache/containerprofilecache/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,64 @@ func TestUserManagedProfileMerged(t *testing.T) {
assert.Equal(t, "9", entry.UserManagedAPRV, "UserManagedAPRV recorded at add time")
}

// TestServerSideMerge_RefreshSkipsUgRefetch verifies the refresh path honors
// ServerSideUserManagedMerge: when a cached entry is refreshed, the reconciler
// re-fetches the (server-merged) CP but does NOT re-fetch the ug- AP/NN. A CP
// RV bump forces a rebuild so we exercise the full ladder, not just the
// fast-skip.
func TestServerSideMerge_RefreshSkipsUgRefetch(t *testing.T) {
cp := &v1beta1.ContainerProfile{
ObjectMeta: metav1.ObjectMeta{
Name: "cp", Namespace: "default", ResourceVersion: "200", // bumped vs seeded RV=100
Annotations: map[string]string{helpersv1.StatusMetadataKey: helpersv1.Completed},
},
Spec: v1beta1.ContainerProfileSpec{Execs: []v1beta1.ExecCalls{{Path: "/bin/base"}, {Path: "/bin/server-ug"}}},
}
userManagedAP := &v1beta1.ApplicationProfile{
ObjectMeta: metav1.ObjectMeta{Name: "ug-nginx", Namespace: "default", ResourceVersion: "9"},
Spec: v1beta1.ApplicationProfileSpec{
Containers: []v1beta1.ApplicationProfileContainer{{
Name: "nginx",
Execs: []v1beta1.ExecCalls{{Path: "/bin/client-only"}},
}},
},
}
client := &fakeProfileClient{cp: cp, userManagedAP: userManagedAP}
k8s := &objectcache.K8sObjectCacheMock{}
c := NewContainerProfileCache(serverSideMergeConfig(), client, k8s, nil)
c.SetProjectionSpec(execSpec())

id := "c1"
c.entries.Set(id, &CachedContainerProfile{
Projected: Apply(nil, cp, nil),
State: &objectcache.ProfileState{Name: cp.Name},
ContainerName: "nginx",
PodName: "nginx-abc",
Namespace: "default",
PodUID: "uid-1",
CPName: "cp",
WorkloadName: "nginx", // would drive ug- lookups when the flag is OFF
RV: "100", // stale vs CP RV=200 → forces rebuild
})

c.refreshAllEntries(context.Background())

assert.Equal(t, 0, client.ugAPCalls, "refresh must not re-fetch ug- AP under the flag")
assert.Equal(t, 0, client.ugNNCalls, "refresh must not re-fetch ug- NN under the flag")

cached := c.GetProjectedContainerProfile(id)
require.NotNil(t, cached)
_, hasServerUg := cached.Execs.Values["/bin/server-ug"]
_, hasClientOnly := cached.Execs.Values["/bin/client-only"]
assert.True(t, hasServerUg, "rebuilt projection must include the server-merged ug- exec")
assert.False(t, hasClientOnly, "rebuilt projection must NOT include client-only ug- exec")

entry, ok := c.entries.Load(id)
require.True(t, ok)
assert.Equal(t, "200", entry.RV, "rebuild picked up the new CP RV")
assert.Empty(t, entry.UserManagedAPRV, "UserManagedAPRV stays empty under the flag")
}

// TestSpecChange_TriggersReprojection — T5 nudge integration.
//
// After SetProjectionSpec is called with a new spec, RefreshAllEntriesForTest
Expand Down
5 changes: 5 additions & 0 deletions tests/chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ nodeAgent:
profileProjection:
detailedMetricsEnabled: true
strictValidation: false
# Skip node-agent's client-side user-managed (ug-) AP/NN merge and rely on
# storage#319's server-side merge (served already-merged on ContainerProfile
# GET). Keep false until the storage version that ships #319 is deployed:
# enabling it against older storage drops ug- exception enforcement.
serverSideUserManagedMergeEnabled: false

serviceMonitor:
enabled: true
Expand Down
Loading