diff --git a/go.mod b/go.mod index 3e1d3709e..d34fd6015 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ go 1.25.8 require ( github.com/SergJa/jsonhash v0.0.0-20210531165746-fc45f346aa74 github.com/anchore/syft v1.42.3 - github.com/armosec/armoapi-go v0.0.696 + github.com/armosec/armoapi-go v0.0.719 github.com/armosec/utils-k8s-go v0.0.30 github.com/containers/common v0.63.0 github.com/deckarep/golang-set/v2 v2.7.0 diff --git a/go.sum b/go.sum index 2fc66f055..ea95f56ec 100644 --- a/go.sum +++ b/go.sum @@ -100,8 +100,8 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/armosec/armoapi-go v0.0.696 h1:+0Ll7y4oWNaKEO47qbGDFIQLxkSJeKYzylS0FwI84XE= -github.com/armosec/armoapi-go v0.0.696/go.mod h1:9jAH0g8ZsryhiBDd/aNMX4+n10bGwTx/doWCyyjSxts= +github.com/armosec/armoapi-go v0.0.719 h1:eo35KTOPS0vM3asLfDONwNScZ+1FRhUprpNWs2czXCM= +github.com/armosec/armoapi-go v0.0.719/go.mod h1:9jAH0g8ZsryhiBDd/aNMX4+n10bGwTx/doWCyyjSxts= github.com/armosec/gojay v1.2.17 h1:VSkLBQzD1c2V+FMtlGFKqWXNsdNvIKygTKJI9ysY8eM= github.com/armosec/gojay v1.2.17/go.mod h1:vuvX3DlY0nbVrJ0qCklSS733AWMoQboq3cFyuQW9ybc= github.com/armosec/utils-go v0.0.58 h1:g9RnRkxZAmzTfPe2ruMo2OXSYLwVSegQSkSavOfmaIE= diff --git a/main.go b/main.go index 2a4147640..5a50a724a 100644 --- a/main.go +++ b/main.go @@ -107,8 +107,18 @@ func main() { cleanupHandler := file.NewResourcesCleanupHandler(osFs, file.DefaultStorageRoot, pool, watchDispatcher, cfg.CleanupInterval, cfg.DefaultNamespace, kubernetesAPI, relevancyEnabled) go cleanupHandler.RunCleanupTask(ctx) + // Shared open-protection store: seeded from static config and, when an + // operator-managed ConfigMap is configured, kept in 
sync by a reloader so + // rule-binding changes adjust the pinned sensitive prefixes without a restart. + openProtectionStore := file.NewOpenProtectionStore(cfg.ProtectedOpenMatchers) + if cfg.OpenProtectionConfigMapName != "" { + reloader := file.NewOpenProtectionReloader(client, cfg.DefaultNamespace, cfg.OpenProtectionConfigMapName, cfg.OpenProtectionRefreshInterval, openProtectionStore) + go reloader.Run(ctx) + } + // start the server options := server.NewWardleServerOptions(os.Stdout, os.Stderr, osFs, pool, cfg, watchDispatcher, cleanupHandler) + options.OpenProtectionStore = openProtectionStore cmd := server.NewCommandStartWardleServer(ctx, options, false) logger.L().Info("APIServer starting") code := cli.Run(cmd) diff --git a/pkg/apiserver/apiserver.go b/pkg/apiserver/apiserver.go index 1ee303514..7d96f883c 100644 --- a/pkg/apiserver/apiserver.go +++ b/pkg/apiserver/apiserver.go @@ -82,11 +82,12 @@ func init() { // ExtraConfig holds custom apiserver config type ExtraConfig struct { - CleanupHandler *file.ResourcesCleanupHandler - OsFs afero.Fs - Pool *sqlitemigration.Pool - StorageConfig config.Config - WatchDispatcher *file.WatchDispatcher + CleanupHandler *file.ResourcesCleanupHandler + OsFs afero.Fs + Pool *sqlitemigration.Pool + StorageConfig config.Config + WatchDispatcher *file.WatchDispatcher + OpenProtectionStore *file.OpenProtectionStore } // Config defines the config for the apiserver @@ -143,7 +144,7 @@ func (c completedConfig) New() (*WardleServer, error) { storageImpl = file.NewStorageImpl(c.ExtraConfig.OsFs, file.DefaultStorageRoot, c.ExtraConfig.Pool, c.ExtraConfig.WatchDispatcher, Scheme) applicationProfileStorageImpl = file.NewApplicationProfileStorage(file.NewStorageImplWithCollector(c.ExtraConfig.OsFs, file.DefaultStorageRoot, c.ExtraConfig.Pool, c.ExtraConfig.WatchDispatcher, Scheme, file.NewApplicationProfileProcessor(c.ExtraConfig.StorageConfig))) - containerProfileStorageImpl = 
file.NewContainerProfileRESTStorage(file.NewStorageImplWithCollector(c.ExtraConfig.OsFs, file.DefaultStorageRoot, c.ExtraConfig.Pool, c.ExtraConfig.WatchDispatcher, Scheme, file.NewContainerProfileProcessor(c.ExtraConfig.StorageConfig, c.ExtraConfig.CleanupHandler))) + containerProfileStorageImpl = file.NewContainerProfileRESTStorage(file.NewStorageImplWithCollector(c.ExtraConfig.OsFs, file.DefaultStorageRoot, c.ExtraConfig.Pool, c.ExtraConfig.WatchDispatcher, Scheme, file.NewContainerProfileProcessor(c.ExtraConfig.StorageConfig, c.ExtraConfig.CleanupHandler, c.ExtraConfig.OpenProtectionStore))) networkNeighborhoodStorageImpl = file.NewNetworkNeighborhoodStorage(file.NewStorageImplWithCollector(c.ExtraConfig.OsFs, file.DefaultStorageRoot, c.ExtraConfig.Pool, c.ExtraConfig.WatchDispatcher, Scheme, file.NewNetworkNeighborhoodProcessor(c.ExtraConfig.StorageConfig))) configScanStorageImpl = file.NewConfigurationScanSummaryStorage(storageImpl) vulnerabilitySummaryStorage = file.NewVulnerabilitySummaryStorage(storageImpl) diff --git a/pkg/cmd/server/start.go b/pkg/cmd/server/start.go index a84344bcd..75fe0805f 100644 --- a/pkg/cmd/server/start.go +++ b/pkg/cmd/server/start.go @@ -71,11 +71,12 @@ type WardleServerOptions struct { AlternateDNS []string - CleanupHandler *file.ResourcesCleanupHandler - OsFs afero.Fs - Pool *sqlitemigration.Pool - StorageConfig config.Config - WatchDispatcher *file.WatchDispatcher + CleanupHandler *file.ResourcesCleanupHandler + OsFs afero.Fs + Pool *sqlitemigration.Pool + StorageConfig config.Config + WatchDispatcher *file.WatchDispatcher + OpenProtectionStore *file.OpenProtectionStore } func WardleVersionToKubeVersion(ver *version.Version) *version.Version { @@ -288,11 +289,12 @@ func (o *WardleServerOptions) Config() (*apiserver.Config, error) { c := &apiserver.Config{ GenericConfig: serverConfig, ExtraConfig: apiserver.ExtraConfig{ - CleanupHandler: o.CleanupHandler, - OsFs: o.OsFs, - Pool: o.Pool, - StorageConfig: o.StorageConfig, - 
WatchDispatcher: o.WatchDispatcher, + CleanupHandler: o.CleanupHandler, + OsFs: o.OsFs, + Pool: o.Pool, + StorageConfig: o.StorageConfig, + WatchDispatcher: o.WatchDispatcher, + OpenProtectionStore: o.OpenProtectionStore, }, } return c, nil diff --git a/pkg/config/config.go b/pkg/config/config.go index 9239f2915..01374ac0a 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -38,6 +38,24 @@ type Config struct { DefaultWorkerCount int `mapstructure:"defaultWorkerCount"` DefaultMaxObjectSize int `mapstructure:"defaultMaxObjectSize"` + // ProtectedOpenMatchers is the union of sensitive open matchers + // (exact/prefix/suffix/contains) declared by the active rules' + // profileDataRequired.opens. The container-profile processor pins these + // (and their ancestors) to literal during deflation so anomaly rules such + // as R0010 keep working. In cluster this is populated by the operator/helm + // from the versioned rule library (armotypes.UnionOpenProtection); the + // zero value preserves legacy collapse behaviour. + ProtectedOpenMatchers armotypes.OpenMatchers `mapstructure:"protectedOpenMatchers"` + + // OpenProtectionConfigMapName, when non-empty, names a ConfigMap (in + // DefaultNamespace) that the operator keeps in sync with the union of active + // rules' profileDataRequired.opens. The apiserver polls it every + // OpenProtectionRefreshInterval and refreshes the container-profile + // processor's protection, so rule-binding changes take effect without a + // restart. Empty disables the reader and falls back to ProtectedOpenMatchers. 
+ OpenProtectionConfigMapName string `mapstructure:"openProtectionConfigMapName"` + OpenProtectionRefreshInterval time.Duration `mapstructure:"openProtectionRefreshInterval"` + // Debugging QueueManagerEnabled bool `mapstructure:"queueManagerEnabled"` QueueTimeoutPrint bool `mapstructure:"queueTimeoutPrint"` @@ -66,6 +84,7 @@ func LoadConfig(path string) (Config, error) { v.SetDefault("queueTimeoutPrint", false) v.SetDefault("queueTimeout", 60) v.SetDefault("queueProcessingStatsPrint", false) + v.SetDefault("openProtectionRefreshInterval", time.Minute) v.SetDefault("kindQueues", map[string]KindQueueConfig{ "applicationprofiles": { QueueLength: 50, diff --git a/pkg/registry/file/containerprofile_processor.go b/pkg/registry/file/containerprofile_processor.go index f08920afd..7422c7683 100644 --- a/pkg/registry/file/containerprofile_processor.go +++ b/pkg/registry/file/containerprofile_processor.go @@ -45,13 +45,26 @@ type ContainerProfileProcessor struct { MaxContainerProfileSize int ContainerProfileStorage ContainerProfileStorage ConsolidatedSlugChannel chan ConsolidatedSlugData + // protection holds the active union of sensitive open matchers (exact/prefix/ + // suffix/contains) declared by active rules' profileDataRequired.opens. Matched + // prefixes (and their ancestors) are pinned to literal during deflation so + // rules like R0010 keep working. It is read on every PreSave via Get and + // refreshed out-of-band by an OpenProtectionReloader (in cluster, fed from the + // operator-published ConfigMap); an empty matcher set preserves legacy collapse + // behaviour (the pointer itself must be non-nil, since PreSave calls Get on it).
+ protection *OpenProtectionStore } -func NewContainerProfileProcessor(cfg config.Config, cleanupHandler *ResourcesCleanupHandler) *ContainerProfileProcessor { +func NewContainerProfileProcessor(cfg config.Config, cleanupHandler *ResourcesCleanupHandler, protection *OpenProtectionStore) *ContainerProfileProcessor { hostType := cfg.HostType if hostType == "" { hostType = armotypes.HostTypeKubernetes } + if protection == nil { + // Seed from static config when no shared store is injected (e.g. backend + // callers and tests that don't run a reloader). + protection = NewOpenProtectionStore(cfg.ProtectedOpenMatchers) + } return &ContainerProfileProcessor{ CleanupHandler: cleanupHandler, CleanupInterval: cfg.CleanupInterval, @@ -60,6 +73,28 @@ func NewContainerProfileProcessor(cfg config.Config, cleanupHandler *ResourcesCl HostType: hostType, Interval: 30 * time.Second, MaxContainerProfileSize: cfg.MaxApplicationProfileSize, + protection: protection, + } +} + +// ProtectionStore returns the processor's shared open-protection store so callers +// (e.g. main wiring a reloader) can refresh it after construction. +func (a *ContainerProfileProcessor) ProtectionStore() *OpenProtectionStore { + return a.protection +} + +// OpenProtectionFromMatchers converts the shared armoapi-go matcher union into +// the analyzer's collapse-protection input. It is the single conversion point +// reused by every environment: in cluster (NewContainerProfileProcessor, from +// config) and the backend (postgres-connector, from rules it loads out of +// MongoDB via armotypes.UnionOpenProtection). Keeping it here means callers only +// need armotypes + this package, not the low-level dynamicpathdetector. 
+func OpenProtectionFromMatchers(m armotypes.OpenMatchers) dynamicpathdetector.OpenProtection { + return dynamicpathdetector.OpenProtection{ + Exact: m.Exact, + Prefix: m.Prefix, + Suffix: m.Suffix, + Contains: m.Contains, } } @@ -178,7 +213,7 @@ func (a *ContainerProfileProcessor) PreSave(ctx context.Context, object runtime. } else { logger.L().Debug("ContainerProfileProcessor.PreSave - failed to get sbom name", loggerhelpers.Error(err), loggerhelpers.String("imageTag", profile.Spec.ImageTag), loggerhelpers.String("imageID", profile.Spec.ImageID)) } - profile.Spec = DeflateContainerProfileSpec(profile.Spec, sbomSet) + profile.Spec = DeflateContainerProfileSpec(profile.Spec, sbomSet, a.protection.Get()) size += len(profile.Spec.Execs) size += len(profile.Spec.Opens) size += len(profile.Spec.Syscalls) @@ -807,8 +842,27 @@ func (a *ContainerProfileProcessor) getAggregatedData(ctx context.Context, key s return status, completion, hash } -func DeflateContainerProfileSpec(container softwarecomposition.ContainerProfileSpec, sbomSet mapset.Set[string]) softwarecomposition.ContainerProfileSpec { - opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzer(OpenDynamicThreshold), sbomSet) +// DeflateContainerProfileSpec generalises a profile's high-cardinality fields +// (opens, endpoints, …) so stored profiles stay bounded. openProtection is the +// union of sensitive open matchers that rules depend on (their +// profileDataRequired.opens: exact/prefix/suffix/contains). The open analyzer +// pins the matched prefixes and their ancestors to literal, so they are never +// folded into a wildcard such as /etc/⋯ or /⋯/⋯. That keeps anomaly rules like +// R0010 able to distinguish a never-before-seen sensitive path from a +// generalised one. The matcher set is sourced per-environment (rules CRD +// in-cluster, MongoDB in the backend) but applied here via the same shared +// strategy. 
Pass a zero OpenProtection to disable protection (legacy behaviour). +func DeflateContainerProfileSpec(container softwarecomposition.ContainerProfileSpec, sbomSet mapset.Set[string], openProtection dynamicpathdetector.OpenProtection) softwarecomposition.ContainerProfileSpec { + var protectedPrefixes []string + if !openProtection.Empty() { + openPaths := make([]string, len(container.Opens)) + for i := range container.Opens { + openPaths[i] = container.Opens[i].Path + } + protectedPrefixes = openProtection.ProtectedPrefixes(openPaths) + } + openAnalyzer := dynamicpathdetector.NewPathAnalyzerWithConfigsAndProtection(OpenDynamicThreshold, nil, protectedPrefixes) + opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, openAnalyzer, sbomSet) if err != nil { logger.L().Debug("ContainerProfileProcessor.deflateContainerProfileSpec - falling back to DeflateStringer for opens", loggerhelpers.Error(err)) opens = DeflateStringer(container.Opens) diff --git a/pkg/registry/file/dynamicpathdetector/analyzer.go b/pkg/registry/file/dynamicpathdetector/analyzer.go index 65cfc1443..fe78a746e 100644 --- a/pkg/registry/file/dynamicpathdetector/analyzer.go +++ b/pkg/registry/file/dynamicpathdetector/analyzer.go @@ -34,14 +34,154 @@ func NewPathAnalyzer(threshold int) *PathAnalyzer { // configs is copied so the caller can reuse or mutate the slice without // affecting the analyzer. func NewPathAnalyzerWithConfigs(defaultThreshold int, configs []CollapseConfig) *PathAnalyzer { + return NewPathAnalyzerWithConfigsAndProtection(defaultThreshold, configs, nil) +} + +// NewPathAnalyzerWithConfigsAndProtection is NewPathAnalyzerWithConfigs plus a +// set of rule-protected prefixes. Any trie node that lies on the ancestor +// chain of — or within the subtree of — a protected prefix is pinned to +// literal entries (never collapsed to ⋯/*), regardless of threshold. 
This is +// the shared strategy behind rule-aware collapse: callers compute the protected +// prefix set from their own rule source (CRD in-cluster, MongoDB in the +// backend) and feed it in here, so a sensitive path declared by a rule's +// profileDataRequired (e.g. opens prefix "/etc/shadow") survives generalisation +// and remains exactly matchable. protected is normalised and copied. +func NewPathAnalyzerWithConfigsAndProtection(defaultThreshold int, configs []CollapseConfig, protected []string) *PathAnalyzer { copied := make([]CollapseConfig, len(configs)) copy(copied, configs) + pinAncestors, protectedRoots := buildProtectionIndex(NormalizeProtectedPrefixes(protected)) return &PathAnalyzer{ - RootNodes: make(map[string]*SegmentNode), - threshold: defaultThreshold, - configs: copied, - defaultCfg: CollapseConfig{Prefix: "/", Threshold: defaultThreshold}, + RootNodes: make(map[string]*SegmentNode), + threshold: defaultThreshold, + configs: copied, + defaultCfg: CollapseConfig{Prefix: "/", Threshold: defaultThreshold}, + pinAncestors: pinAncestors, + protectedRoots: protectedRoots, + } +} + +// buildProtectionIndex precomputes the lookup sets protectedNode uses from the +// normalised protected prefixes. Returns (nil, nil) when there is no protection. 
+func buildProtectionIndex(prefixes []string) (pinAncestors, protectedRoots map[string]struct{}) { + if len(prefixes) == 0 { + return nil, nil + } + pinAncestors = make(map[string]struct{}) + protectedRoots = make(map[string]struct{}, len(prefixes)) + for _, p := range prefixes { + protectedRoots[p] = struct{}{} + pinAncestors["/"] = struct{}{} + for i := 1; i < len(p); i++ { + if p[i] == '/' { + pinAncestors[p[:i]] = struct{}{} // each intermediate dir prefix + } + } + pinAncestors[p] = struct{}{} // and the prefix itself + } + return pinAncestors, protectedRoots +} + +// NormalizeProtectedPrefixes cleans caller-supplied protected prefixes into the +// boundary-comparable form protectedNode expects: absolute, path.Clean'd, no +// trailing slash (except root). Empty/relative entries are dropped. Exported so +// each environment's wiring can reuse the exact same normalisation. +func NormalizeProtectedPrefixes(prefixes []string) []string { + if len(prefixes) == 0 { + return nil + } + out := make([]string, 0, len(prefixes)) + seen := make(map[string]struct{}, len(prefixes)) + for _, p := range prefixes { + if p == "" || p[0] != '/' { + continue // must be absolute; ignore garbage rather than mis-pin + } + c := path.Clean(p) + if _, ok := seen[c]; ok { + continue + } + seen[c] = struct{}{} + out = append(out, c) + } + return out +} + +// protectedNode reports whether the trie node at pathPrefix must be pinned to +// literal (never collapsed). True when pathPrefix is an ancestor-or-equal of a +// protected prefix (so no wildcard forms ABOVE/AT the sensitive level, e.g. +// pinning "/" and "/etc" stops /⋯ and /etc/⋯) OR a descendant-or-equal of one +// (so the sensitive subtree itself stays literal). pathPrefix arrives as a path +// walked so far (possibly with a trailing slash, e.g. "/etc/"); the empty +// string denotes the root boundary and is treated as an ancestor of everything. 
+func (ua *PathAnalyzer) protectedNode(pathPrefix string) bool { + if len(ua.pinAncestors) == 0 { + return false + } + norm := normalizeNodePath(pathPrefix) // "" and "/foo/" → "/" and "/foo" + // Case 1: norm is an ancestor-or-self of a protected prefix — O(1). + if _, ok := ua.pinAncestors[norm]; ok { + return true } + // Fast reject: if norm's top-level dir isn't even an ancestor of a protected + // prefix, norm cannot lie inside a protected subtree. Makes the common node + // (e.g. under /usr or /proc) O(1). + if _, ok := ua.pinAncestors[topDir(norm)]; !ok { + return false + } + // Case 2: norm is inside a protected subtree — walk its proper ancestors and + // check against the (small) set of protected roots. Bounded by path depth, + // independent of the number of protected prefixes. + for cur := parentPath(norm); ; cur = parentPath(cur) { + if _, ok := ua.protectedRoots[cur]; ok { + return true + } + if cur == "/" { + return false + } + } +} + +// normalizeNodePath maps a walked path prefix to its boundary-comparable form: +// the empty string (root boundary) becomes "/", and trailing slashes are +// stripped (e.g. "/etc/" → "/etc"). +func normalizeNodePath(p string) string { + if p == "" { + return "/" + } + for len(p) > 1 && p[len(p)-1] == '/' { + p = p[:len(p)-1] + } + return p +} + +// parentPath returns the parent directory of a normalized absolute path; the +// parent of "/" (or a single-segment path) is "/". +func parentPath(p string) string { + i := strings.LastIndexByte(p, '/') + if i <= 0 { + return "/" + } + return p[:i] +} + +// topDir returns the first-level directory of a normalized absolute path: +// "/etc/passwd" → "/etc", "/etc" → "/etc", "/" → "/". +func topDir(p string) string { + if len(p) < 2 { + return "/" + } + if i := strings.IndexByte(p[1:], '/'); i >= 0 { + return p[:i+1] + } + return p +} + +// pinnedThreshold returns NeverCollapseThreshold for protected nodes, otherwise +// the configured effective threshold for pathPrefix. 
+func (ua *PathAnalyzer) pinnedThreshold(pathPrefix string) int { + if ua.protectedNode(pathPrefix) { + return NeverCollapseThreshold + } + return ua.effectiveThreshold(pathPrefix) } // effectiveThreshold returns the collapse threshold applicable to the given @@ -165,8 +305,8 @@ func (ua *PathAnalyzer) processSegments(node *SegmentNode, p string) string { // collapse threshold configured for this node's path?". Here we // do want p[:i] — updateNodeStats then collapses the current // node's children to ⋯ when Count > threshold. - insertThreshold := ua.effectiveThreshold(p[:start]) - collapseThreshold := ua.effectiveThreshold(p[:i]) + insertThreshold := ua.pinnedThreshold(p[:start]) + collapseThreshold := ua.pinnedThreshold(p[:i]) currentNode = ua.processSegment(currentNode, segment, insertThreshold) ua.updateNodeStats(currentNode, collapseThreshold) buf = append(buf, currentNode.SegmentName...) @@ -332,7 +472,11 @@ func (ua *PathAnalyzer) createDynamicNode(node *SegmentNode) *SegmentNode { // Threshold is passed in by the caller so per-prefix overrides (via // CollapseConfig) can take effect without this function knowing about them. 
func (ua *PathAnalyzer) updateNodeStats(node *SegmentNode, threshold int) { - if node.Count > threshold && !node.IsNextDynamic() { + effectiveThreshold := threshold + if threshold == NeverCollapseThreshold && node.Count > PinnedSubtreeBudget { + effectiveThreshold = PinnedSubtreeBudget + } + if node.Count > effectiveThreshold && !node.IsNextDynamic() { dynamicChild := &SegmentNode{ SegmentName: DynamicIdentifier, Count: 0, diff --git a/pkg/registry/file/dynamicpathdetector/protection.go b/pkg/registry/file/dynamicpathdetector/protection.go new file mode 100644 index 000000000..3549a2f5f --- /dev/null +++ b/pkg/registry/file/dynamicpathdetector/protection.go @@ -0,0 +1,81 @@ +package dynamicpathdetector + +import "strings" + +// OpenProtection is the storage-side, generation-time mirror of a rule's +// profileDataRequired.opens matchers (node-agent typesv1.PatternObject kinds: +// exact / prefix / suffix / contains). It is the shared input to rule-aware +// collapse: each environment maps its own rule source — the rules CRD in +// cluster, MongoDB in the backend — into the union of these four matcher kinds, +// and ProtectedPrefixes turns them (with the raw opened paths) into the set of +// prefixes the analyzer must pin to literal so they survive generalisation. +// +// Reuse note: the canonical schema lives in node-agent +// (pkg/rulemanager/types/v1.PatternObject) and the projection/query side +// compiles it via objectcache.CompileSpec. node-agent imports storage (not the +// reverse), so this mirror is the lowest common home for the matcher used by +// BOTH the query side (was_path_opened) and this generation side; longer term +// the query side can delegate here to remove the duplication. +type OpenProtection struct { + Exact []string + Prefix []string + Suffix []string + Contains []string +} + +// Empty reports whether no matcher is declared, so callers can skip the work +// and preserve exact legacy collapse behaviour. 
+func (p OpenProtection) Empty() bool { + return len(p.Exact)+len(p.Prefix)+len(p.Suffix)+len(p.Contains) == 0 +} + +// ProtectedPrefixes returns the prefixes to pin given the raw opened paths. +// +// Exact and Prefix carry a fixed leading directory, so they pin statically: +// their ancestor chain is kept literal even if the sensitive file was never +// opened during learning — which is what stops a busy sibling set from +// collapsing the parent (e.g. /etc → /etc/⋯) and spuriously covering a +// first-ever sensitive access. +// +// CRITICAL/KNOWN LIMITATION: Suffix and Contains have no fixed location, so they +// are resolved against the actually-opened paths. Any observed path that matches +// is pinned, keeping its ancestor chain literal. +// However, because this is learning-dependent, if NO path matching the matcher +// was opened during learning, the containing directory (if high-cardinality) +// will collapse (e.g. /etc/ssh → /etc/ssh/⋯). Consequently, a first-ever runtime +// access to a matching path (e.g. /etc/ssh/ssh_host_ed25519_key) will be covered +// by the wildcard and the rule will not fire. Suffix/Contains protection is +// therefore best-effort, and rule authors should steer toward Exact/Prefix +// matchers for paths where detection must be guaranteed. +func (p OpenProtection) ProtectedPrefixes(openPaths []string) []string { + if p.Empty() { + return nil + } + out := make([]string, 0, len(p.Exact)+len(p.Prefix)) + out = append(out, p.Exact...) + out = append(out, p.Prefix...) + if len(p.Suffix) > 0 || len(p.Contains) > 0 { + for _, op := range openPaths { + if p.matchesUnanchored(op) { + out = append(out, op) // pin this open's ancestor chain + } + } + } + return out +} + +// matchesUnanchored reports whether path matches a Suffix or Contains matcher — +// the location-independent kinds that must be resolved against observed paths. 
+func (p OpenProtection) matchesUnanchored(path string) bool { + for _, s := range p.Suffix { + if s != "" && strings.HasSuffix(path, s) { + return true + } + } + for _, c := range p.Contains { + if c != "" && strings.Contains(path, c) { + return true + } + } + return false +} diff --git a/pkg/registry/file/dynamicpathdetector/protection_test.go b/pkg/registry/file/dynamicpathdetector/protection_test.go new file mode 100644 index 000000000..7ba528e18 --- /dev/null +++ b/pkg/registry/file/dynamicpathdetector/protection_test.go @@ -0,0 +1,164 @@ +package dynamicpathdetector + +import ( + "fmt" + "testing" +) + +// genWith builds the trie from paths then re-analyzes each to collect the +// final generalized pattern set (mirrors AnalyzeOpens' two-pass shape). +func genWith(a *PathAnalyzer, paths []string) map[string]struct{} { + for _, p := range paths { + _, _ = AnalyzeOpen(p, a) + } + out := map[string]struct{}{} + for _, p := range paths { + r, _ := AnalyzeOpen(p, a) + out[r] = struct{}{} + } + return out +} + +func coveredBy(pats map[string]struct{}, target string) string { + for p := range pats { + if CompareDynamic(p, target) { + return p + } + } + return "" +} + +// TestProtectedPrefixKeepsSensitiveDetectable is the regression for the R0010 +// false-negative: with /etc far over the collapse threshold, an unprotected +// profile folds /etc into /etc/⋯ (or the root into /⋯/⋯), which spuriously +// covers a never-seen /etc/shadow.evil and makes was_path_opened return true. +// Protecting the rule prefix /etc/shadow must keep that path space literal +// while still collapsing unrelated high-cardinality trees like /proc. 
+func TestProtectedPrefixKeepsSensitiveDetectable(t *testing.T) { + paths := []string{"/etc/shadow"} + for i := 0; i < 80; i++ { // /etc way over OpenDynamicThreshold + paths = append(paths, fmt.Sprintf("/etc/file%d", i)) + } + for d := 0; d < 80; d++ { // /proc huge: must still collapse + for f := 0; f < 80; f++ { + paths = append(paths, fmt.Sprintf("/proc/%d/task%d", d, f)) + } + } + + // Precondition: without protection the novel sensitive path IS covered. + plain := genWith(NewPathAnalyzer(OpenDynamicThreshold), paths) + if by := coveredBy(plain, "/etc/shadow.evil"); by == "" { + t.Fatalf("precondition failed: unprotected profile should cover /etc/shadow.evil (got patterns %v)", keysOf(plain)) + } + + // With protection of /etc/shadow. + prot := genWith( + NewPathAnalyzerWithConfigsAndProtection(OpenDynamicThreshold, nil, []string{"/etc/shadow"}), + paths, + ) + if by := coveredBy(prot, "/etc/shadow.evil"); by != "" { + t.Errorf("novel /etc/shadow.evil still covered by %q — protection failed", by) + } + if _, ok := prot["/etc/shadow"]; !ok { + t.Errorf("expected /etc/shadow retained as a literal; got %v", keysOf(prot)) + } + // Exact learned /etc/shadow is still recognised (no false positive on baseline). + if coveredBy(prot, "/etc/shadow") == "" { + t.Errorf("expected learned /etc/shadow to be matched") + } + // /proc must still collapse — protection of /etc must not disable bloat + // control elsewhere. 6400 proc paths must not survive as literals. 
+ if len(prot) > 200 { + t.Errorf("expected /proc subtree to collapse (bounded pattern set); got %d patterns", len(prot)) + } +} + +func keysOf(m map[string]struct{}) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} + +// TestOpenProtectionExactSuffixContains exercises all four matcher kinds from a +// realistic R0010-style profileDataRequired against a noisy, high-cardinality +// open set, asserting that never-seen sensitive paths stay detectable while +// unrelated trees (/proc) still collapse. +func TestOpenProtectionExactSuffixContains(t *testing.T) { + prot := OpenProtection{ + Exact: []string{"/etc/sudoers"}, + Prefix: []string{"/etc/shadow", "/etc/sudoers.d/"}, + Suffix: []string{"_key"}, // ssh host keys + Contains: []string{"/.ssh/"}, // per-user ssh material, location unknown + } + + var opens []string + opens = append(opens, + "/etc/sudoers", + "/etc/sudoers.d/90-cloud-init", + "/etc/ssh/ssh_host_rsa_key", // suffix _key + "/home/alice/.ssh/id_rsa", // contains /.ssh/ + ) + for i := 0; i < 90; i++ { // /etc way over threshold + opens = append(opens, fmt.Sprintf("/etc/file%d", i)) + } + for u := 0; u < 60; u++ { // many home users (would collapse /home) + opens = append(opens, fmt.Sprintf("/home/user%d/.bashrc", u)) + } + for d := 0; d < 80; d++ { // /proc must still collapse + for f := 0; f < 80; f++ { + opens = append(opens, fmt.Sprintf("/proc/%d/task%d", d, f)) + } + } + + prefixes := prot.ProtectedPrefixes(opens) + got := genWith(NewPathAnalyzerWithConfigsAndProtection(OpenDynamicThreshold, nil, prefixes), opens) + + // Never-seen sensitive paths must NOT be covered by any wildcard. 
+ novel := map[string]string{ + "/etc/shadow.evil": "prefix /etc/shadow", + "/etc/sudoers.bak": "exact /etc/sudoers (pins /etc)", + "/etc/sudoers.d/99-evil": "prefix /etc/sudoers.d/", + "/etc/ssh/ssh_host_ed25519_key": "suffix _key (pins /etc/ssh)", + "/home/alice/.ssh/evil_authorized": "contains /.ssh/ (pins alice's .ssh)", + } + for p, why := range novel { + if by := coveredBy(got, p); by != "" { + t.Errorf("novel %q covered by %q — protection failed (%s)", p, by, why) + } + } + + // Bloat control preserved: /proc (6400 paths) must still collapse. + if len(got) > 400 { + t.Errorf("expected /proc to collapse (bounded pattern set); got %d patterns", len(got)) + } + + // Sanity: with no protection the same set DOES cover a novel sensitive path. + plain := genWith(NewPathAnalyzer(OpenDynamicThreshold), opens) + if coveredBy(plain, "/etc/shadow.evil") == "" { + t.Fatalf("precondition: unprotected run should cover /etc/shadow.evil") + } +} + +// TestProtectedPrefixBudgetCollapse asserts that when the number of literal children under +// a pinned ancestor exceeds PinnedSubtreeBudget, the node falls back to collapse anyway +// to prevent the entire profile from becoming TooLarge. +func TestProtectedPrefixBudgetCollapse(t *testing.T) { + // PinnedSubtreeBudget is 500. We open PinnedSubtreeBudget+1 (501) generated files, plus /etc/shadow, under /etc. + paths := []string{"/etc/shadow"} + for i := 0; i <= PinnedSubtreeBudget; i++ { + paths = append(paths, fmt.Sprintf("/etc/file%d", i)) + } + + // With protection, but exceeding the budget, the subtree should collapse. + got := genWith( + NewPathAnalyzerWithConfigsAndProtection(OpenDynamicThreshold, nil, []string{"/etc/shadow"}), + paths, + ) + + // Since /etc collapsed, we expect /etc/shadow.evil to be covered by /etc/⋯.
+ if by := coveredBy(got, "/etc/shadow.evil"); by == "" { + t.Errorf("expected /etc/shadow.evil to be covered by /etc/⋯ after exceeding PinnedSubtreeBudget, but it was not (patterns: %v)", keysOf(got)) + } +} diff --git a/pkg/registry/file/dynamicpathdetector/types.go b/pkg/registry/file/dynamicpathdetector/types.go index c0edef66a..122977fff 100644 --- a/pkg/registry/file/dynamicpathdetector/types.go +++ b/pkg/registry/file/dynamicpathdetector/types.go @@ -1,5 +1,7 @@ package dynamicpathdetector +import "math" + // --- Identifier constants --- // DynamicIdentifier matches exactly one path segment (single-segment wildcard). // WildcardIdentifier matches zero-or-more path segments (glob-style **). @@ -8,6 +10,20 @@ const ( WildcardIdentifier string = "*" ) +// NeverCollapseThreshold is an effective threshold so high a node's children +// cannot exceed it, i.e. the node is pinned to literal entries (unless its count exceeds PinnedSubtreeBudget). Used for +// rule-protected prefixes (see PathAnalyzer.protectedNode): keeping a sensitive +// prefix and its ancestors literal is what lets anomaly rules such as R0010 +// ("unexpected /etc/shadow access") distinguish a never-seen path from a +// generalised wildcard like /etc/⋯ or /⋯/⋯ that would otherwise cover it. +const NeverCollapseThreshold = math.MaxInt + +// PinnedSubtreeBudget is the maximum number of literal children a protected/pinned +// node is allowed to accumulate before we fall back to collapsing it. This prevents +// size blowup and "TooLarge" status (which clears the entire profile spec downstream) +// when a protected directory (like /etc) receives a very high number of unique opens. +const PinnedSubtreeBudget = 500 + // --- Default collapse thresholds --- // OpenDynamicThreshold is the fallback threshold used by AnalyzeOpens when // no more-specific CollapseConfig matches the walked path prefix. 
@@ -78,6 +94,18 @@ type PathAnalyzer struct { threshold int // fallback threshold when no config matches configs []CollapseConfig // per-prefix overrides; longest prefix wins defaultCfg CollapseConfig // explicit fallback; equivalent to {Prefix:"/", Threshold: threshold} + + // Rule-protected prefixes, precomputed into two sets so protectedNode is + // O(1) for the common (non-protected) node and never scales with the number + // of protected prefixes. Both nil when no protection is configured. + // pinAncestors: every dir-prefix (incl. self) of every protected prefix — + // e.g. "/etc/shadow" contributes {"/", "/etc", "/etc/shadow"}. A node + // whose path is in here is an ancestor-or-self of a protected prefix and + // must stay literal so no wildcard forms at/above the sensitive level. + // protectedRoots: the protected prefixes themselves, used to detect nodes + // *inside* a protected subtree (which must also stay literal). + pinAncestors map[string]struct{} + protectedRoots map[string]struct{} } func (sn *SegmentNode) IsNextDynamic() bool { diff --git a/pkg/registry/file/openprotection.go b/pkg/registry/file/openprotection.go new file mode 100644 index 000000000..9123b1d4a --- /dev/null +++ b/pkg/registry/file/openprotection.go @@ -0,0 +1,161 @@ +package file + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/armosec/armoapi-go/armotypes" + "github.com/kubescape/go-logger" + loggerhelpers "github.com/kubescape/go-logger/helpers" + "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// OpenProtectionConfigMapKey is the key under which the open-protection union +// (JSON-encoded armotypes.OpenMatchers) is stored in the source ConfigMap. 
+const OpenProtectionConfigMapKey = "openProtection" + +// DefaultOpenProtectionRefreshInterval is how often the reloader re-reads the +// source ConfigMap when no interval is configured. Rule bindings change rarely, +// so a coarse interval keeps API-server load negligible while still picking up +// operator-published changes within about a minute. +const DefaultOpenProtectionRefreshInterval = time.Minute + +// OpenProtectionStore is a concurrency-safe holder for the active open-protection +// matchers. The container-profile processor reads it on every PreSave (the open- +// event hot path, by far the most common) via Get, while a reloader goroutine +// replaces it via Set after each successful ConfigMap read. Get copies the struct +// value under a read-lock, so profile deflation never races a refresh. +type OpenProtectionStore struct { + mu sync.RWMutex + current dynamicpathdetector.OpenProtection +} + +// NewOpenProtectionStore seeds a store with the initial matchers (e.g. a static +// value from config used until the first successful ConfigMap read, or the sole +// value in environments without a reloader). +func NewOpenProtectionStore(initial armotypes.OpenMatchers) *OpenProtectionStore { + return &OpenProtectionStore{ + current: OpenProtectionFromMatchers(initial), + } +} + +// Get returns the current protection. The returned value shares the underlying +// slices with the stored value; callers must treat it as read-only. Profile +// deflation only ranges over the slices, so this is safe and allocation-free on +// the hot path. +func (s *OpenProtectionStore) Get() dynamicpathdetector.OpenProtection { + s.mu.RLock() + defer s.mu.RUnlock() + return s.current +} + +// Set replaces the current protection with the union derived from m. The +// conversion runs before the write lock is taken, so readers on the hot path (Get) +// are only blocked for the assignment; the reloader calls Set once per successful refresh.
+func (s *OpenProtectionStore) Set(m armotypes.OpenMatchers) { + p := OpenProtectionFromMatchers(m) + s.mu.Lock() + s.current = p + s.mu.Unlock() +} + +// ParseOpenProtectionConfigMap decodes the open-protection union stored as JSON +// under OpenProtectionConfigMapKey in a ConfigMap's data. A missing or empty key +// yields an empty (legacy, no-protection) union without error, so the producer can +// clear protection by removing the key; undecodable JSON returns a wrapped error. +func ParseOpenProtectionConfigMap(data map[string]string) (armotypes.OpenMatchers, error) { + raw, ok := data[OpenProtectionConfigMapKey] + if !ok || raw == "" { + return armotypes.OpenMatchers{}, nil + } + var m armotypes.OpenMatchers + if err := json.Unmarshal([]byte(raw), &m); err != nil { + return armotypes.OpenMatchers{}, fmt.Errorf("parse open-protection configmap key %q: %w", OpenProtectionConfigMapKey, err) + } + return m, nil +} + +// OpenProtectionReloader periodically reads the open-protection ConfigMap and +// updates the shared store, so the storage apiserver tracks the set of sensitive +// prefixes published by the operator (the union of active rules' +// profileDataRequired.opens) without a restart. +// +// This is the in-cluster reader side of the "operator writes one object, storage +// refreshes periodically" wiring. The producer side — the operator watching +// RuntimeRuleAlertBinding, resolving selectors against the rule library, and +// writing this ConfigMap — is implemented separately. The reader tolerates the +// ConfigMap being absent (operator not yet deployed) by keeping the current +// protection rather than wiping it, which avoids a transient unprotection window +// and errs toward keeping sensitive paths detectable.
+type OpenProtectionReloader struct { + client kubernetes.Interface + namespace string + name string + interval time.Duration + store *OpenProtectionStore +} + +// NewOpenProtectionReloader builds a reloader for the ConfigMap namespace/name, +// refreshing into store every interval (a non-positive interval falls back to +// DefaultOpenProtectionRefreshInterval). +func NewOpenProtectionReloader(client kubernetes.Interface, namespace, name string, interval time.Duration, store *OpenProtectionStore) *OpenProtectionReloader { + if interval <= 0 { + interval = DefaultOpenProtectionRefreshInterval + } + return &OpenProtectionReloader{ + client: client, + namespace: namespace, + name: name, + interval: interval, + store: store, + } +} + +// reloadOnce reads the ConfigMap and applies it to the store. NotFound means "no +// source published yet" and keeps the current protection; any other read or parse +// error is returned wrapped with the ConfigMap's namespace/name, store untouched. +func (r *OpenProtectionReloader) reloadOnce(ctx context.Context) error { + cm, err := r.client.CoreV1().ConfigMaps(r.namespace).Get(ctx, r.name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + logger.L().Debug("open-protection configmap not found; keeping current protection", + loggerhelpers.String("namespace", r.namespace), + loggerhelpers.String("name", r.name)) + return nil + } + return fmt.Errorf("get open-protection configmap %s/%s: %w", r.namespace, r.name, err) + } + m, err := ParseOpenProtectionConfigMap(cm.Data) + if err != nil { + return fmt.Errorf("open-protection configmap %s/%s: %w", r.namespace, r.name, err) + } + r.store.Set(m) + return nil +} + +// Run blocks, refreshing on each tick until ctx is cancelled. It performs an +// immediate initial read so the apiserver adopts the published protection +// without waiting a full interval.
+func (r *OpenProtectionReloader) Run(ctx context.Context) { + if err := r.reloadOnce(ctx); err != nil { + logger.L().Ctx(ctx).Warning("open-protection initial reload failed", loggerhelpers.Error(err)) + } + ticker := time.NewTicker(r.interval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + if err := r.reloadOnce(ctx); err != nil { + logger.L().Ctx(ctx).Warning("open-protection reload failed", loggerhelpers.Error(err)) + } + } + } +} diff --git a/pkg/registry/file/openprotection_test.go b/pkg/registry/file/openprotection_test.go new file mode 100644 index 000000000..70a82784a --- /dev/null +++ b/pkg/registry/file/openprotection_test.go @@ -0,0 +1,115 @@ +package file + +import ( + "context" + "testing" + "time" + + "github.com/armosec/armoapi-go/armotypes" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +func TestParseOpenProtectionConfigMap(t *testing.T) { + t.Run("missing key yields empty union", func(t *testing.T) { + m, err := ParseOpenProtectionConfigMap(map[string]string{"other": "x"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !m.Empty() { + t.Fatalf("expected empty union, got %+v", m) + } + }) + + t.Run("empty value yields empty union", func(t *testing.T) { + m, err := ParseOpenProtectionConfigMap(map[string]string{OpenProtectionConfigMapKey: ""}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !m.Empty() { + t.Fatalf("expected empty union, got %+v", m) + } + }) + + t.Run("valid json parses all matcher kinds", func(t *testing.T) { + raw := `{"prefix":["/etc/shadow"],"exact":["/etc/sudoers"],"contains":["/.ssh/"],"suffix":[".key"]}` + m, err := ParseOpenProtectionConfigMap(map[string]string{OpenProtectionConfigMapKey: raw}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(m.Prefix) != 1 || m.Prefix[0] != "/etc/shadow" { + t.Errorf("prefix mismatch: %+v", m.Prefix) + } + if
len(m.Exact) != 1 || m.Exact[0] != "/etc/sudoers" { + t.Errorf("exact mismatch: %+v", m.Exact) + } + if len(m.Contains) != 1 || m.Contains[0] != "/.ssh/" { + t.Errorf("contains mismatch: %+v", m.Contains) + } + if len(m.Suffix) != 1 || m.Suffix[0] != ".key" { + t.Errorf("suffix mismatch: %+v", m.Suffix) + } + }) + + t.Run("invalid json errors", func(t *testing.T) { + if _, err := ParseOpenProtectionConfigMap(map[string]string{OpenProtectionConfigMapKey: "{not json"}); err == nil { + t.Fatal("expected error for invalid json") + } + }) +} + +func TestOpenProtectionStoreGetSet(t *testing.T) { + s := NewOpenProtectionStore(armotypes.OpenMatchers{Prefix: []string{"/etc/shadow"}}) + if got := s.Get(); len(got.Prefix) != 1 || got.Prefix[0] != "/etc/shadow" { + t.Fatalf("seed not applied: %+v", got) + } + + s.Set(armotypes.OpenMatchers{Exact: []string{"/etc/sudoers"}}) + got := s.Get() + if len(got.Prefix) != 0 { + t.Errorf("expected prefix cleared after Set, got %+v", got.Prefix) + } + if len(got.Exact) != 1 || got.Exact[0] != "/etc/sudoers" { + t.Errorf("expected exact replaced, got %+v", got.Exact) + } +} + +func TestOpenProtectionReloaderReloadOnce(t *testing.T) { + const ns, name = "kubescape", "storage-open-protection" + + t.Run("present configmap is applied", func(t *testing.T) { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: name}, + Data: map[string]string{OpenProtectionConfigMapKey: `{"prefix":["/etc/shadow"]}`}, + } + client := fake.NewSimpleClientset(cm) + store := NewOpenProtectionStore(armotypes.OpenMatchers{}) + r := NewOpenProtectionReloader(client, ns, name, time.Minute, store) + if err := r.reloadOnce(context.Background()); err != nil { + t.Fatalf("reloadOnce: %v", err) + } + if got := store.Get(); len(got.Prefix) != 1 || got.Prefix[0] != "/etc/shadow" { + t.Fatalf("expected protection from configmap, got %+v", got) + } + }) + + t.Run("missing configmap keeps current protection", func(t *testing.T) { + client :=
fake.NewSimpleClientset() // no objects registered, so the ConfigMap Get returns NotFound + store := NewOpenProtectionStore(armotypes.OpenMatchers{Prefix: []string{"/etc/shadow"}}) + r := NewOpenProtectionReloader(client, ns, name, time.Minute, store) + if err := r.reloadOnce(context.Background()); err != nil { + t.Fatalf("reloadOnce should tolerate NotFound: %v", err) + } + if got := store.Get(); len(got.Prefix) != 1 || got.Prefix[0] != "/etc/shadow" { + t.Fatalf("expected seeded protection preserved on NotFound, got %+v", got) + } + }) + + t.Run("default interval applied for non-positive", func(t *testing.T) { + r := NewOpenProtectionReloader(fake.NewSimpleClientset(), ns, name, 0, NewOpenProtectionStore(armotypes.OpenMatchers{})) + if r.interval != DefaultOpenProtectionRefreshInterval { + t.Fatalf("expected default interval, got %v", r.interval) + } + }) +}