diff --git a/bench/internal/schema/schema.go b/bench/internal/schema/schema.go index a520143..3153a7a 100644 --- a/bench/internal/schema/schema.go +++ b/bench/internal/schema/schema.go @@ -60,7 +60,7 @@ type Run struct { // to the workload it's profiling. Above the budget = regression. // - KernelResolutionRate: fraction of kernel-side samples in // perf-agent #1's own pprof that resolved to a named symbol -// instead of "0x". A drop = blazesym + kallsyms fallback +// instead of "0x". A drop = blazesym kernel symbolization // broke (the original v1.2.0 lockdown class of bug). type SelfMetrics struct { WorkloadPID int `json:"workload_pid"` diff --git a/cpu/cpu_arm64_bpfel.o b/cpu/cpu_arm64_bpfel.o index 4097cc6..a60f653 100644 Binary files a/cpu/cpu_arm64_bpfel.o and b/cpu/cpu_arm64_bpfel.o differ diff --git a/cpu/cpu_x86_bpfel.o b/cpu/cpu_x86_bpfel.o index 4097cc6..a60f653 100644 Binary files a/cpu/cpu_x86_bpfel.o and b/cpu/cpu_x86_bpfel.o differ diff --git a/offcpu/offcpu_arm64_bpfel.o b/offcpu/offcpu_arm64_bpfel.o index c70de07..32d9f70 100644 Binary files a/offcpu/offcpu_arm64_bpfel.o and b/offcpu/offcpu_arm64_bpfel.o differ diff --git a/offcpu/offcpu_x86_bpfel.o b/offcpu/offcpu_x86_bpfel.o index c70de07..ca7b5a9 100644 Binary files a/offcpu/offcpu_x86_bpfel.o and b/offcpu/offcpu_x86_bpfel.o differ diff --git a/perfagent/metrics_endpoint.go b/perfagent/metrics_endpoint.go index 155087d..73c394e 100644 --- a/perfagent/metrics_endpoint.go +++ b/perfagent/metrics_endpoint.go @@ -35,11 +35,9 @@ func metricsHandlerFor(getCounters func() symbolize.CountersSnapshot) http.Handl writeMetricLine(w, "perf_agent_symbolize_kernel_input_ips_total", "counter", "total kernel IPs handed into SymbolizeKernel", s.KernelInputIPs) writeMetricLine(w, "perf_agent_symbolize_kernel_batch_failures_total", - "counter", "batches where every symbolizer (blazesym + kallsyms) failed", s.KernelBatchFailures) - writeMetricLine(w, "perf_agent_symbolize_kernel_fallback_engaged", - "gauge", "1 when symbolizer switched to pure-Go kallsyms (lockdown-class hosts)", s.KernelFallbackEngaged) + "counter", "batches where blazesym kernel symbolization failed", s.KernelBatchFailures) writeMetricLine(w, "perf_agent_symbolize_kernel_raw_addr_frames_total", - "counter", "kernel IPs that fell to raw-hex synthesis (both symbolizers failed)", s.KernelRawAddrFrames) + "counter", "kernel IPs that fell to raw-hex synthesis (blazesym failed)", s.KernelRawAddrFrames) writeMetricLine(w, "perf_agent_symbolize_kernel_lockdown_eperm_total", "counter", "BLAZE_ERR_PERMISSION_DENIED events from blazesym (canonical lockdown signature)", s.KernelLockdownEPERM) writeMetricLine(w, "perf_agent_symbolize_kernel_other_err_total", diff --git a/perfagent/metrics_endpoint_test.go b/perfagent/metrics_endpoint_test.go index 0641519..721751b 100644 --- a/perfagent/metrics_endpoint_test.go +++ b/perfagent/metrics_endpoint_test.go @@ -17,13 +17,12 @@ import ( // dashboards downstream. func TestMetricsHandler_PrometheusFormat(t *testing.T) { snap := symbolize.CountersSnapshot{ - KernelBatches: 5, - KernelInputIPs: 42, - KernelBatchFailures: 1, - KernelFallbackEngaged: 1, - KernelRawAddrFrames: 3, - KernelLockdownEPERM: 7, - KernelOtherErr: 2, + KernelBatches: 5, + KernelInputIPs: 42, + KernelBatchFailures: 1, + KernelRawAddrFrames: 3, + KernelLockdownEPERM: 7, + KernelOtherErr: 2, } h := metricsHandlerFor(func() symbolize.CountersSnapshot { return snap }) @@ -44,8 +43,6 @@ func TestMetricsHandler_PrometheusFormat(t *testing.T) { "# HELP perf_agent_symbolize_kernel_batches_total", "# TYPE perf_agent_symbolize_kernel_batches_total counter", "perf_agent_symbolize_kernel_batches_total 5", - "# TYPE perf_agent_symbolize_kernel_fallback_engaged gauge", - "perf_agent_symbolize_kernel_fallback_engaged 1", "perf_agent_symbolize_kernel_lockdown_eperm_total 7", "perf_agent_symbolize_kernel_other_err_total 2", "perf_agent_symbolize_kernel_raw_addr_frames_total 3", diff --git a/profile/offcpu_dwarf_arm64_bpfel.o b/profile/offcpu_dwarf_arm64_bpfel.o index af5d633..866f88f 100644 Binary files a/profile/offcpu_dwarf_arm64_bpfel.o and b/profile/offcpu_dwarf_arm64_bpfel.o differ diff --git a/profile/offcpu_dwarf_x86_bpfel.o b/profile/offcpu_dwarf_x86_bpfel.o index b2e3071..8eaf121 100644 Binary files a/profile/offcpu_dwarf_x86_bpfel.o and b/profile/offcpu_dwarf_x86_bpfel.o differ diff --git a/profile/perf_arm64_bpfel.o b/profile/perf_arm64_bpfel.o index 331c86c..7d35547 100644 Binary files a/profile/perf_arm64_bpfel.o and b/profile/perf_arm64_bpfel.o differ diff --git a/profile/perf_dwarf_arm64_bpfel.o b/profile/perf_dwarf_arm64_bpfel.o index 3bd5db0..bab0bc6 100644 Binary files a/profile/perf_dwarf_arm64_bpfel.o and b/profile/perf_dwarf_arm64_bpfel.o differ diff --git a/profile/perf_dwarf_x86_bpfel.o b/profile/perf_dwarf_x86_bpfel.o index e3b527b..82b09e5 100644 Binary files a/profile/perf_dwarf_x86_bpfel.o and b/profile/perf_dwarf_x86_bpfel.o differ diff --git a/profile/perf_x86_bpfel.o b/profile/perf_x86_bpfel.o index 331c86c..7d35547 100644 Binary files a/profile/perf_x86_bpfel.o and b/profile/perf_x86_bpfel.o differ diff --git a/symbolize/allocs_budget_test.go b/symbolize/allocs_budget_test.go index fa65b28..9d1bb56 100644 --- a/symbolize/allocs_budget_test.go +++ b/symbolize/allocs_budget_test.go @@ -4,47 +4,6 @@ import ( "testing" ) -// TestAllocsBudget_ParseKallsymsLine asserts the per-line parser -// remains allocation-free. Catches PRs that accidentally -// reintroduce strings.Fields / strconv.ParseUint or other -// allocating helpers into the hot path (the regression that -// dogfood iter 5 surfaced via mallocgc / sweepone in user-side -// profiles). -// -// Budget: 0 allocs per call. Anything > 0 indicates a regression. -func TestAllocsBudget_ParseKallsymsLine(t *testing.T) { - const budget = 0 - line := []byte("ffffffffc0001234 T kvm_vcpu_ioctl [kvm]") - got := testing.AllocsPerRun(1000, func() { - _, _, _, _, _ = parseKallsymsLine(line) - }) - if got > float64(budget) { - t.Errorf("parseKallsymsLine allocs/op = %.2f, want <= %d", got, budget) - } -} - -// TestAllocsBudget_ResolveKernelIPs caps the per-Resolve-call -// allocation count. The current implementation allocates exactly -// one []Frame slice per call (the return value), so the budget -// is 1. Going above means someone added per-IP allocation — -// likely a [Name string conversion inside the hot loop instead -// of using pre-interned strings. -func TestAllocsBudget_ResolveKernelIPs(t *testing.T) { - const budget = 1 - k := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffff80001000, 0xffffffff80002000, 0xffffffff80003000}, - names: []string{"sym_a", "sym_b", "sym_c"}, - modules: []string{"", "", ""}, - } - ips := []uint64{0xffffffff80001042, 0xffffffff80002042, 0xffffffff80003042} - got := testing.AllocsPerRun(1000, func() { - _ = k.Resolve(ips) - }) - if got > float64(budget) { - t.Errorf("Resolve allocs/op = %.2f, want <= %d", got, budget) - } -} - // TestAllocsBudget_LatencyHistRecord caps the per-Record cost // of the histogram on the hot path. Record runs under a mutex // but should never allocate — the ring buffer is fixed-size. diff --git a/symbolize/blazesym_eperm_marker_test.go b/symbolize/blazesym_eperm_marker_test.go deleted file mode 100644 index fbc2ea5..0000000 --- a/symbolize/blazesym_eperm_marker_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package symbolize - -import ( - "os" - "path/filepath" - "testing" -) - -// withMarkerCacheDir overrides the XDG_CACHE_HOME so the EPERM -// marker lands in a t.TempDir(). Avoids polluting the user's -// real ~/.cache. Also pins the boot_id reader. -func withMarkerCacheDir(t *testing.T, bootID [16]byte) func() { - t.Helper() - dir := t.TempDir() - prevXDG, hadPrev := os.LookupEnv("XDG_CACHE_HOME") - t.Setenv("XDG_CACHE_HOME", dir) - prevBoot := readBootIDFn - readBootIDFn = func() ([16]byte, error) { return bootID, nil } - return func() { - readBootIDFn = prevBoot - if hadPrev { - _ = os.Setenv("XDG_CACHE_HOME", prevXDG) - } else { - _ = os.Unsetenv("XDG_CACHE_HOME") - } - } -} - -// TestBlazesymEPERMMarker_Roundtrip asserts writing the marker -// then checking with the same boot_id reports it as present. -func TestBlazesymEPERMMarker_Roundtrip(t *testing.T) { - cleanup := withMarkerCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - - if blazesymEPERMMarkerExists() { - t.Fatalf("marker exists before write") - } - if err := writeBlazesymEPERMMarker(); err != nil { - t.Fatalf("writeBlazesymEPERMMarker: %v", err) - } - if !blazesymEPERMMarkerExists() { - t.Errorf("marker missing after write") - } -} - -// TestBlazesymEPERMMarker_BootIDScoped asserts the marker for one -// boot_id is invisible under a different boot_id. Critical: after -// a reboot, lockdown state may differ — must not assume EPERM -// persists. -func TestBlazesymEPERMMarker_BootIDScoped(t *testing.T) { - cleanup := withMarkerCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - if err := writeBlazesymEPERMMarker(); err != nil { - t.Fatalf("write: %v", err) - } - // Switch to a different boot_id; marker should appear absent. - readBootIDFn = func() ([16]byte, error) { return [16]byte{9, 9, 9, 9}, nil } - if blazesymEPERMMarkerExists() { - t.Errorf("marker visible under different boot_id") - } -} - -// TestBlazesymEPERMMarker_PathIncludesBootID is a structural -// check: the marker filename literally encodes the boot_id hex -// so multiple boots' markers can coexist without colliding. -func TestBlazesymEPERMMarker_PathIncludesBootID(t *testing.T) { - cleanup := withMarkerCacheDir(t, [16]byte{0xab, 0xcd}) - defer cleanup() - bootID, _ := readBootIDFn() - path := blazesymEPERMMarkerPath(bootID) - if !filepath.IsAbs(path) { - t.Errorf("marker path not absolute: %s", path) - } - // abcd0000...0000 hex form should appear in the filename. - if filepath.Base(path) == "" { - t.Fatalf("empty basename") - } -} diff --git a/symbolize/kallsyms.go b/symbolize/kallsyms.go deleted file mode 100644 index 9d1ff75..0000000 --- a/symbolize/kallsyms.go +++ /dev/null @@ -1,271 +0,0 @@ -package symbolize - -import ( - "bufio" - "fmt" - "os" - "sort" -) - -// parseKallsymsLine extracts (addr, type, name, module) from one -// /proc/kallsyms line without allocating. Returns ok=false on -// malformed lines. -// -// Format: "<16-hex-addr> [ \t]+[module]" -// Example: "ffffffff80c6a050 T __x64_sys_open" -// Example: "ffffffff8e2b0010 T kvm_init [kvm]" -// -// Name and module are slices into the input buffer; the caller is -// responsible for copying them out before the buffer is reused. -func parseKallsymsLine(line []byte) (addr uint64, typ byte, name, module []byte, ok bool) { - i := 0 - // hex addr — accept any run of hex digits, stop at first non-hex - for i < len(line) { - c := line[i] - v := uint64(0) - switch { - case c >= '0' && c <= '9': - v = uint64(c - '0') - case c >= 'a' && c <= 'f': - v = uint64(c-'a') + 10 - case c >= 'A' && c <= 'F': - v = uint64(c-'A') + 10 - default: - goto endAddr - } - addr = addr<<4 | v - i++ - } -endAddr: - if i == 0 { - return 0, 0, nil, nil, false - } - if i >= len(line) || line[i] != ' ' { - return 0, 0, nil, nil, false - } - i++ - if i >= len(line) { - return 0, 0, nil, nil, false - } - typ = line[i] - i++ - if i >= len(line) || line[i] != ' ' { - return 0, 0, nil, nil, false - } - i++ - nameStart := i - for i < len(line) && line[i] != ' ' && line[i] != '\t' { - i++ - } - name = line[nameStart:i] - // optional module: whitespace then "[modname]" - for i < len(line) && (line[i] == ' ' || line[i] == '\t') { - i++ - } - if i < len(line) { - module = line[i:] - } - return addr, typ, name, module, true -} - -// kallsymsSymbolizer resolves kernel addresses by binary search -// against a snapshot of /proc/kallsyms. Used as the lockdown-safe -// fallback when blazesym fails with permission-denied (e.g., on -// Secure-Boot hosts where blazesym's kernel source tries /proc/kcore -// and gets EACCES, even with vmlinux=""). -// -// Resolution is name + offset only — no inline expansion, no -// file:line. /proc/kallsyms doesn't carry DWARF, and that's -// acceptable: operators get readable flame graphs, which is the -// load-bearing property. -type kallsymsSymbolizer struct { - addrs []uint64 // sorted ascending; parallel to names/modules - names []string - modules []string // "" for vmlinux, "[xfs]" etc. for modules -} - -// newKallsymsSymbolizer materializes the /proc/kallsyms index, -// preferring the on-disk cache (keyed by kernel boot_id) over a -// fresh parse. The fresh-parse path is unchanged from iter 5 -// (allocation-free byte-level scan + module intern); the cache -// path skips it entirely when valid. -// -// Decision order: -// 1. Try loadCachedKallsyms. Hit → return; cache key (boot_id) -// guarantees correctness. -// 2. Miss / stale / corrupt → fall through to parseKallsymsFresh -// and best-effort write the cache for next time. -// 3. Fresh parse fails → return the parse error. -// -// On lockdown hosts where blazesym hits EPERM on /proc/kcore and -// every agent invocation otherwise pays the ~0.8s kallsyms parse, -// the cache drops first-symbol latency to single-digit -// milliseconds. On non-lockdown hosts the kallsymsSymbolizer -// path isn't engaged at all, so this code has zero cost there. -func newKallsymsSymbolizer() (*kallsymsSymbolizer, error) { - if s, err := loadCachedKallsyms(); err == nil { - return s, nil - } - s, err := parseKallsymsFresh() - if err != nil { - return nil, err - } - // Best-effort: cache-write failures don't affect this call. - _ = writeKallsymsCache(s) - return s, nil -} - -// parseKallsymsFresh is the slow path: read and parse -// /proc/kallsyms from scratch. Tuning history kept here because -// this is also the path that PRODUCES the cache contents — the -// disk cache speeds up subsequent invocations, but the first -// invocation on every boot still pays this cost. -// -// Tuned across two bench-self iterations: -// - iter 3: wrap the file in a 256 KiB bufio.Reader so each -// read() pulls many lines at once (the kernel synthesizes -// kallsyms via vsnprintf per read; small reads forced -// repeated trips through that path). -// - iter 5: byte-level allocation-free line parser, replacing -// strings.Fields + strconv.ParseUint. The previous version -// was the top allocation source in perf-agent's user-side -// pprof: 3M kallsyms lines × strings.Fields × ParseUint = -// ~9M+ allocations, triggering noticeable GC pressure -// (sweepone, tryDeferToSpanScan, mallocgc). -// -// One string allocation per KEPT symbol (the name; copies out of -// the read buffer so the buffer can be reused). Modules deduped -// via an intern map: a typical kernel has tens of modules but -// millions of module symbols. -func parseKallsymsFresh() (*kallsymsSymbolizer, error) { - f, err := os.Open("/proc/kallsyms") - if err != nil { - return nil, fmt.Errorf("kallsyms: open: %w", err) - } - defer func() { _ = f.Close() }() - - var ( - addrs []uint64 - names []string - modules []string - sawNZ bool - ) - moduleIntern := make(map[string]string, 64) - br := bufio.NewReaderSize(f, 256*1024) - sc := bufio.NewScanner(br) - // Token buffer: 4 KiB initial (typical line), 1 MiB max for - // pathologically long module-symbol names. - sc.Buffer(make([]byte, 0, 4096), 1<<20) - for sc.Scan() { - line := sc.Bytes() - addr, typ, nameBytes, modBytes, ok := parseKallsymsLine(line) - if !ok { - continue - } - // Type filter: only addressable code symbols. Matches the - // kinds the awk hack in resolve_user_addrs.py keeps for - // userspace; same logic applies to kernel symbols. - switch typ { - case 'T', 't', 'W', 'w', 'i': - default: - continue - } - if addr != 0 { - sawNZ = true - } - module := "" - if len(modBytes) > 0 { - s := string(modBytes) - if interned, ok := moduleIntern[s]; ok { - module = interned - } else { - moduleIntern[s] = s - module = s - } - } - addrs = append(addrs, addr) - names = append(names, string(nameBytes)) // one alloc per kept name - modules = append(modules, module) - } - if err := sc.Err(); err != nil { - return nil, fmt.Errorf("kallsyms: scan: %w", err) - } - if len(addrs) == 0 || !sawNZ { - return nil, ErrKernelSymbolsUnavailable - } - - // /proc/kallsyms is already sorted by address on every supported - // kernel, but the contract isn't formally documented anywhere we - // can lean on — sort defensively. - idx := make([]int, len(addrs)) - for i := range idx { - idx[i] = i - } - sort.Slice(idx, func(i, j int) bool { return addrs[idx[i]] < addrs[idx[j]] }) - sortedAddrs := make([]uint64, len(addrs)) - sortedNames := make([]string, len(addrs)) - sortedModules := make([]string, len(addrs)) - for i, j := range idx { - sortedAddrs[i] = addrs[j] - sortedNames[i] = names[j] - sortedModules[i] = modules[j] - } - return &kallsymsSymbolizer{ - addrs: sortedAddrs, - names: sortedNames, - modules: sortedModules, - }, nil -} - -// maxKallsymsOffset bounds how far past a symbol an IP may land -// before we treat the resolution as bogus. The awk hack uses 64 KiB -// for userspace; kernel functions tend to be smaller, but conservatively -// 64 KiB rejects only obvious mis-attributions (gaps between subsystem -// regions in vmlinux). -const maxKallsymsOffset = 0x10000 - -// Resolve returns one Frame per IP via at-or-below binary search. -// IPs that map to a symbol within maxKallsymsOffset get the symbol -// name + module + offset. IPs outside that window get a raw-hex Name -// and Reason=FailureUnknownAddress, matching the rawKernelAddrFrames -// posture so kernel context still survives into the pprof. -func (k *kallsymsSymbolizer) Resolve(ips []uint64) []Frame { - out := make([]Frame, len(ips)) - for i, ip := range ips { - // sort.Search returns the lowest j with addrs[j] > ip; - // the matching symbol is at j-1 (largest addr <= ip). - j := sort.Search(len(k.addrs), func(j int) bool { return k.addrs[j] > ip }) - if j == 0 { - out[i] = Frame{ - Address: ip, - Name: fmt.Sprintf("0x%x", ip), - Module: "[kernel.kallsyms]", - Reason: FailureUnknownAddress, - } - continue - } - symIdx := j - 1 - symAddr := k.addrs[symIdx] - offset := ip - symAddr - if offset > maxKallsymsOffset { - out[i] = Frame{ - Address: ip, - Name: fmt.Sprintf("0x%x", ip), - Module: "[kernel.kallsyms]", - Reason: FailureUnknownAddress, - } - continue - } - module := k.modules[symIdx] - if module == "" { - module = "[kernel.kallsyms]" - } - out[i] = Frame{ - Address: ip, - Name: k.names[symIdx], - Module: module, - Offset: offset, - } - } - return out -} diff --git a/symbolize/kallsyms_bench_test.go b/symbolize/kallsyms_bench_test.go deleted file mode 100644 index 43c2fab..0000000 --- a/symbolize/kallsyms_bench_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package symbolize - -import ( - "os" - "path/filepath" - "testing" -) - -// BenchmarkParseKallsymsFresh measures end-to-end /proc/kallsyms -// parse cost. This is the cold-cache path that every perf-agent -// invocation pays on lockdown hosts before iter 6's disk cache -// shipped; the benchmark exists to catch regressions in the -// allocation-free parser (iter 5) and the 256 KiB read buffer -// (iter 3). Skips on hosts where kallsyms is unreadable. -// -// Reference numbers on a Ryzen 9 7940HS / Fedora 44 kernel -// 7.0.8-200, ~225k T/t/W/w/i symbols after filtering: -// ~200 ms/op, 1 alloc-per-symbol (the Name string copy). -func BenchmarkParseKallsymsFresh(b *testing.B) { - if !kallsymsReadable() { - b.Skip("requires kptr_restrict=0") - } - b.ReportAllocs() - for b.Loop() { - s, err := parseKallsymsFresh() - if err != nil { - b.Fatalf("parseKallsymsFresh: %v", err) - } - if len(s.addrs) == 0 { - b.Fatalf("empty result") - } - } -} - -// BenchmarkLoadCachedKallsyms measures the warm-cache path — -// the disk format read + decode. Expected to be ~50x faster -// than BenchmarkParseKallsymsFresh; if the gap closes, the -// cache format or read path has regressed. -func BenchmarkLoadCachedKallsyms(b *testing.B) { - if !kallsymsReadable() { - b.Skip("requires kptr_restrict=0") - } - // Prime the cache once outside the timed loop. - tmpDir := b.TempDir() - cachePath := filepath.Join(tmpDir, "kallsyms.cache") - prevPath := cachePathFn - prevBoot := readBootIDFn - cachePathFn = func() string { return cachePath } - readBootIDFn = func() ([16]byte, error) { return [16]byte{1, 2, 3, 4}, nil } - defer func() { - cachePathFn = prevPath - readBootIDFn = prevBoot - }() - fresh, err := parseKallsymsFresh() - if err != nil { - b.Fatalf("parseKallsymsFresh: %v", err) - } - if err := writeKallsymsCache(fresh); err != nil { - b.Fatalf("writeKallsymsCache: %v", err) - } - if _, err := os.Stat(cachePath); err != nil { - b.Fatalf("cache not written: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - for b.Loop() { - s, err := loadCachedKallsyms() - if err != nil { - b.Fatalf("loadCachedKallsyms: %v", err) - } - if len(s.addrs) == 0 { - b.Fatalf("empty cache read") - } - } -} - -// BenchmarkResolveKernelIPs measures the per-IP resolve cost -// (binary search + frame construction) at production batch -// size. The path is hot — every BPF kernel sample goes through -// it on lockdown hosts. -func BenchmarkResolveKernelIPs(b *testing.B) { - if !kallsymsReadable() { - b.Skip("requires kptr_restrict=0") - } - k, err := parseKallsymsFresh() - if err != nil { - b.Fatalf("parseKallsymsFresh: %v", err) - } - // Probe addresses spread across the kallsyms range so binary - // search doesn't degenerate to one bucket. - probes := make([]uint64, 0, 32) - step := len(k.addrs) / 32 - if step == 0 { - step = 1 - } - for i := 0; i < len(k.addrs) && len(probes) < 32; i += step { - probes = append(probes, k.addrs[i]+1) // +1 to land inside the function - } - b.ResetTimer() - b.ReportAllocs() - for b.Loop() { - frames := k.Resolve(probes) - if len(frames) != len(probes) { - b.Fatalf("frame count mismatch") - } - } -} diff --git a/symbolize/kallsyms_cache.go b/symbolize/kallsyms_cache.go deleted file mode 100644 index 7c227f1..0000000 --- a/symbolize/kallsyms_cache.go +++ /dev/null @@ -1,293 +0,0 @@ -package symbolize - -import ( - "bufio" - "encoding/binary" - "encoding/hex" - "errors" - "fmt" - "io" - "os" - "path/filepath" - "strings" -) - -// Disk cache for the parsed /proc/kallsyms index. Motivated by -// bench-self iter 6: on lockdown hosts (Secure Boot, -// integrity-locked) every perf-agent invocation has to parse the -// 3M-line kallsyms file from scratch because blazesym's kernel -// source hits EPERM on /proc/kcore. The parse itself is fast -// (allocation-free per iter 5) but the kernel synthesizes the -// file via vsnprintf on each read syscall — that's the floor. -// -// Cache key: the kernel boot_id. Kernel symbol addresses change -// only across reboots (KASLR), so the boot_id is the right -// invalidation signal — drops the cache exactly when it would -// be wrong, never falsely keeps a stale copy. -// -// Format (little-endian): -// -// header: magic u32 | version u32 | boot_id [16]byte | n_syms u32 -// per-symbol: addr u64 | name_len u16 | module_len u16 | name | module -// -// One symbol record averages ~50 bytes; a typical kernel's filtered -// kallsyms (~1.5M kept after the T/t/W/w/i filter) lands at ~70 MiB -// on disk. Small enough to ship in ~/.cache without ceremony. -const ( - kallsymsCacheMagic uint32 = 0x4B414C53 // 'KALS' - kallsymsCacheVersion uint32 = 1 - kallsymsHeaderSize = 4 + 4 + 16 + 4 -) - -// errKallsymsCacheStale signals the cache was produced under a -// different kernel boot_id. Caller must reparse /proc/kallsyms. -var errKallsymsCacheStale = errors.New("kallsyms: cache stale (kernel rebooted)") - -// errKallsymsCacheCorrupt signals an unreadable or wrong-magic file. -// Non-fatal: caller falls back to a fresh parse and the next -// successful parse will overwrite the bad file. -var errKallsymsCacheCorrupt = errors.New("kallsyms: cache corrupt") - -// Indirection seams so tests can pin the cache path + boot_id. -// Production: cachePathFn = kallsymsDefaultCachePath, -// readBootIDFn = readBootID. -var ( - cachePathFn = kallsymsDefaultCachePath - readBootIDFn = readBootID -) - -// kallsymsDefaultCachePath honors $XDG_CACHE_HOME and falls back to -// ~/.cache; final fallback is /tmp so the cache works even for -// daemons with no $HOME set. -func kallsymsDefaultCachePath() string { - return filepath.Join(kallsymsCacheDir(), "kallsyms.cache") -} - -// kallsymsCacheDir returns the directory holding all per-boot -// cache artifacts (kallsyms parse + blazesym-EPERM marker). -func kallsymsCacheDir() string { - base := os.Getenv("XDG_CACHE_HOME") - if base == "" { - if home, err := os.UserHomeDir(); err == nil && home != "" { - base = filepath.Join(home, ".cache") - } else { - base = "/tmp" - } - } - return filepath.Join(base, "perf-agent") -} - -// blazesymEPERMMarkerPath returns the path of the -// "blazesym EPERM'd on this boot" marker. The boot_id is encoded -// in the filename so a reboot invalidates the signal — same -// rationale as the kallsyms cache key. -func blazesymEPERMMarkerPath(bootID [16]byte) string { - return filepath.Join(kallsymsCacheDir(), fmt.Sprintf("blazesym-eperm-%x", bootID)) -} - -// blazesymEPERMMarkerExists reports whether blazesym was observed -// to EPERM on this boot. Best-effort: any error is treated as -// "no marker", so a stat failure can't cause false fast-path -// engagement. -func blazesymEPERMMarkerExists() bool { - bootID, err := readBootIDFn() - if err != nil { - return false - } - _, err = os.Stat(blazesymEPERMMarkerPath(bootID)) - return err == nil -} - -// writeBlazesymEPERMMarker creates the marker file. Best-effort: -// failure is logged but never affects the current symbolize call. -func writeBlazesymEPERMMarker() error { - bootID, err := readBootIDFn() - if err != nil { - return err - } - path := blazesymEPERMMarkerPath(bootID) - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return err - } - // Touch the file; content doesn't matter, only existence. - f, err := os.Create(path) - if err != nil { - return err - } - return f.Close() -} - -// readBootID parses /proc/sys/kernel/random/boot_id (a UUID like -// "12345678-1234-1234-1234-1234567890ab") into raw bytes. -func readBootID() ([16]byte, error) { - var out [16]byte - body, err := os.ReadFile("/proc/sys/kernel/random/boot_id") - if err != nil { - return out, err - } - text := strings.TrimSpace(string(body)) - text = strings.ReplaceAll(text, "-", "") - if len(text) != 32 { - return out, fmt.Errorf("kallsyms: unexpected boot_id length %d", len(text)) - } - b, err := hex.DecodeString(text) - if err != nil { - return out, fmt.Errorf("kallsyms: parse boot_id: %w", err) - } - copy(out[:], b) - return out, nil -} - -// loadCachedKallsyms tries to materialize a kallsymsSymbolizer from -// the on-disk cache. Returns: -// - (s, nil) on success -// - (nil, errKallsymsCacheStale) when boot_id changed -// - (nil, errKallsymsCacheCorrupt) on magic / size mismatch -// - (nil, fs error) on missing file / read failure -// -// The caller treats any error as "fall back to a fresh parse". -func loadCachedKallsyms() (*kallsymsSymbolizer, error) { - path := cachePathFn() - data, err := os.ReadFile(path) - if err != nil { - return nil, err - } - if len(data) < kallsymsHeaderSize { - return nil, errKallsymsCacheCorrupt - } - magic := binary.LittleEndian.Uint32(data[0:4]) - version := binary.LittleEndian.Uint32(data[4:8]) - if magic != kallsymsCacheMagic || version != kallsymsCacheVersion { - return nil, errKallsymsCacheCorrupt - } - var cachedBoot [16]byte - copy(cachedBoot[:], data[8:24]) - currentBoot, err := readBootIDFn() - if err != nil { - return nil, fmt.Errorf("kallsyms: read boot_id: %w", err) - } - if cachedBoot != currentBoot { - return nil, errKallsymsCacheStale - } - nSyms := int(binary.LittleEndian.Uint32(data[24:28])) - - addrs := make([]uint64, nSyms) - names := make([]string, nSyms) - modules := make([]string, nSyms) - modIntern := make(map[string]string, 64) - off := kallsymsHeaderSize - for i := range nSyms { - if off+12 > len(data) { - return nil, errKallsymsCacheCorrupt - } - addrs[i] = binary.LittleEndian.Uint64(data[off : off+8]) - nameLen := int(binary.LittleEndian.Uint16(data[off+8 : off+10])) - modLen := int(binary.LittleEndian.Uint16(data[off+10 : off+12])) - off += 12 - if off+nameLen+modLen > len(data) { - return nil, errKallsymsCacheCorrupt - } - names[i] = string(data[off : off+nameLen]) - off += nameLen - if modLen > 0 { - s := string(data[off : off+modLen]) - if interned, ok := modIntern[s]; ok { - modules[i] = interned - } else { - modIntern[s] = s - modules[i] = s - } - off += modLen - } - } - return &kallsymsSymbolizer{ - addrs: addrs, - names: names, - modules: modules, - }, nil -} - -// writeKallsymsCache serializes s to the cache path. Best-effort: -// errors are returned but callers MUST treat write failure as -// non-fatal (the next run just re-parses). Writes go to a temp -// file and rename for atomicity — partial writes never expose a -// corrupt file to concurrent readers. -func writeKallsymsCache(s *kallsymsSymbolizer) error { - bootID, err := readBootIDFn() - if err != nil { - return err - } - path := cachePathFn() - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return err - } - tmp := path + ".tmp" - f, err := os.Create(tmp) - if err != nil { - return err - } - // On any error after this point: close and unlink the temp. - cleanup := func() { - _ = f.Close() - _ = os.Remove(tmp) - } - bw := bufio.NewWriterSize(f, 256*1024) - if err := writeKallsymsCacheTo(bw, s, bootID); err != nil { - cleanup() - return err - } - if err := bw.Flush(); err != nil { - cleanup() - return err - } - if err := f.Close(); err != nil { - _ = os.Remove(tmp) - return err - } - return os.Rename(tmp, path) -} - -// writeKallsymsCacheTo encodes the header + symbol stream onto w. -// Split out from writeKallsymsCache so tests can verify the -// format without touching disk. -func writeKallsymsCacheTo(w io.Writer, s *kallsymsSymbolizer, bootID [16]byte) error { - var hdr [kallsymsHeaderSize]byte - binary.LittleEndian.PutUint32(hdr[0:4], kallsymsCacheMagic) - binary.LittleEndian.PutUint32(hdr[4:8], kallsymsCacheVersion) - copy(hdr[8:24], bootID[:]) - binary.LittleEndian.PutUint32(hdr[24:28], uint32(len(s.addrs))) - if _, err := w.Write(hdr[:]); err != nil { - return err - } - var symHdr [12]byte - for i, addr := range s.addrs { - name := s.names[i] - mod := s.modules[i] - // Name / module lengths must fit in uint16. Truncate - // pathologically long names rather than fail the whole - // write (no real-world kernel symbol exceeds 65 KiB). - nameLen := len(name) - if nameLen > 0xFFFF { - nameLen = 0xFFFF - } - modLen := len(mod) - if modLen > 0xFFFF { - modLen = 0xFFFF - } - binary.LittleEndian.PutUint64(symHdr[0:8], addr) - binary.LittleEndian.PutUint16(symHdr[8:10], uint16(nameLen)) - binary.LittleEndian.PutUint16(symHdr[10:12], uint16(modLen)) - if _, err := w.Write(symHdr[:]); err != nil { - return err - } - if _, err := io.WriteString(w, name[:nameLen]); err != nil { - return err - } - if modLen > 0 { - if _, err := io.WriteString(w, mod[:modLen]); err != nil { - return err - } - } - } - return nil -} diff --git a/symbolize/kallsyms_cache_test.go b/symbolize/kallsyms_cache_test.go deleted file mode 100644 index 561f3b6..0000000 --- a/symbolize/kallsyms_cache_test.go +++ /dev/null @@ -1,147 +0,0 @@ -package symbolize - -import ( - "errors" - "os" - "path/filepath" - "testing" -) - -// withCacheDir overrides the cache path to a test-local temp dir -// and the boot-id reader to a deterministic value. Returns a -// cleanup func. -func withCacheDir(t *testing.T, bootID [16]byte) (string, func()) { - t.Helper() - dir := t.TempDir() - path := filepath.Join(dir, "kallsyms.cache") - prevPath := cachePathFn - prevBoot := readBootIDFn - cachePathFn = func() string { return path } - readBootIDFn = func() ([16]byte, error) { return bootID, nil } - return path, func() { - cachePathFn = prevPath - readBootIDFn = prevBoot - } -} - -// TestKallsymsCache_Roundtrip writes a synthetic symbolizer to the -// cache, then loads it back, and verifies the loaded copy resolves -// addresses to the same symbols. Sanity check for the binary -// format: addr → name/module pairs preserved exactly, including -// the module intern map's identity invariant (same module text -// resolves to the same Go string). -func TestKallsymsCache_Roundtrip(t *testing.T) { - _, cleanup := withCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - - want := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffff80001000, 0xffffffff80002000, 0xffffffffc0001000}, - names: []string{"do_sys_openat2", "vfs_open", "kvm_vcpu_ioctl"}, - modules: []string{"", "", "[kvm]"}, - } - - if err := writeKallsymsCache(want); err != nil { - t.Fatalf("writeKallsymsCache: %v", err) - } - - got, err := loadCachedKallsyms() - if err != nil { - t.Fatalf("loadCachedKallsyms: %v", err) - } - if len(got.addrs) != len(want.addrs) { - t.Fatalf("addrs len = %d, want %d", len(got.addrs), len(want.addrs)) - } - for i := range want.addrs { - if got.addrs[i] != want.addrs[i] { - t.Errorf("addrs[%d] = %#x, want %#x", i, got.addrs[i], want.addrs[i]) - } - if got.names[i] != want.names[i] { - t.Errorf("names[%d] = %q, want %q", i, got.names[i], want.names[i]) - } - if got.modules[i] != want.modules[i] { - t.Errorf("modules[%d] = %q, want %q", i, got.modules[i], want.modules[i]) - } - } - - // Resolution still works: pick the kvm address + 0x42, expect - // the kvm module marker on the resolved frame. - frames := got.Resolve([]uint64{0xffffffffc0001042}) - if frames[0].Module != "[kvm]" { - t.Errorf("Module = %q, want [kvm]", frames[0].Module) - } -} - -// TestKallsymsCache_StaleBootID asserts a cache produced under one -// boot_id is rejected when the current boot_id has changed (reboot). -// Critical for correctness: kernel module addresses change on -// reboot and a stale cache would mis-attribute every kernel frame. -func TestKallsymsCache_StaleBootID(t *testing.T) { - path, cleanup := withCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - - if err := writeKallsymsCache(&kallsymsSymbolizer{ - addrs: []uint64{0xffffffff80001000}, - names: []string{"sym"}, - modules: []string{""}, - }); err != nil { - t.Fatalf("writeKallsymsCache: %v", err) - } - if _, err := os.Stat(path); err != nil { - t.Fatalf("cache not written: %v", err) - } - - // Simulate reboot: change the boot_id the loader sees. - readBootIDFn = func() ([16]byte, error) { return [16]byte{9, 9, 9, 9}, nil } - - _, err := loadCachedKallsyms() - if !errors.Is(err, errKallsymsCacheStale) { - t.Errorf("loadCachedKallsyms err = %v, want errKallsymsCacheStale", err) - } -} - -// TestKallsymsCache_Missing covers the "cold cache" case — first -// run on a host. Loader returns an error (not panic), caller falls -// back to a fresh parse. -func TestKallsymsCache_Missing(t *testing.T) { - _, cleanup := withCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - - if _, err := loadCachedKallsyms(); err == nil { - t.Fatalf("loadCachedKallsyms returned nil err on missing cache") - } -} - -// TestKallsymsCache_CorruptIsNonFatal asserts a corrupt file is -// detected (wrong magic) and reported as an error rather than -// causing a panic or hang. Hosts with partial writes from killed -// agents would otherwise re-hit the issue on every startup. -func TestKallsymsCache_CorruptIsNonFatal(t *testing.T) { - path, cleanup := withCacheDir(t, [16]byte{1, 2, 3, 4}) - defer cleanup() - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - t.Fatalf("mkdir: %v", err) - } - if err := os.WriteFile(path, []byte("garbage-not-a-cache"), 0o644); err != nil { - t.Fatalf("write garbage: %v", err) - } - if _, err := loadCachedKallsyms(); err == nil { - t.Errorf("loadCachedKallsyms returned nil on corrupt file") - } -} - -// TestReadBootIDLive: smoke test against the real -// /proc/sys/kernel/random/boot_id. Skips if unreadable (e.g., -// CI sandbox). -func TestReadBootIDLive(t *testing.T) { - if _, err := os.Stat("/proc/sys/kernel/random/boot_id"); err != nil { - t.Skip("no /proc/sys/kernel/random/boot_id on this host") - } - b, err := readBootID() - if err != nil { - t.Fatalf("readBootID: %v", err) - } - var zero [16]byte - if b == zero { - t.Errorf("boot_id read as all-zero — parser likely failed") - } -} diff --git a/symbolize/kallsyms_parse_test.go b/symbolize/kallsyms_parse_test.go deleted file mode 100644 index 03fdc0b..0000000 --- a/symbolize/kallsyms_parse_test.go +++ /dev/null @@ -1,85 +0,0 @@ -package symbolize - -import ( - "bytes" - "testing" -) - -// TestParseKallsymsLine_Plain covers the no-module form. -func TestParseKallsymsLine_Plain(t *testing.T) { - line := []byte("ffffffff80c6a050 T __x64_sys_open") - addr, typ, name, module, ok := parseKallsymsLine(line) - if !ok { - t.Fatalf("parse failed") - } - if addr != 0xffffffff80c6a050 { - t.Errorf("addr = %#x", addr) - } - if typ != 'T' { - t.Errorf("typ = %q", typ) - } - if !bytes.Equal(name, []byte("__x64_sys_open")) { - t.Errorf("name = %q", name) - } - if len(module) != 0 { - t.Errorf("module = %q, want empty", module) - } -} - -// TestParseKallsymsLine_WithModule covers the "[modname]" suffix. -func TestParseKallsymsLine_WithModule(t *testing.T) { - line := []byte("ffffffffc0001000 t kvm_vcpu_ioctl [kvm]") - addr, typ, name, module, ok := parseKallsymsLine(line) - if !ok { - t.Fatalf("parse failed") - } - if addr != 0xffffffffc0001000 { - t.Errorf("addr = %#x", addr) - } - if typ != 't' { - t.Errorf("typ = %q", typ) - } - if !bytes.Equal(name, []byte("kvm_vcpu_ioctl")) { - t.Errorf("name = %q", name) - } - if !bytes.Equal(module, []byte("[kvm]")) { - t.Errorf("module = %q", module) - } -} - -// TestParseKallsymsLine_LongAddrAccepted: kallsyms emits 16-hex-digit -// uppercase too on some kernels; parser must not assume case. -func TestParseKallsymsLine_LongAddrAccepted(t *testing.T) { - line := []byte("FFFFFFFF80C6A050 T some_sym") - addr, _, _, _, ok := parseKallsymsLine(line) - if !ok { - t.Fatalf("parse failed") - } - if addr != 0xFFFFFFFF80C6A050 { - t.Errorf("addr = %#x", addr) - } -} - -// TestParseKallsymsLine_Empty / malformed return ok=false. -func TestParseKallsymsLine_Empty(t *testing.T) { - for _, in := range [][]byte{ - nil, - []byte(""), - []byte("not_a_hex_addr"), - []byte("ffffffff80c6a050"), // missing type + name - []byte("ffffffff80c6a050 T"), // missing name - } { - if _, _, _, _, ok := parseKallsymsLine(in); ok { - t.Errorf("parse %q unexpectedly succeeded", in) - } - } -} - -// BenchmarkParseKallsymsLine measures the per-line cost so future -// kallsyms-parser changes can show their improvement (or regression). -func BenchmarkParseKallsymsLine(b *testing.B) { - line := []byte("ffffffffc0001234 T kvm_vcpu_ioctl [kvm]") - for b.Loop() { - _, _, _, _, _ = parseKallsymsLine(line) - } -} diff --git a/symbolize/kallsyms_test.go b/symbolize/kallsyms_test.go deleted file mode 100644 index aed3ad8..0000000 --- a/symbolize/kallsyms_test.go +++ /dev/null @@ -1,136 +0,0 @@ -package symbolize - -import ( - "strings" - "testing" -) - -// TestKallsymsSymbolizerResolveAtOrBelow asserts the binary search -// semantic: an IP that lands in the middle of a function resolves to -// the function's name with the right offset. -func TestKallsymsSymbolizerResolveAtOrBelow(t *testing.T) { - k := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffff81000000, 0xffffffff81000100, 0xffffffff81000200}, - names: []string{"do_sys_openat2", "vfs_open", "tcp_sendmsg"}, - modules: []string{"", "", ""}, - } - frames := k.Resolve([]uint64{ - 0xffffffff81000000, // exact match → do_sys_openat2 - 0xffffffff8100007f, // 0x7f past do_sys_openat2 start → still in do_sys_openat2 - 0xffffffff81000180, // 0x80 past vfs_open start → vfs_open - }) - want := []struct { - name string - offset uint64 - }{ - {"do_sys_openat2", 0x0}, - {"do_sys_openat2", 0x7f}, - {"vfs_open", 0x80}, - } - for i, w := range want { - if frames[i].Name != w.name { - t.Errorf("frame[%d].Name = %q, want %q", i, frames[i].Name, w.name) - } - if frames[i].Offset != w.offset { - t.Errorf("frame[%d].Offset = %#x, want %#x", i, frames[i].Offset, w.offset) - } - if frames[i].Module != "[kernel.kallsyms]" { - t.Errorf("frame[%d].Module = %q, want [kernel.kallsyms]", i, frames[i].Module) - } - } -} - -// TestKallsymsSymbolizerModuleSymbols asserts that module symbols -// retain their module marker (e.g., "[kvm]") in Frame.Module. -func TestKallsymsSymbolizerModuleSymbols(t *testing.T) { - k := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffffc0001000}, - names: []string{"kvm_vcpu_ioctl"}, - modules: []string{"[kvm]"}, - } - frames := k.Resolve([]uint64{0xffffffffc0001042}) - if frames[0].Name != "kvm_vcpu_ioctl" { - t.Errorf("Name = %q, want kvm_vcpu_ioctl", frames[0].Name) - } - if frames[0].Module != "[kvm]" { - t.Errorf("Module = %q, want [kvm]", frames[0].Module) - } - if frames[0].Offset != 0x42 { - t.Errorf("Offset = %#x, want 0x42", frames[0].Offset) - } -} - -// TestKallsymsSymbolizerRejectsWildOffset asserts that an IP that -// lands "obviously too far" past the closest symbol (> 64 KiB) is -// reported as unknown instead of being mis-attributed to a distant -// function. Matches the awk-hack guard rail. -func TestKallsymsSymbolizerRejectsWildOffset(t *testing.T) { - k := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffff81000000}, - names: []string{"do_sys_openat2"}, - modules: []string{""}, - } - // 0x20000 (128 KiB) past the only known symbol → reject. - frames := k.Resolve([]uint64{0xffffffff81020000}) - if frames[0].Name == "do_sys_openat2" { - t.Errorf("wild offset attributed to do_sys_openat2; want raw-hex name") - } - if frames[0].Reason != FailureUnknownAddress { - t.Errorf("Reason = %v, want FailureUnknownAddress", frames[0].Reason) - } -} - -// TestKallsymsSymbolizerBelowFirstSymbol asserts that an IP below the -// lowest symbol address is reported as unknown rather than wrapping -// into the high end of the table. -func TestKallsymsSymbolizerBelowFirstSymbol(t *testing.T) { - k := &kallsymsSymbolizer{ - addrs: []uint64{0xffffffff81000000}, - names: []string{"do_sys_openat2"}, - modules: []string{""}, - } - frames := k.Resolve([]uint64{0xffffffff80ffffff}) - if frames[0].Reason != FailureUnknownAddress { - t.Errorf("Reason = %v, want FailureUnknownAddress (IP below first symbol)", frames[0].Reason) - } -} - -// TestNewKallsymsSymbolizerLive asserts that on a host with -// kptr_restrict=0 the parser produces a non-empty index and resolves -// real kallsyms addresses to named frames. Skips when the host has -// kallsyms restricted. -// -// We don't assert the exact symbol name returned: /proc/kallsyms -// frequently has aliased symbols at the same address (e.g., kernel -// entry points expose both their canonical name and a __pi_* alias), -// and the sort order across aliases is implementation-defined. The -// load-bearing property is that resolution produces *some* valid -// symbol — not "[unknown]" or a raw hex — for an address that came -// from kallsyms itself. -func TestNewKallsymsSymbolizerLive(t *testing.T) { - if !kallsymsReadable() { - t.Skip("requires kptr_restrict=0") - } - k, err := newKallsymsSymbolizer() - if err != nil { - t.Fatalf("newKallsymsSymbolizer: %v", err) - } - if len(k.addrs) == 0 { - t.Fatalf("empty kallsyms index") - } - - addr, _ := pickKnownKernelSymbol(t) - // Probe addr + 1: any aliased symbol at addr is still a valid - // resolution since +1 lands inside whichever function the kernel - // chose to start there. - frames := k.Resolve([]uint64{addr + 1}) - if frames[0].Reason == FailureUnknownAddress { - t.Fatalf("addr+1 resolved as unknown; want named symbol (got %+v)", frames[0]) - } - if strings.HasPrefix(frames[0].Name, "0x") { - t.Fatalf("got raw-hex name %q; want named symbol", frames[0].Name) - } - if frames[0].Offset != 1 { - t.Errorf("offset = %d, want 1 (probe was addr+1)", frames[0].Offset) - } -} diff --git a/symbolize/local_kernel.go b/symbolize/local_kernel.go index 8801587..32b867b 100644 --- a/symbolize/local_kernel.go +++ b/symbolize/local_kernel.go @@ -18,13 +18,13 @@ static blaze_symbolizer_opts make_kernel_opts(_Bool code_info, _Bool inlined_fns // make_kernel_src returns the kernel source blazesym uses by default: // kallsyms=NULL → /proc/kallsyms, vmlinux=NULL → blazesym auto-scans -// /sys/kernel/btf/vmlinux, /boot/vmlinux-*, /proc/kcore, and friends -// for DWARF. On hosts with kernel lockdown=integrity (Secure Boot) one -// of those open() calls returns EACCES — most commonly /proc/kcore, -// which has CAP_SYS_RAWIO + CAP_DAC_READ_SEARCH requirements — and -// blazesym surfaces it as BLAZE_ERR_PERMISSION_DENIED for the whole -// batch. SymbolizeKernel handles this by falling back to a pure-Go -// /proc/kallsyms symbolizer (kallsyms.go). +// /boot/vmlinux-* and /usr/lib/debug/boot/ for DWARF. Since blazesym +// v0.2.4 (commit 987d36c) the KASLR offset — the only thing that +// needed /proc/kcore — is queried lazily and only when a vmlinux DWARF +// resolver is actually present. On the common lockdown=integrity host +// (no /boot/vmlinux DWARF installed) blazesym resolves kallsyms-only +// without ever touching /proc/kcore, so the BLAZE_ERR_PERMISSION_DENIED +// that the old pure-Go fallback existed for no longer occurs. static blaze_symbolize_src_kernel make_kernel_src(void) { blaze_symbolize_src_kernel src; memset(&src, 0, sizeof(src)); @@ -44,7 +44,6 @@ import "C" import ( "bufio" - "errors" "fmt" "os" "strconv" @@ -55,68 +54,31 @@ import ( "unsafe" ) -// errBlazePermissionDenied signals that blazesym returned -// BLAZE_ERR_PERMISSION_DENIED for the kernel source. The -// SymbolizeKernel fallback ladder converts this into a switch to the -// pure-Go /proc/kallsyms symbolizer for the symbolizer's lifetime. -var errBlazePermissionDenied = errors.New("symbolize: blazesym permission denied (kernel lockdown?)") - -// forceFallbackEnv lets operators (and integration tests) force the -// pure-Go /proc/kallsyms fallback without waiting for blazesym to -// fail first. Set PERFAGENT_FORCE_KERNEL_FALLBACK=1 to skip the CGO -// blazesym path on hosts known to be locked down — avoids one wasted -// CGO call per sample batch — and to exercise the fallback in CI on -// hosts that don't naturally hit EPERM. -const forceFallbackEnv = "PERFAGENT_FORCE_KERNEL_FALLBACK" - -// LocalKernelSymbolizer resolves kernel-mode addresses via blazesym, -// with a transparent pure-Go /proc/kallsyms fallback for hosts where -// blazesym can't read its required kernel images (lockdown=integrity, -// Secure Boot, missing CAP_SYS_RAWIO/CAP_DAC_READ_SEARCH). +// LocalKernelSymbolizer resolves kernel-mode addresses via blazesym. // // blazesym path: gives function name + offset + inline expansion + -// source file:line when the host kernel exposes vmlinux DWARF and -// /proc/kcore. Used by default on permissive hosts. -// -// Pure-Go kallsyms path (see kallsyms.go): gives function name + -// offset + module marker only. No DWARF, no inline frames. Sufficient -// for flame graphs and operator decoding — and works under -// lockdown=integrity, which is the common production case. +// source file:line when the host kernel exposes vmlinux DWARF; falls +// back internally to kallsyms-only resolution (name + offset) when no +// vmlinux DWARF is present — including on lockdown=integrity hosts, +// where it no longer needs /proc/kcore (blazesym >= v0.2.4). // -// The fallback decision is sticky: once we've seen -// BLAZE_ERR_PERMISSION_DENIED on this host, every subsequent batch -// goes straight to the pure-Go path. Re-probing blazesym on every -// batch would waste a CGO call per sample. +// If blazesym fails for any reason, SymbolizeKernel preserves the raw +// kernel addresses (Name="0x") so the kernel side of the stack +// still survives into the pprof. type LocalKernelSymbolizer struct { csym *C.blaze_symbolizer closed atomic.Bool mu sync.Mutex - // callBlazesym is the seam under SymbolizeKernel. In production - // it points to invoke (which routes to cgoSymbolize or to the - // pure-Go kallsymsSymbolizer based on useFallback). Tests swap - // it for a stub so the Go-level fallback ladder can be exercised - // without a real blazesym handle and without a real - // /proc/kallsyms read. - callBlazesym func(ips []uint64, useFallback bool) ([]Frame, error) - - // fallback is set once blazesym reports permission-denied on the - // CGO path, or at construction time when forceFallbackEnv is set. - // Subsequent batches skip the failing CGO path and go straight to - // the pure-Go /proc/kallsyms symbolizer. - fallback atomic.Bool - - // kallsymsOnce + kallsymsCache + kallsymsErr lazily build the - // pure-Go /proc/kallsyms index on the first fallback batch and - // reuse it for the symbolizer's lifetime. Parsing is ~3M lines - // of /proc/kallsyms on a typical x86_64 — one-time cost. - kallsymsOnce sync.Once - kallsymsCache *kallsymsSymbolizer - kallsymsErr error + // symbolize is the seam under SymbolizeKernel; in production it + // points to cgoSymbolize. Tests swap it for a stub so the + // raw-address backstop path can be exercised without a real + // blazesym handle. + symbolize func(ips []uint64) ([]Frame, error) - // stats counts observability events (batch counts, fallback - // engagement, raw-address synthesis). Exposed via Stats() for - // end-of-run logging and future /metrics scrape. + // stats counts observability events (batch counts, raw-address + // synthesis, blazesym error buckets). Exposed via Stats() for + // end-of-run logging and the /metrics scrape. stats Counters } @@ -144,33 +106,14 @@ func NewLocalKernelSymbolizer() (*LocalKernelSymbolizer, error) { return nil, fmt.Errorf("blaze_symbolizer_new_opts returned NULL") } s := &LocalKernelSymbolizer{csym: csym} - s.callBlazesym = s.invoke - if os.Getenv(forceFallbackEnv) == "1" { - // Bump the counter so the end-of-run log reflects that we - // ran in fallback mode, matching the semantic when the - // switch happens naturally via EPERM. Without this, the - // forced-fallback case would log fallback_engaged=0 and - // operators couldn't tell the kallsyms path was used. - s.fallback.Store(true) - s.stats.KernelFallbackEngaged.Add(1) - } else if blazesymEPERMMarkerExists() { - // We've seen blazesym EPERM on this boot already. Skip the - // failing attempt — it would just re-read /proc/kallsyms, - // hit EPERM on /proc/kcore (lockdown), and waste ~110ms of - // CPU per agent invocation. Marker is boot_id-scoped, so a - // reboot reverts to attempting blazesym fresh. - s.fallback.Store(true) - s.stats.KernelFallbackEngaged.Add(1) - } + s.symbolize = s.cgoSymbolize return s, nil } -// SymbolizeKernel resolves kernel addresses to frames. On -// BLAZE_ERR_PERMISSION_DENIED from the CGO path, transparently -// switches to the pure-Go /proc/kallsyms symbolizer for the -// symbolizer's remaining lifetime. If even that fails, returns -// raw-address frames (Name="0x", Reason=FailureMissingSymbols) -// so kernel context survives into the pprof. +// SymbolizeKernel resolves kernel addresses to frames via blazesym. +// If blazesym fails, returns raw-address frames (Name="0x", +// Reason=FailureMissingSymbols) so kernel context survives into the +// pprof. func (s *LocalKernelSymbolizer) SymbolizeKernel(ips []uint64) ([]Frame, error) { if s.closed.Load() { return nil, ErrClosed @@ -198,78 +141,23 @@ func (s *LocalKernelSymbolizer) SymbolizeKernel(ips []uint64) ([]Frame, error) { s.stats.KernelBatchHist.Record(uint64(time.Since(t0).Microseconds())) }() - // Sticky fallback: once we've seen permission-denied on the CGO - // path, this host won't recover within the symbolizer's lifetime. - // Skip blazesym on every subsequent batch. - if s.fallback.Load() { - frames, err := s.callBlazesym(ips, true) - if err != nil { - s.stats.KernelBatchFailures.Add(1) - s.stats.KernelRawAddrFrames.Add(uint64(len(ips))) - return rawKernelAddrFrames(ips), nil - } - return frames, nil - } - - frames, err := s.callBlazesym(ips, false) + frames, err := s.symbolize(ips) if err == nil { return frames, nil } - if errors.Is(err, errBlazePermissionDenied) { - if s.fallback.CompareAndSwap(false, true) { - s.stats.KernelFallbackEngaged.Add(1) - // Persist the fact that blazesym EPERM'd on this - // boot so the next perf-agent invocation can skip - // the failing attempt at construction time. Best - // effort — the in-process sticky bit handles the - // rest of this invocation regardless. - _ = writeBlazesymEPERMMarker() - } - frames, err = s.callBlazesym(ips, true) - if err == nil { - return frames, nil - } - } - // Both paths failed — preserve raw kernel addresses so the - // kernel side of the stack survives into the pprof. + // blazesym failed — preserve raw kernel addresses so the kernel + // side of the stack survives into the pprof. s.stats.KernelBatchFailures.Add(1) s.stats.KernelRawAddrFrames.Add(uint64(len(ips))) return rawKernelAddrFrames(ips), nil } -// invoke is the production callBlazesym. useFallback=false routes to -// the CGO blazesym path (full inline + DWARF); useFallback=true -// routes to the pure-Go /proc/kallsyms symbolizer (name+offset only, -// but lockdown-safe). -func (s *LocalKernelSymbolizer) invoke(ips []uint64, useFallback bool) ([]Frame, error) { - if useFallback { - ks, err := s.getKallsymsFallback() - if err != nil { - return nil, err - } - return ks.Resolve(ips), nil - } - return s.cgoSymbolize(ips) -} - -// getKallsymsFallback returns the lazily-built pure-Go /proc/kallsyms -// symbolizer. Built exactly once per LocalKernelSymbolizer lifetime; -// subsequent calls return the cached instance. -func (s *LocalKernelSymbolizer) getKallsymsFallback() (*kallsymsSymbolizer, error) { - s.kallsymsOnce.Do(func() { - s.kallsymsCache, s.kallsymsErr = newKallsymsSymbolizer() - }) - return s.kallsymsCache, s.kallsymsErr -} - -// cgoSymbolize invokes blazesym's kernel source. Returns -// errBlazePermissionDenied on BLAZE_ERR_PERMISSION_DENIED so the -// fallback ladder can switch to the pure-Go path; other blazesym -// errors propagate as wrapped errors. +// cgoSymbolize invokes blazesym's kernel source. // -// Bumps reason-bucketed counters at the error site (roadmap #4) -// so end-of-run logs / future /metrics scrapes can distinguish a -// lockdown host (high KernelLockdownEPERM) from a buggy blazesym +// Bumps reason-bucketed counters at the error site (roadmap #4) so +// end-of-run logs / the /metrics scrape can distinguish a lockdown +// host that still EPERMs (KernelLockdownEPERM — only the narrow +// vmlinux-DWARF-installed case post-v0.2.4) from a buggy blazesym // (any KernelOtherErr at all). func (s *LocalKernelSymbolizer) cgoSymbolize(ips []uint64) ([]Frame, error) { src := C.make_kernel_src() @@ -279,9 +167,9 @@ func (s *LocalKernelSymbolizer) cgoSymbolize(ips []uint64) ([]Frame, error) { errc := C.blaze_err_last() if errc == C.BLAZE_ERR_PERMISSION_DENIED { s.stats.KernelLockdownEPERM.Add(1) - return nil, errBlazePermissionDenied + } else { + s.stats.KernelOtherErr.Add(1) } - s.stats.KernelOtherErr.Add(1) errStr := C.GoString(C.blaze_err_str(errc)) return nil, fmt.Errorf("blaze_symbolize_kernel_abs_addrs: %s (code %d)", errStr, int(errc)) } diff --git a/symbolize/local_kernel_fallback_test.go b/symbolize/local_kernel_fallback_test.go deleted file mode 100644 index 45d6db0..0000000 --- a/symbolize/local_kernel_fallback_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package symbolize - -import ( - "errors" - "fmt" - "testing" -) - -// stubKernelSymbolizer builds a LocalKernelSymbolizer with no CGO state -// so the Go-level fallback ladder in SymbolizeKernel can be exercised -// without a real blazesym handle. The caller's `call` stands in for the -// CGO blazesym invocation: useFallback reflects which kernel source -// variant the production code would have asked for. -func stubKernelSymbolizer(call func(ips []uint64, useFallback bool) ([]Frame, error)) *LocalKernelSymbolizer { - s := &LocalKernelSymbolizer{} - s.callBlazesym = call - return s -} - -// TestSymbolizeKernel_RetriesOnPermissionDenied covers Bug 1: when -// blazesym's default kernel source returns BLAZE_ERR_PERMISSION_DENIED -// (the lockdown=integrity case where on-disk vmlinux candidates are -// unreadable), SymbolizeKernel must retry once asking for the -// kallsyms-only path (vmlinux=""), and surface those frames. -func TestSymbolizeKernel_RetriesOnPermissionDenied(t *testing.T) { - var defaultCalls, fallbackCalls int - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - if !useFallback { - defaultCalls++ - return nil, errBlazePermissionDenied - } - fallbackCalls++ - out := make([]Frame, len(ips)) - for i, ip := range ips { - out[i] = Frame{Address: ip, Name: "stub_sym", Module: "[kernel.kallsyms]"} - } - return out, nil - }) - - frames, err := s.SymbolizeKernel([]uint64{0xffffffff80001000}) - if err != nil { - t.Fatalf("SymbolizeKernel: %v", err) - } - if len(frames) != 1 || frames[0].Name != "stub_sym" { - t.Fatalf("got %+v, want resolved frame from fallback path", frames) - } - if defaultCalls != 1 { - t.Errorf("default-path calls = %d, want 1", defaultCalls) - } - if fallbackCalls != 1 { - t.Errorf("fallback-path calls = %d, want 1", fallbackCalls) - } -} - -// TestSymbolizeKernel_StickyFallback verifies that once SymbolizeKernel -// has observed permission-denied on the default path and switched to -// the fallback, it skips the default path for the symbolizer's -// remaining lifetime — that path is going to fail with the same error -// on the same host, so re-probing it wastes a CGO call per batch. -func TestSymbolizeKernel_StickyFallback(t *testing.T) { - var defaultCalls, fallbackCalls int - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - if !useFallback { - defaultCalls++ - return nil, errBlazePermissionDenied - } - fallbackCalls++ - return []Frame{{Address: ips[0], Name: "ok"}}, nil - }) - - for i := range 3 { - if _, err := s.SymbolizeKernel([]uint64{uint64(0xffffffff80001000) + uint64(i)}); err != nil { - t.Fatalf("batch %d: %v", i, err) - } - } - if defaultCalls != 1 { - t.Errorf("default-path calls = %d, want 1 (sticky after first EPERM)", defaultCalls) - } - if fallbackCalls != 3 { - t.Errorf("fallback-path calls = %d, want 3", fallbackCalls) - } -} - -// TestSymbolizeKernel_RawAddressesOnTotalFailure covers Bug 2: when -// both the default and fallback blazesym paths fail, SymbolizeKernel -// must synthesize Frames with the raw kernel address rendered as -// "0x" in Name and Reason=FailureMissingSymbols, so the kernel -// portion of the stack survives into the pprof. Previously the whole -// batch was discarded, dropping kernel context entirely. -func TestSymbolizeKernel_RawAddressesOnTotalFailure(t *testing.T) { - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - return nil, errors.New("blazesym total failure") - }) - - ips := []uint64{0xffffffff80001234, 0xffffffff80005678} - frames, err := s.SymbolizeKernel(ips) - if err != nil { - t.Fatalf("SymbolizeKernel: expected nil err with raw fallback, got %v", err) - } - if len(frames) != len(ips) { - t.Fatalf("got %d frames, want %d", len(frames), len(ips)) - } - for i, f := range frames { - wantName := fmt.Sprintf("0x%x", ips[i]) - if f.Name != wantName { - t.Errorf("frame[%d].Name = %q, want %q", i, f.Name, wantName) - } - if f.Module != "[kernel.kallsyms]" { - t.Errorf("frame[%d].Module = %q, want [kernel.kallsyms]", i, f.Module) - } - if f.Reason != FailureMissingSymbols { - t.Errorf("frame[%d].Reason = %v, want FailureMissingSymbols", i, f.Reason) - } - if f.Address != ips[i] { - t.Errorf("frame[%d].Address = %#x, want %#x", i, f.Address, ips[i]) - } - } -} - -// TestSymbolizeKernel_DefaultPathSucceedsNoRetry confirms the happy -// path: when the default blazesym call succeeds, the fallback is not -// consulted and the symbolizer stays out of sticky-fallback mode. -func TestSymbolizeKernel_DefaultPathSucceedsNoRetry(t *testing.T) { - var defaultCalls, fallbackCalls int - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - if useFallback { - fallbackCalls++ - t.Fatalf("fallback path called unexpectedly") - } - defaultCalls++ - return []Frame{{Address: ips[0], Name: "ok"}}, nil - }) - if _, err := s.SymbolizeKernel([]uint64{0xffffffff80001000}); err != nil { - t.Fatalf("SymbolizeKernel: %v", err) - } - if _, err := s.SymbolizeKernel([]uint64{0xffffffff80002000}); err != nil { - t.Fatalf("second batch: %v", err) - } - if defaultCalls != 2 || fallbackCalls != 0 { - t.Errorf("default=%d fallback=%d, want 2 / 0", defaultCalls, fallbackCalls) - } -} diff --git a/symbolize/stats.go b/symbolize/stats.go index 9f3bdd7..17c8448 100644 --- a/symbolize/stats.go +++ b/symbolize/stats.go @@ -12,11 +12,11 @@ import ( // Why: under kernel lockdown=integrity the v1.2.0 M1 symbolizer // silently dropped every kernel frame for the lifetime of the agent // — nothing surfaced the problem to operators. These counters make -// "blazesym broke, fallback engaged" / "frames dropped to raw-hex" -// observable so a self-profile lane or /metrics scrape can alert. +// "blazesym broke" / "frames dropped to raw-hex" observable so a +// self-profile lane or /metrics scrape can alert. type Counters struct { // KernelBatches is the number of SymbolizeKernel calls that - // reached the blazesym/fallback layer (after the empty-input + // reached the blazesym layer (after the empty-input // short-circuit). KernelBatches atomic.Uint64 @@ -24,21 +24,14 @@ type Counters struct { // SymbolizeKernel across all batches. KernelInputIPs atomic.Uint64 - // KernelBatchFailures is the number of batches where every - // symbolizer (blazesym + kallsyms fallback) returned an error, - // forcing the raw-address synthesis path. + // KernelBatchFailures is the number of batches where blazesym + // returned an error, forcing the raw-address synthesis path. KernelBatchFailures atomic.Uint64 - // KernelFallbackEngaged is 1 once the symbolizer has switched - // from the blazesym path to the pure-Go /proc/kallsyms path - // (sticky for the symbolizer's lifetime). Non-zero is the - // canary for lockdown=integrity hosts. - KernelFallbackEngaged atomic.Uint64 - // KernelRawAddrFrames is the cumulative count of kernel IPs // that fell to the raw-address synthesis path (Frame.Name = - // "0x"). High values mean blazesym + kallsyms both failed - // — likely a configuration problem on the host. + // "0x"). High values mean blazesym failed — likely a + // configuration problem on the host. KernelRawAddrFrames atomic.Uint64 // Reason-bucketed counters for batch-level blazesym failures. @@ -47,8 +40,10 @@ type Counters struct { // throwing some other error" without re-instrumenting. // // KernelLockdownEPERM bumps each time blazesym returns - // BLAZE_ERR_PERMISSION_DENIED — high values + matching - // KernelFallbackEngaged=1 is the canonical lockdown signature. + // BLAZE_ERR_PERMISSION_DENIED. Post-v0.2.4 this should only fire + // on the narrow lockdown + /boot/vmlinux-DWARF-installed host + // class; a steady stream of these means kernel frames are + // dropping to raw addresses and the host needs attention. KernelLockdownEPERM atomic.Uint64 // KernelOtherErr bumps when blazesym returns a non-EPERM @@ -71,37 +66,35 @@ type Counters struct { // individual fields atomically, not the struct as a whole — fine for // observability reads). type CountersSnapshot struct { - KernelBatches uint64 - KernelInputIPs uint64 - KernelBatchFailures uint64 - KernelFallbackEngaged uint64 - KernelRawAddrFrames uint64 - KernelLockdownEPERM uint64 - KernelOtherErr uint64 - KernelBatchHist LatencyHistSnapshot + KernelBatches uint64 + KernelInputIPs uint64 + KernelBatchFailures uint64 + KernelRawAddrFrames uint64 + KernelLockdownEPERM uint64 + KernelOtherErr uint64 + KernelBatchHist LatencyHistSnapshot } // Snapshot returns the current counter values as a plain struct. func (c *Counters) Snapshot() CountersSnapshot { return CountersSnapshot{ - KernelBatches: c.KernelBatches.Load(), - KernelInputIPs: c.KernelInputIPs.Load(), - KernelBatchFailures: c.KernelBatchFailures.Load(), - KernelFallbackEngaged: c.KernelFallbackEngaged.Load(), - KernelRawAddrFrames: c.KernelRawAddrFrames.Load(), - KernelLockdownEPERM: c.KernelLockdownEPERM.Load(), - KernelOtherErr: c.KernelOtherErr.Load(), - KernelBatchHist: c.KernelBatchHist.Snapshot(), + KernelBatches: c.KernelBatches.Load(), + KernelInputIPs: c.KernelInputIPs.Load(), + KernelBatchFailures: c.KernelBatchFailures.Load(), + KernelRawAddrFrames: c.KernelRawAddrFrames.Load(), + KernelLockdownEPERM: c.KernelLockdownEPERM.Load(), + KernelOtherErr: c.KernelOtherErr.Load(), + KernelBatchHist: c.KernelBatchHist.Snapshot(), } } // String formats the snapshot as a one-line log message — emitted at -// agent shutdown so operators see fallback engagement and frame -// drops without having to add a metrics scrape. +// agent shutdown so operators see frame drops without having to add a +// metrics scrape. func (s CountersSnapshot) String() string { return fmt.Sprintf( - "symbolize: batches=%d input_ips=%d batch_failures=%d fallback_engaged=%d raw_addr_frames=%d eperm=%d other_err=%d batch_p50_us=%d batch_p99_us=%d batch_max_us=%d", - s.KernelBatches, s.KernelInputIPs, s.KernelBatchFailures, s.KernelFallbackEngaged, s.KernelRawAddrFrames, + "symbolize: batches=%d input_ips=%d batch_failures=%d raw_addr_frames=%d eperm=%d other_err=%d batch_p50_us=%d batch_p99_us=%d batch_max_us=%d", + s.KernelBatches, s.KernelInputIPs, s.KernelBatchFailures, s.KernelRawAddrFrames, s.KernelLockdownEPERM, s.KernelOtherErr, s.KernelBatchHist.P50Us, s.KernelBatchHist.P99Us, s.KernelBatchHist.MaxUs, ) diff --git a/symbolize/stats_test.go b/symbolize/stats_test.go index 1861b4a..862d385 100644 --- a/symbolize/stats_test.go +++ b/symbolize/stats_test.go @@ -6,34 +6,40 @@ import ( "testing" ) +// stubKernelSymbolizer builds a LocalKernelSymbolizer whose blazesym +// seam is replaced with call, so the raw-address backstop and stats +// bookkeeping can be exercised without a real blazesym handle. +func stubKernelSymbolizer(call func(ips []uint64) ([]Frame, error)) *LocalKernelSymbolizer { + s := &LocalKernelSymbolizer{} + s.symbolize = call + return s +} + // TestCounters_SnapshotZero asserts a freshly-constructed Counters // reports all-zeros. func TestCounters_SnapshotZero(t *testing.T) { var c Counters s := c.Snapshot() if s.KernelBatches != 0 || s.KernelBatchFailures != 0 || - s.KernelFallbackEngaged != 0 || s.KernelRawAddrFrames != 0 || - s.KernelInputIPs != 0 { + s.KernelRawAddrFrames != 0 || s.KernelInputIPs != 0 { t.Errorf("zero snapshot non-zero: %+v", s) } } // TestCounters_StringContainsBumpedFields asserts the human-readable // String() form surfaces every counter that's been bumped — used for -// the end-of-run log line so operators see fallback engagement and -// failure counts without having to add a /metrics scrape. +// the end-of-run log line so operators see failure counts without +// having to add a /metrics scrape. func TestCounters_StringContainsBumpedFields(t *testing.T) { var c Counters c.KernelBatches.Add(5) - c.KernelFallbackEngaged.Add(1) c.KernelRawAddrFrames.Add(42) c.KernelLockdownEPERM.Add(7) c.KernelOtherErr.Add(2) out := c.Snapshot().String() for _, want := range []string{ - "batches=5", "fallback_engaged=1", "raw_addr_frames=42", - "eperm=7", "other_err=2", + "batches=5", "raw_addr_frames=42", "eperm=7", "other_err=2", } { if !strings.Contains(out, want) { t.Errorf("snapshot string missing %q: %s", want, out) @@ -45,7 +51,7 @@ func TestCounters_StringContainsBumpedFields(t *testing.T) { // successful blazesym batch the input-IPs and batches counters move, // nothing else. func TestLocalKernelSymbolizer_StatsHappyPath(t *testing.T) { - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { + s := stubKernelSymbolizer(func(ips []uint64) ([]Frame, error) { return []Frame{{Address: ips[0], Name: "ok"}}, nil }) _, _ = s.SymbolizeKernel([]uint64{0xffffffff80001000, 0xffffffff80002000}) @@ -56,66 +62,28 @@ func TestLocalKernelSymbolizer_StatsHappyPath(t *testing.T) { if got.KernelInputIPs != 2 { t.Errorf("KernelInputIPs = %d, want 2", got.KernelInputIPs) } - if got.KernelFallbackEngaged != 0 { - t.Errorf("KernelFallbackEngaged = %d, want 0", got.KernelFallbackEngaged) - } if got.KernelRawAddrFrames != 0 { t.Errorf("KernelRawAddrFrames = %d, want 0", got.KernelRawAddrFrames) } } -// TestLocalKernelSymbolizer_StatsFallbackEngages asserts the -// fallback_engaged counter bumps exactly once when the default path -// first returns permission-denied, and stays at 1 on subsequent -// batches (sticky). -func TestLocalKernelSymbolizer_StatsFallbackEngages(t *testing.T) { - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - if !useFallback { - return nil, errBlazePermissionDenied - } - return []Frame{{Address: ips[0], Name: "ok"}}, nil - }) - for i := range 3 { - _, _ = s.SymbolizeKernel([]uint64{uint64(0xffffffff80001000) + uint64(i)}) - } - got := s.Stats() - if got.KernelFallbackEngaged != 1 { - t.Errorf("KernelFallbackEngaged = %d, want 1 (sticky)", got.KernelFallbackEngaged) - } - if got.KernelBatches != 3 { - t.Errorf("KernelBatches = %d, want 3", got.KernelBatches) - } -} - -// TestLocalKernelSymbolizer_StatsForcedFallbackBumpsCounter asserts -// that pre-seeding fallback mode (the PERFAGENT_FORCE_KERNEL_FALLBACK=1 -// path) also marks fallback_engaged > 0 — without this, the -// end-of-run log would say fallback_engaged=0 even when the -// symbolizer ran entirely on the kallsyms path, leaving operators -// unable to tell which mode produced their pprof. -func TestLocalKernelSymbolizer_StatsForcedFallbackBumpsCounter(t *testing.T) { - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { - return []Frame{{Address: ips[0], Name: "ok"}}, nil - }) - // Simulate constructor-time forced fallback. - s.fallback.Store(true) - s.stats.KernelFallbackEngaged.Add(1) - _, _ = s.SymbolizeKernel([]uint64{0xffffffff80001000}) - got := s.Stats() - if got.KernelFallbackEngaged != 1 { - t.Errorf("KernelFallbackEngaged = %d, want 1 (forced-fallback)", got.KernelFallbackEngaged) - } -} - -// TestLocalKernelSymbolizer_StatsRawAddrFramesOnTotalFailure asserts -// that when both blazesym and the fallback fail, raw_addr_frames -// reflects the number of IPs that fell to the raw-hex path. -func TestLocalKernelSymbolizer_StatsRawAddrFramesOnTotalFailure(t *testing.T) { - s := stubKernelSymbolizer(func(ips []uint64, useFallback bool) ([]Frame, error) { +// TestLocalKernelSymbolizer_StatsRawAddrFramesOnFailure asserts that +// when blazesym fails, the IPs fall to the raw-hex backstop and both +// raw_addr_frames and batch_failures reflect it. +func TestLocalKernelSymbolizer_StatsRawAddrFramesOnFailure(t *testing.T) { + s := stubKernelSymbolizer(func(ips []uint64) ([]Frame, error) { return nil, errors.New("blazesym broken") }) ips := []uint64{0xffffffff80001000, 0xffffffff80002000, 0xffffffff80003000} - _, _ = s.SymbolizeKernel(ips) + frames, _ := s.SymbolizeKernel(ips) + if len(frames) != len(ips) { + t.Fatalf("got %d frames, want %d", len(frames), len(ips)) + } + for i, f := range frames { + if f.Reason != FailureMissingSymbols || f.Address != ips[i] { + t.Errorf("frame %d = %+v, want raw-addr backstop for %#x", i, f, ips[i]) + } + } got := s.Stats() if got.KernelRawAddrFrames != uint64(len(ips)) { t.Errorf("KernelRawAddrFrames = %d, want %d", got.KernelRawAddrFrames, len(ips)) diff --git a/test/integration_test.go b/test/integration_test.go index e74437b..16c764e 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -1913,67 +1913,6 @@ func countDistinctNonSentinelPIDsInPerfData(body []byte) int { return distinct } -// TestKernelStackResolution_ForcedFallback covers the kernel-lockdown -// path: on hosts with lockdown=integrity, blazesym's default kernel -// source fails with BLAZE_ERR_PERMISSION_DENIED and SymbolizeKernel -// retries against the kallsyms-only source (vmlinux=""). This test -// pins the fallback path on every host by setting -// PERFAGENT_FORCE_KERNEL_FALLBACK=1, so a regression in the fallback -// implementation surfaces in normal CI without needing an actual -// locked-down kernel. -// -// Also explicitly verifies that --pid mode (not --all) resolves -// kernel symbols — kernel-mode resolution is a per-symbolizer property -// and should not depend on system-wide capture scope. -func TestKernelStackResolution_ForcedFallback(t *testing.T) { - t.Helper() - requireBPFRunnable(t, getAgentPath(t)) - - if !readKptrRestrictZero() { - t.Skip("requires kptr_restrict=0 (the fallback path resolves via /proc/kallsyms)") - } - - bin := getAgentPath(t) - - cmd, cleanup := spawnIoBoundWorkload(t) - defer cleanup() - - out := filepath.Join(t.TempDir(), "profile.pb.gz") - agent := exec.Command(bin, - "--profile", - "--kernel-stacks", - "--pid", strconv.Itoa(cmd.Process.Pid), - "--duration", "3s", - "--profile-output", out, - ) - // Force the kallsyms-only path so this test always exercises the - // fallback branch, regardless of whether the running kernel has - // lockdown=integrity. - agent.Env = append(os.Environ(), "PERFAGENT_FORCE_KERNEL_FALLBACK=1") - agent.Stdout = os.Stdout - agent.Stderr = os.Stderr - if err := agent.Run(); err != nil { - t.Fatalf("perf-agent run: %v", err) - } - - p := parseProfile(t, out) - got := map[string]bool{} - for _, fn := range p.Function { - got[fn.Name] = true - } - - // The fallback path must still surface kernel symbols. Use the - // same regex as TestKernelStackResolution: common syscall / VFS / - // scheduler / TCP entry points produced by the io_bound workload. - kernelRe := regexp.MustCompile(`^(do_sys_|ksys_|__x64_sys_|vfs_|__schedule|read_|sock_|tcp_)`) - for name := range got { - if kernelRe.MatchString(name) { - return - } - } - t.Fatalf("forced-fallback path produced no kernel symbol; got: %v", sortedKeys(got)) -} - // spawnIoBoundWorkload starts the Go io_bound workload (heavy /dev/zero reads // → frequent syscall/kernel frames) and returns the running command plus a // cleanup func that kills it.