From 264c8dc8c3bfa3f76cc80bd92622e2d660a5782d Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Wed, 13 May 2026 16:50:04 -0700 Subject: [PATCH 1/5] feat(fc): drain virtio-balloon free-page-hinting before pause Drains virtio-balloon free-page-hinting before pause so snapshots don't capture pages the guest already considers free. Balloon install gated by free-page-hinting-install (bool LD flag); kernel-side eligibility targeted via the LD context (kernel/FC version). On pause we call start_balloon_hinting(acknowledge_on_stop=true) and poll describe_balloon_hinting until host_cmd == DONE, gated by free-page-hinting-timeout-ms (int LD flag, ms; 0 = disabled). Hot path: post-pause we trigger an FC metrics flush but don't wait for the reader, trading per-pause counter precision for pause latency. Includes cmd/resume-build -fph-bench and scripts/bench-fph.sh for offline FPR vs FPR+FPH comparison. --- .../orchestrator/cmd/create-build/main.go | 7 + .../cmd/resume-build/fph_bench.go | 331 ++++++++++++++++++ .../orchestrator/cmd/resume-build/main.go | 51 ++- packages/orchestrator/go.mod | 2 +- .../orchestrator/pkg/sandbox/fc/client.go | 43 ++- .../pkg/sandbox/fc/drain_balloon_test.go | 33 ++ .../orchestrator/pkg/sandbox/fc/fc_metrics.go | 97 ++++- .../pkg/sandbox/fc/fc_metrics_test.go | 37 ++ .../orchestrator/pkg/sandbox/fc/fph_gates.go | 17 + .../orchestrator/pkg/sandbox/fc/process.go | 83 ++++- packages/orchestrator/pkg/sandbox/sandbox.go | 36 ++ packages/orchestrator/scripts/bench-fph.sh | 64 ++++ .../shared/pkg/fcversion/sandbox_features.go | 4 + packages/shared/pkg/featureflags/flags.go | 12 +- 14 files changed, 797 insertions(+), 20 deletions(-) create mode 100644 packages/orchestrator/cmd/resume-build/fph_bench.go create mode 100644 packages/orchestrator/pkg/sandbox/fc/drain_balloon_test.go create mode 100644 packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go create mode 100644 packages/orchestrator/pkg/sandbox/fc/fph_gates.go create mode 100755 packages/orchestrator/scripts/bench-fph.sh diff --git a/packages/orchestrator/cmd/create-build/main.go b/packages/orchestrator/cmd/create-build/main.go index d67097e39f..79af6dcb2d 100644 --- a/packages/orchestrator/cmd/create-build/main.go +++ b/packages/orchestrator/cmd/create-build/main.go @@ -76,6 +76,11 @@ func main() { log.Fatal("-to-build required") } + // Bake free-page-hinting into the balloon device at boot. The install bit + // is read from the offline LD store at the boot-time Create call and + // cannot be added on resume, so it must be flipped before builder.Build. + featureflags.NewBoolFlag("free-page-hinting-install", true) + // Suppress other noisy output unless verbose, but keep std log for fatal errors if !*verbose { cmdutil.SuppressNoisyLogsKeepStdLog() @@ -363,6 +368,8 @@ func doBuild( ReadyCmd: readyCmd, KernelVersion: kernel, FirecrackerVersion: fc, + FreePageReporting: true, + TeamID: "local", Steps: steps, } diff --git a/packages/orchestrator/cmd/resume-build/fph_bench.go b/packages/orchestrator/cmd/resume-build/fph_bench.go new file mode 100644 index 0000000000..ee8525ca13 --- /dev/null +++ b/packages/orchestrator/cmd/resume-build/fph_bench.go @@ -0,0 +1,331 @@ +package main + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "path/filepath" + "time" + + "github.com/google/uuid" + + "github.com/e2b-dev/infra/packages/orchestrator/cmd/internal/cmdutil" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox" + "github.com/e2b-dev/infra/packages/shared/pkg/featureflags" + "github.com/e2b-dev/infra/packages/shared/pkg/storage" +) + +type fphBenchOptions struct { + enabled bool + workload string + iterations int + delay time.Duration +} + +const fphBenchDrainTimeout = 5 * time.Second + +type fphBenchSample struct { + memfileBytes int64 + hintCount uint64 + hintBytes uint64 + reportCount uint64 + reportBytes uint64 + resume time.Duration + pause time.Duration + err error +} + +// fphBench runs the workload N times under FPR-only and N times under FPR+FPH, +// uploading each snapshot to local storage and measuring the resulting memfile +// data size. Smaller is better. +func (r *runner) fphBench(ctx context.Context, opts fphBenchOptions) error { + if opts.iterations <= 0 { + return errors.New("fph-bench: iterations must be > 0") + } + + fmt.Printf("📊 FPH bench (%d iterations per arm)\n", opts.iterations) + fmt.Printf(" Workload: %s\n", opts.workload) + if opts.delay > 0 { + fmt.Printf(" Pause delay: %s (lets continuous FPR settle before drain)\n", opts.delay) + } + fmt.Println() + + noFph := make([]fphBenchSample, 0, opts.iterations) + withFph := make([]fphBenchSample, 0, opts.iterations) + + for i := range opts.iterations { + if ctx.Err() != nil { + break + } + fmt.Printf("[%d/%d] FPR-only ... ", i+1, opts.iterations) + s := r.fphBenchOnce(ctx, opts, false) + noFph = append(noFph, s) + fmt.Println(fphBenchSampleString(s)) + + if ctx.Err() != nil { + break + } + fmt.Printf("[%d/%d] FPR + FPH ... ", i+1, opts.iterations) + s = r.fphBenchOnce(ctx, opts, true) + withFph = append(withFph, s) + fmt.Println(fphBenchSampleString(s)) + } + + fmt.Println() + printFphBenchSummary("FPR-only", noFph) + printFphBenchSummary("FPR + FPH", withFph) + printFphBenchDelta(noFph, withFph) + checkFphActuallyHinted(withFph) + + return firstErr(noFph, withFph) +} + +// checkFphActuallyHinted warns if the FPR+FPH arm never observed any FPH +// activity — the drain regressed or the workload froze nothing. +func checkFphActuallyHinted(samples []fphBenchSample) { + ok := successfulSamples(samples) + if len(ok) == 0 { + return + } + for _, s := range ok { + if s.hintCount > 0 { + return + } + } + fmt.Println() + fmt.Println(" ⚠️ FPR+FPH arm reported hint_count=0 across all iterations.") + fmt.Println(" Either DrainBalloon isn't actually driving an FPH cycle, or the") + fmt.Println(" workload didn't free anything. Try a heavier workload (the default") + fmt.Println(" frees ~256 MiB) or check the trace span 'drain-balloon'.") +} + +// fphBenchOnce runs one resume → workload → optional delay → pause cycle. +func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph bool) fphBenchSample { + if withFph { + featureflags.NewBoolFlag("free-page-hinting-install", true) + featureflags.NewIntFlag("free-page-hinting-timeout-ms", int(fphBenchDrainTimeout/time.Millisecond)) + } else { + featureflags.NewBoolFlag("free-page-hinting-install", false) + featureflags.NewIntFlag("free-page-hinting-timeout-ms", 0) + } + + buildID := uuid.New().String() + runtime := sandbox.RuntimeMetadata{ + TemplateID: r.buildID, + TeamID: "local", + SandboxID: fmt.Sprintf("fph-bench-%d", time.Now().UnixNano()), + ExecutionID: fmt.Sprintf("fph-bench-exec-%d", time.Now().UnixNano()), + } + + t0 := time.Now() + sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil) + resumeDur := time.Since(t0) + if err != nil { + return fphBenchSample{resume: resumeDur, err: fmt.Errorf("resume: %w", err)} + } + defer sbx.Close(context.WithoutCancel(ctx)) + + if err := runCommandInSandbox(ctx, sbx, opts.workload); err != nil { + return fphBenchSample{resume: resumeDur, err: fmt.Errorf("workload: %w", err)} + } + + if opts.delay > 0 { + select { + case <-ctx.Done(): + return fphBenchSample{resume: resumeDur, err: ctx.Err()} + case <-time.After(opts.delay): + } + } + + origMeta, err := r.tmpl.Metadata() + if err != nil { + return fphBenchSample{resume: resumeDur, err: fmt.Errorf("metadata: %w", err)} + } + newMeta := origMeta + newMeta.Template.BuildID = buildID + + pauseStart := time.Now() + snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause) + pauseDur := time.Since(pauseStart) + if err != nil { + return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("pause: %w", err)} + } + defer snapshot.Close(context.WithoutCancel(ctx)) + + balloon, _ := sbx.FlushAndReadBalloonMetrics(ctx) + + upload, err := sandbox.NewUpload(ctx, nil, snapshot, r.storage, storage.CompressConfig{}, nil, "", nil) + if err != nil { + return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("upload prepare: %w", err)} + } + if err := upload.Run(ctx); err != nil { + return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("upload: %w", err)} + } + + memfileBytes, err := readLocalMemfileSize(buildID) + if err != nil { + return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("memfile size: %w", err)} + } + + cleanupLocalBuild(buildID) + + return fphBenchSample{ + memfileBytes: memfileBytes, + hintCount: balloon.HintCount, + hintBytes: balloon.HintFreed, + reportCount: balloon.ReportCount, + reportBytes: balloon.ReportFreed, + resume: resumeDur, + pause: pauseDur, + } +} + +func readLocalMemfileSize(buildID string) (int64, error) { + basePath := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH") + if basePath == "" { + return 0, errors.New("LOCAL_TEMPLATE_STORAGE_BASE_PATH not set; -fph-bench requires local storage") + } + path := filepath.Join(basePath, buildID, storage.MemfileName) + + return cmdutil.GetActualFileSize(path) +} + +func cleanupLocalBuild(buildID string) { + basePath := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH") + if basePath == "" { + return + } + _ = os.RemoveAll(filepath.Join(basePath, buildID)) +} + +func fphBenchSampleString(s fphBenchSample) string { + if s.err != nil { + return fmt.Sprintf("❌ %v", s.err) + } + + return fmt.Sprintf("memfile=%s fpr=%d/%s fph=%d/%s (resume %s, pause %s)", + fmtBytes(s.memfileBytes), + s.reportCount, fmtBytes(int64(s.reportBytes)), + s.hintCount, fmtBytes(int64(s.hintBytes)), + fmtDur(s.resume), fmtDur(s.pause)) +} + +func printFphBenchSummary(label string, samples []fphBenchSample) { + ok := successfulSamples(samples) + if len(ok) == 0 { + fmt.Printf(" %-9s: no successful samples\n", label) + + return + } + + bytesAvg, bytesStd := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return s.memfileBytes })) + pauseAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.pause) })) + fphAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.hintBytes) })) + fprAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.reportBytes) })) + + fmt.Printf(" %-9s: memfile %s ± %s fpr_freed %s fph_freed %s pause avg %s (n=%d)\n", + label, + fmtBytes(int64(bytesAvg)), fmtBytes(int64(bytesStd)), + fmtBytes(int64(fprAvg)), + fmtBytes(int64(fphAvg)), + fmtDur(time.Duration(pauseAvg)), + len(ok)) +} + +func printFphBenchDelta(noFph, withFph []fphBenchSample) { + a := successfulSamples(noFph) + b := successfulSamples(withFph) + if len(a) == 0 || len(b) == 0 { + return + } + + noFphAvg, _ := meanStd(intSlice(a, func(s fphBenchSample) int64 { return s.memfileBytes })) + withFphAvg, _ := meanStd(intSlice(b, func(s fphBenchSample) int64 { return s.memfileBytes })) + + delta := noFphAvg - withFphAvg + pct := 0.0 + if noFphAvg > 0 { + pct = delta / noFphAvg * 100 + } + + pauseNoFph, _ := meanStd(intSlice(a, func(s fphBenchSample) int64 { return int64(s.pause) })) + pauseWithFph, _ := meanStd(intSlice(b, func(s fphBenchSample) int64 { return int64(s.pause) })) + pauseDelta := pauseWithFph - pauseNoFph + + fmt.Println() + fmt.Printf(" FPH freed extra: %s (%.1f%% of FPR-only memfile)\n", fmtBytes(int64(delta)), pct) + fmt.Printf(" Pause overhead : %s\n", fmtDur(time.Duration(pauseDelta))) +} + +func successfulSamples(samples []fphBenchSample) []fphBenchSample { + out := samples[:0:0] + for _, s := range samples { + if s.err == nil { + out = append(out, s) + } + } + + return out +} + +func intSlice[T any](items []T, f func(T) int64) []int64 { + out := make([]int64, len(items)) + for i, x := range items { + out[i] = f(x) + } + + return out +} + +func meanStd(xs []int64) (mean, std float64) { + if len(xs) == 0 { + return 0, 0 + } + var sum float64 + for _, x := range xs { + sum += float64(x) + } + mean = sum / float64(len(xs)) + + if len(xs) < 2 { + return mean, 0 + } + var v float64 + for _, x := range xs { + d := float64(x) - mean + v += d * d + } + std = math.Sqrt(v / float64(len(xs)-1)) + + return mean, std +} + +func fmtBytes(n int64) string { + if n < 0 { + return "-" + fmtBytes(-n) + } + switch { + case n < 1<<10: + return fmt.Sprintf("%d B", n) + case n < 1<<20: + return fmt.Sprintf("%.1f KiB", float64(n)/(1<<10)) + case n < 1<<30: + return fmt.Sprintf("%.1f MiB", float64(n)/(1<<20)) + default: + return fmt.Sprintf("%.2f GiB", float64(n)/(1<<30)) + } +} + +func firstErr(groups ...[]fphBenchSample) error { + for _, g := range groups { + for _, s := range g { + if s.err != nil { + return s.err + } + } + } + + return nil +} diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index dd780e13da..2fd3e9bb6b 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -71,11 +71,19 @@ func main() { optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)") shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)") - // Enables the pre-pause reclaim chain with sensible per-step caps. + fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)") reclaim := flag.Bool("reclaim", false, "enable pre-pause reclaim chain (fstrim 500ms, sync 500ms, drop_caches 200ms, compact 1s)") + fphBench := flag.Bool("fph-bench", false, "compare pause memfile size with vs without FPH; requires -cmd-pause workload, uses -iterations (default 3), forces FPR on") + fphBenchDelay := flag.Duration("fph-bench-delay", 0, "wait this long between workload completion and pause (lets FPR settle)") + flag.Parse() + if *fphTimeoutMs > 0 { + featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs) + featureflags.NewBoolFlag("free-page-hinting-install", true) + } + if *reclaim { featureflags.NewJSONFlag("guest-pause-reclaim", ldvalue.FromJSONMarshal(map[string]int{ "sync": 500, @@ -119,8 +127,9 @@ func main() { isPauseMode := pauseCount > 0 - // Interactive pause modes are incompatible with iterations. - if *iterations > 0 && (*signalPause != "" || *cmdPause != "" || *cmdSignalPause != "") { + // Interactive pause modes are incompatible with iterations (fph-bench + // reuses -cmd-pause as its workload and runs its own iteration loop). + if !*fphBench && *iterations > 0 && (*signalPause != "" || *cmdPause != "" || *cmdSignalPause != "") { log.Fatal("-signal-pause, -cmd-pause, and -cmd-signal-pause are incompatible with -iterations") } @@ -140,6 +149,15 @@ func main() { log.Fatal("-shell can only be used in interactive mode (no -cmd, no pause flags, no -iterations)") } + if *fphBench { + if *cmdPause == "" { + log.Fatal("-fph-bench requires -cmd-pause to specify the workload") + } + if *fphTimeoutMs > 0 { + log.Fatal("-fph-bench manages -fph-timeout-ms internally; do not set it") + } + } + // Generate new build ID if not specified and pause mode is enabled outputBuildID := *toBuild if isPauseMode && outputBuildID == "" { @@ -174,7 +192,17 @@ func main() { iterations: *iterations, } - err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts) + fphBenchOpts := fphBenchOptions{ + enabled: *fphBench, + workload: *cmdPause, + iterations: *iterations, + delay: *fphBenchDelay, + } + if *fphBench && fphBenchOpts.iterations <= 0 { + fphBenchOpts.iterations = 3 + } + + err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts, fphBenchOpts) cancel() if err != nil { @@ -989,7 +1017,7 @@ func (r *runner) benchmark(ctx context.Context, n int) error { return lastErr } -func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions) error { +func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions, fphBenchOpts fphBenchOptions) error { // Silence other loggers unless verbose mode var l logger.Logger if !verbose { @@ -1135,10 +1163,11 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe token := "local" sbxCfg := sandbox.NewConfig(sandbox.Config{ - BaseTemplateID: buildID, - Vcpu: 1, - RamMB: 512, - Envd: sandbox.EnvdMetadata{Vars: map[string]string{}, AccessToken: &token, Version: "1.0.0"}, + BaseTemplateID: buildID, + Vcpu: 1, + RamMB: 512, + FreePageReporting: fphBenchOpts.enabled, + Envd: sandbox.EnvdMetadata{Vars: map[string]string{}, AccessToken: &token, Version: "1.0.0"}, FirecrackerConfig: fc.Config{ KernelVersion: meta.Template.KernelVersion, FirecrackerVersion: meta.Template.FirecrackerVersion, @@ -1158,6 +1187,10 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe sbxConfig: sbxCfg, } + if fphBenchOpts.enabled { + return r.fphBench(ctx, fphBenchOpts) + } + if runOpts.enabled() { return r.cmdMode(ctx, runOpts) } diff --git a/packages/orchestrator/go.mod b/packages/orchestrator/go.mod index 399f8a0fb2..ec35bfa0d7 100644 --- a/packages/orchestrator/go.mod +++ b/packages/orchestrator/go.mod @@ -36,6 +36,7 @@ require ( github.com/gin-contrib/size v1.0.2 github.com/gin-gonic/gin v1.12.0 github.com/go-git/go-billy/v5 v5.9.0 + github.com/go-openapi/runtime v0.29.2 github.com/go-openapi/strfmt v0.25.0 github.com/google/go-containerregistry v0.21.5 github.com/google/nftables v0.3.0 @@ -168,7 +169,6 @@ require ( github.com/go-openapi/jsonpointer v0.22.4 // indirect github.com/go-openapi/jsonreference v0.21.3 // indirect github.com/go-openapi/loads v0.23.2 // indirect - github.com/go-openapi/runtime v0.29.2 // indirect github.com/go-openapi/spec v0.22.1 // indirect github.com/go-openapi/swag v0.25.4 // indirect github.com/go-openapi/swag/cmdutils v0.25.4 // indirect diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index 06cdcd7779..7dfa4a2fda 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -4,11 +4,13 @@ package fc import ( "context" + "errors" "fmt" "runtime" "github.com/RoaringBitmap/roaring/v2" "github.com/firecracker-microvm/firecracker-go-sdk" + openapiruntime "github.com/go-openapi/runtime" "github.com/go-openapi/strfmt" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template" @@ -430,9 +432,9 @@ func (c *apiClient) startVM(ctx context.Context) error { } // installBalloon attaches a zero-MiB balloon device. Individual balloon -// features (free-page-reporting today, free-page-hinting next) are toggled -// via parameters so callers can opt in to any subset independently. -func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error { +// features (free-page-reporting, free-page-hinting) are toggled via +// parameters so callers can opt in to any subset independently. +func (c *apiClient) installBalloon(ctx context.Context, freePageReporting, freePageHinting bool) error { ctx, span := tracer.Start(ctx, "install-balloon") defer span.End() @@ -445,6 +447,7 @@ func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) AmountMib: &amountMib, DeflateOnOom: &deflateOnOom, FreePageReporting: freePageReporting, + FreePageHinting: freePageHinting, }, } @@ -456,6 +459,40 @@ func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) return nil } +func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error { + params := operations.StartBalloonHintingParams{ + Context: ctx, + Body: &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop}, + } + _, err := c.client.Operations.StartBalloonHinting(¶ms) + if err != nil { + // FC returns 204 (no content) on success, but the FC OpenAPI spec only + // declares 200/400 — go-swagger treats any other 2xx as "unexpected + // success" and surfaces it as a *runtime.APIError. Honour the 2xx. + var apiErr *openapiruntime.APIError + if errors.As(err, &apiErr) && apiErr.IsSuccess() { + return nil + } + + return fmt.Errorf("error starting balloon hinting: %w", err) + } + + return nil +} + +func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd int64, err error) { + params := operations.DescribeBalloonHintingParams{Context: ctx} + res, err := c.client.Operations.DescribeBalloonHinting(¶ms) + if err != nil { + return 0, err + } + if res.Payload.HostCmd != nil { + hostCmd = *res.Payload.HostCmd + } + + return hostCmd, nil +} + func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) { params := operations.GetMemoryMappingsParams{ Context: ctx, diff --git a/packages/orchestrator/pkg/sandbox/fc/drain_balloon_test.go b/packages/orchestrator/pkg/sandbox/fc/drain_balloon_test.go new file mode 100644 index 0000000000..f19187b7fb --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/fc/drain_balloon_test.go @@ -0,0 +1,33 @@ +//go:build linux + +package fc + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Regression: when hostBefore was already freePageHintDone (steady state +// after a prior cycle, common after resume-from-snapshot) and the new +// cycle completed between polls, an earlier sawBump check missed the +// transition and hung until ctx timeout. +func TestPollFphDone_FastCycleAfterPriorDone(t *testing.T) { + t.Parallel() + + calls := 0 + describe := func(_ context.Context) (int64, error) { + calls++ + + return freePageHintDone, nil + } + + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + defer cancel() + + require.NoError(t, pollFphDone(ctx, describe)) + assert.Equal(t, 1, calls) +} diff --git a/packages/orchestrator/pkg/sandbox/fc/fc_metrics.go b/packages/orchestrator/pkg/sandbox/fc/fc_metrics.go index d575907982..041a036680 100644 --- a/packages/orchestrator/pkg/sandbox/fc/fc_metrics.go +++ b/packages/orchestrator/pkg/sandbox/fc/fc_metrics.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "os" "time" @@ -108,10 +109,98 @@ type firecrackerBlockMetrics struct { RemainingReqsCount uint64 `json:"remaining_reqs_count"` } +// firecrackerBalloonMetrics is a subset of Firecracker's BalloonDeviceMetrics. +// Counters are SharedIncMetric — each flush emits the delta since the previous +// serialize, so we accumulate them in the reader. +type firecrackerBalloonMetrics struct { + FreePageHintCount uint64 `json:"free_page_hint_count"` + FreePageHintFreed uint64 `json:"free_page_hint_freed"` + FreePageHintFails uint64 `json:"free_page_hint_fails"` + FreePageReportCount uint64 `json:"free_page_report_count"` + FreePageReportFreed uint64 `json:"free_page_report_freed"` + FreePageReportFails uint64 `json:"free_page_report_fails"` +} + // firecrackerMetrics is the top-level structure of one Firecracker metrics JSON line. type firecrackerMetrics struct { - Net firecrackerNetMetrics `json:"net"` - Block firecrackerBlockMetrics `json:"block"` + Net firecrackerNetMetrics `json:"net"` + Block firecrackerBlockMetrics `json:"block"` + Balloon firecrackerBalloonMetrics `json:"balloon"` +} + +// BalloonMetricsSnapshot is the cumulative-since-FC-start view of +// virtio-balloon counters, exposed via Process.BalloonMetrics. +type BalloonMetricsSnapshot struct { + HintCount uint64 + HintFreed uint64 + HintFails uint64 + ReportCount uint64 + ReportFreed uint64 + ReportFails uint64 +} + +// fphFlushReadTimeout caps how long FlushAndReadBalloonMetrics waits for the +// metrics-reader goroutine to consume FC's response line. +const fphFlushReadTimeout = 2 * time.Second + +func accumulateBalloon(prev *BalloonMetricsSnapshot, b firecrackerBalloonMetrics) BalloonMetricsSnapshot { + next := BalloonMetricsSnapshot{ + HintCount: b.FreePageHintCount, + HintFreed: b.FreePageHintFreed, + HintFails: b.FreePageHintFails, + ReportCount: b.FreePageReportCount, + ReportFreed: b.FreePageReportFreed, + ReportFails: b.FreePageReportFails, + } + if prev != nil { + next.HintCount += prev.HintCount + next.HintFreed += prev.HintFreed + next.HintFails += prev.HintFails + next.ReportCount += prev.ReportCount + next.ReportFreed += prev.ReportFreed + next.ReportFails += prev.ReportFails + } + + return next +} + +// BalloonMetrics returns the cumulative virtio-balloon counters observed so far. +func (p *Process) BalloonMetrics() BalloonMetricsSnapshot { + if cur := p.balloonAccum.Load(); cur != nil { + return *cur + } + + return BalloonMetricsSnapshot{} +} + +// FlushMetrics triggers an FC metrics flush. Non-blocking on the reader. +func (p *Process) FlushMetrics(ctx context.Context) error { + return p.client.flushMetrics(ctx) +} + +// FlushAndReadBalloonMetrics flushes and waits for the reader to ingest the +// resulting line, returning the updated cumulative snapshot. On flush error +// (e.g. FC already torn down) returns the last observed snapshot. +func (p *Process) FlushAndReadBalloonMetrics(ctx context.Context) (BalloonMetricsSnapshot, error) { + pre := p.balloonAccum.Load() + if err := p.client.flushMetrics(ctx); err != nil { + return p.BalloonMetrics(), fmt.Errorf("flush metrics: %w", err) + } + + deadline := time.Now().Add(fphFlushReadTimeout) + for { + if cur := p.balloonAccum.Load(); cur != pre { + return p.BalloonMetrics(), nil + } + if time.Now().After(deadline) { + return p.BalloonMetrics(), errors.New("timeout waiting for fresh balloon metrics line") + } + select { + case <-time.After(5 * time.Millisecond): + case <-ctx.Done(): + return p.BalloonMetrics(), ctx.Err() + } + } } // startMetricsReader opens the metrics FIFO and starts a goroutine that reads @@ -262,6 +351,10 @@ func (p *Process) startMetricsReader(ctx context.Context) { if b.NoAvailBuffer > 0 { fcBlockNoAvailBuffer.Add(ctx, int64(b.NoAvailBuffer)) } + + // Balloon: SharedIncMetric resets on flush, so accumulate. + next := accumulateBalloon(p.balloonAccum.Load(), m.Balloon) + p.balloonAccum.Store(&next) } if err := scanner.Err(); err != nil { diff --git a/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go b/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go new file mode 100644 index 0000000000..506d17400e --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go @@ -0,0 +1,37 @@ +//go:build linux + +package fc + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Confirms the JSON tags on firecrackerBalloonMetrics match the keys FC +// emits, so the reader actually populates the snapshot in production. +func TestFirecrackerMetrics_ParsesBalloonLine(t *testing.T) { + t.Parallel() + + const line = `{ + "net": {}, + "block": {}, + "balloon": { + "free_page_hint_count": 11, + "free_page_hint_freed": 46137344, + "free_page_report_count": 2, + "free_page_report_freed": 8388608 + } + }` + + var m firecrackerMetrics + require.NoError(t, json.Unmarshal([]byte(line), &m)) + + snap := accumulateBalloon(nil, m.Balloon) + assert.Equal(t, uint64(11), snap.HintCount) + assert.Equal(t, uint64(46137344), snap.HintFreed) + assert.Equal(t, uint64(2), snap.ReportCount) + assert.Equal(t, uint64(8388608), snap.ReportFreed) +} diff --git a/packages/orchestrator/pkg/sandbox/fc/fph_gates.go b/packages/orchestrator/pkg/sandbox/fc/fph_gates.go new file mode 100644 index 0000000000..2287836fd4 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/fc/fph_gates.go @@ -0,0 +1,17 @@ +package fc + +import ( + "github.com/e2b-dev/infra/packages/shared/pkg/fcversion" +) + +// FCSupportsFreePageHinting reports whether the FC version exposes +// virtio-balloon free-page-hinting. Kernel-side eligibility is gated separately +// via LaunchDarkly. +func FCSupportsFreePageHinting(fcVersion string) bool { + info, err := fcversion.New(fcVersion) + if err != nil { + return false + } + + return info.HasFreePageHinting() +} diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go index fe9acf8fde..9130c9844e 100644 --- a/packages/orchestrator/pkg/sandbox/fc/process.go +++ b/packages/orchestrator/pkg/sandbox/fc/process.go @@ -28,6 +28,7 @@ import ( "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/rootfs" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/socket" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template" + "github.com/e2b-dev/infra/packages/shared/pkg/fc/client/operations" "github.com/e2b-dev/infra/packages/shared/pkg/keys" "github.com/e2b-dev/infra/packages/shared/pkg/logger" sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox" @@ -135,6 +136,11 @@ type Process struct { Exit *utils.ErrorOnce client *apiClient + + // balloonAccum holds the cumulative virtio-balloon counters across + // all FC metrics flushes. FC's SharedIncMetric resets on each flush, + // so the metrics-reader goroutine sums per-line deltas into here. + balloonAccum atomic.Pointer[BalloonMetricsSnapshot] } func NewProcess( @@ -302,6 +308,7 @@ func (p *Process) Create( memoryMB int64, hugePages bool, freePageReporting bool, + freePageHinting bool, options ProcessOptions, txRateLimit RateLimiterConfig, driveRateLimit RateLimiterConfig, @@ -444,14 +451,16 @@ func (p *Process) Create( } telemetry.ReportEvent(ctx, "set fc entropy config") - if freePageReporting { - err = p.client.installBalloon(ctx, freePageReporting) - if err != nil { + if freePageReporting || freePageHinting { + if err := p.client.installBalloon(ctx, freePageReporting, freePageHinting); err != nil { fcStopErr := p.Stop(ctx) return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr) } - telemetry.ReportEvent(ctx, "installed balloon device") + telemetry.ReportEvent(ctx, "installed balloon device", + attribute.Bool("balloon.free_page_reporting", freePageReporting), + attribute.Bool("balloon.free_page_hinting", freePageHinting), + ) } err = p.client.startVM(ctx) @@ -715,6 +724,72 @@ func (p *Process) Pause(ctx context.Context) error { return p.client.pauseVM(ctx) } +// freePageHintDone is FC's FREE_PAGE_HINT_DONE: the host_cmd value FC writes +// back after the guest's FREE_PAGE_HINT_STOP when start used acknowledge_on_stop. +const freePageHintDone int64 = 1 + +// DrainBalloon triggers a free-page-hinting run and blocks until the cycle +// completes or ctx fires. No-op on FC < v1.14 and when no balloon is configured. +func (p *Process) DrainBalloon(ctx context.Context) error { + ctx, span := tracer.Start(ctx, "drain-balloon") + outcome := "ok" + defer func() { + span.SetAttributes(attribute.String("drain-balloon.outcome", outcome)) + span.End() + }() + + if !FCSupportsFreePageHinting(p.Versions.FirecrackerVersion) { + outcome = "fc-unsupported" + + return nil + } + + if err := p.client.startBalloonHinting(ctx, true); err != nil { + var notConfigured *operations.StartBalloonHintingBadRequest + if errors.As(err, ¬Configured) { + outcome = "not-configured" + + return nil + } + + outcome = "start-failed" + + return fmt.Errorf("start balloon hinting: %w", err) + } + + if err := pollFphDone(ctx, p.client.describeBalloonHinting); err != nil { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + outcome = "timeout" + } else { + outcome = "describe-failed" + } + + return err + } + + return nil +} + +func pollFphDone(ctx context.Context, describe func(ctx context.Context) (int64, error)) error { + backoff := 5 * time.Millisecond + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + + host, err := describe(ctx) + if err != nil { + return fmt.Errorf("balloon hinting status: %w", err) + } + if host == freePageHintDone { + return nil + } + backoff = min(backoff*2, 50*time.Millisecond) + } +} + // CreateSnapshot VM needs to be paused before creating a snapshot. func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error { ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc") diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index 8cc61d7a73..7c9a019c99 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -11,6 +11,7 @@ import ( "time" "github.com/google/uuid" + "github.com/launchdarkly/go-sdk-common/v3/ldcontext" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -169,6 +170,17 @@ type RuntimeMetadata struct { SandboxType SandboxType } +// sandboxLDContext builds an LD context with kernel/FC-version attributes for +// per-sandbox flag targeting. +func sandboxLDContext(runtime RuntimeMetadata, config *Config) ldcontext.Context { + return ldcontext.NewBuilder(runtime.SandboxID). + Kind(featureflags.SandboxKind). + SetString(featureflags.SandboxTemplateAttribute, runtime.TemplateID). + SetString(featureflags.SandboxKernelVersionAttribute, config.FirecrackerConfig.KernelVersion). + SetString(featureflags.SandboxFirecrackerVersionAttribute, config.FirecrackerConfig.FirecrackerVersion). + Build() +} + type Resources struct { Slot *network.Slot rootfs rootfs.Provider @@ -491,6 +503,9 @@ func (f *Factory) CreateSandbox( return nil }) + freePageHinting := fc.FCSupportsFreePageHinting(config.FirecrackerConfig.FirecrackerVersion) && + f.featureFlags.BoolFlag(ctx, featureflags.FreePageHintingInstallFlag, sandboxLDContext(runtime, config)) + err = fcHandle.Create( ctx, sbxlogger.SandboxMetadata{ @@ -502,6 +517,7 @@ func (f *Factory) CreateSandbox( config.RamMB, config.HugePages, config.FreePageReporting, + freePageHinting, processOptions, fc.RateLimiterConfig{ Ops: fc.TokenBucketConfig(throttleConfig.Ops), @@ -1065,10 +1081,24 @@ func (s *Sandbox) Pause( // all default to 0 which disables the chain entirely. Non-fatal. s.bestEffortReclaim(ctx) + // Drain free-page-hinting before pause so the snapshot doesn't capture + // pages the guest already considers free. Timeout=0 disables. + if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs, sandboxLDContext(s.Runtime, s.Config))) * time.Millisecond; t > 0 { + drainCtx, cancel := context.WithTimeout(ctx, t) + if err := s.process.DrainBalloon(drainCtx); err != nil { + telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err) + } + cancel() + } + if err := s.process.Pause(ctx); err != nil { return nil, fmt.Errorf("failed to pause VM: %w", err) } + // Best-effort flush before the rootfs export goroutine closes the FC API + // socket. Non-blocking on the reader; trades precision for pause latency. + _ = s.process.FlushMetrics(ctx) + // Snapfile is not closed as it's returned and cached for later use (like resume) snapfile := template.NewLocalFileLink(cachePaths.CacheSnapfile()) cleanup.AddNoContext(ctx, snapfile.Close) @@ -1147,6 +1177,12 @@ func (s *Sandbox) Pause( }, nil } +// FlushAndReadBalloonMetrics triggers an FC metrics flush and returns the +// updated cumulative virtio-balloon counters. Used by the FPH bench. +func (s *Sandbox) FlushAndReadBalloonMetrics(ctx context.Context) (fc.BalloonMetricsSnapshot, error) { + return s.process.FlushAndReadBalloonMetrics(ctx) +} + // MemoryPrefetchData returns the ordered page fault data for prefetch mapping. func (s *Sandbox) MemoryPrefetchData(ctx context.Context) (block.PrefetchData, error) { prefetchData, err := s.Resources.memory.PrefetchData(ctx) diff --git a/packages/orchestrator/scripts/bench-fph.sh b/packages/orchestrator/scripts/bench-fph.sh new file mode 100755 index 0000000000..0b2dd9c43c --- /dev/null +++ b/packages/orchestrator/scripts/bench-fph.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Compare pause memfile size with vs without virtio-balloon free-page-hinting. +# +# Resumes the given build, runs a workload that dirties then frees ~256 MiB in +# the guest, pauses, and reports the resulting memfile data-layer size for +# both arms (FPR-only and FPR + FPH). FPH wins when it converts more freed +# pages into Empty mappings, shrinking the data layer. +# +# Usage: bench-fph.sh [--delay 0s] [--iterations 3] [--workload ""] +# +# Requires: root, FC v1.14+ template, .local-build/ checkout (or set +# LOCAL_TEMPLATE_STORAGE_BASE_PATH). + +set -euo pipefail + +if [[ $EUID -ne 0 ]]; then + echo "must run as root" >&2 + exit 1 +fi + +if [[ $# -lt 1 ]]; then + echo "usage: $0 [--delay 0s] [--iterations 3] [--workload ]" >&2 + exit 1 +fi + +BUILD_ID="$1" +shift + +DELAY="0s" +ITERATIONS="3" +# 256 MiB allocate + touch + free. Python bytearray of this size is mmap'd by +# glibc, so del() returns it directly to the kernel buddy allocator. The sleep +# gives FPR + buddy coalescing a moment to settle. +WORKLOAD='python3 - <&2; exit 1 ;; + esac +done + +cd "$(dirname "$0")/.." + +# Honour an explicit $GO so users on Mise/asdf can pass the right go binary +# through sudo (snap's /snap/bin/go is often the wrong version). +GO_BIN="${GO:-go}" + +exec "$GO_BIN" run ./cmd/resume-build \ + -from-build "$BUILD_ID" \ + -fph-bench \ + -cmd-pause "$WORKLOAD" \ + -fph-bench-delay "$DELAY" \ + -iterations "$ITERATIONS" diff --git a/packages/shared/pkg/fcversion/sandbox_features.go b/packages/shared/pkg/fcversion/sandbox_features.go index b768f199d4..821508d0c4 100644 --- a/packages/shared/pkg/fcversion/sandbox_features.go +++ b/packages/shared/pkg/fcversion/sandbox_features.go @@ -11,3 +11,7 @@ func (v *Info) HasHugePages() bool { func (v *Info) HasFreePageReporting() bool { return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14) } + +func (v *Info) HasFreePageHinting() bool { + return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14) +} diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go index 6d75252a05..a171ae08e2 100644 --- a/packages/shared/pkg/featureflags/flags.go +++ b/packages/shared/pkg/featureflags/flags.go @@ -124,6 +124,12 @@ var ( SandboxLabelBasedSchedulingFlag = NewBoolFlag("sandbox-label-based-scheduling", false) OptimisticResourceAccountingFlag = NewBoolFlag("sandbox-placement-optimistic-resource-accounting", false) FreePageReportingFlag = NewBoolFlag("free-page-reporting", false) + // FreePageHintingInstallFlag controls whether FreePageHinting=true is + // configured on the balloon at install time. Just having FPH on the + // balloon doesn't trigger the kernel race fixed in + // https://lore.kernel.org/lkml/20240429125100.7393-1-david@redhat.com/ + // — that race is on the actual hinting flow, gated by FreePageHintingTimeoutMs. + FreePageHintingInstallFlag = NewBoolFlag("free-page-hinting-install", false) NetworkTransformRulesFlag = NewBoolFlag("network-transform-rules", env.IsDevelopment()) ) @@ -164,7 +170,11 @@ var ( BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio) BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight) EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds - HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) + // FreePageHintingTimeoutMs gates the pre-pause balloon FPH drain. 0 + // disables. Evaluated with sandbox/kernel-version LD context so operators + // can roll out only on guests with the kernel race fix. + FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0) + HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10) // BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage From 929aa7c3c03cba90edb5e72dcb997f829ea65cc1 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Wed, 13 May 2026 16:58:59 -0700 Subject: [PATCH 2/5] chore(fph): trim bench tooling for review --- .../orchestrator/cmd/create-build/main.go | 4 +- .../cmd/resume-build/fph_bench.go | 262 ++++-------------- .../orchestrator/cmd/resume-build/main.go | 24 +- .../pkg/sandbox/fc/fc_metrics_test.go | 37 --- .../orchestrator/pkg/sandbox/fc/process.go | 5 +- packages/orchestrator/scripts/bench-fph.sh | 44 +-- 6 files changed, 77 insertions(+), 299 deletions(-) delete mode 100644 packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go diff --git a/packages/orchestrator/cmd/create-build/main.go b/packages/orchestrator/cmd/create-build/main.go index 79af6dcb2d..8fe6a832b8 100644 --- a/packages/orchestrator/cmd/create-build/main.go +++ b/packages/orchestrator/cmd/create-build/main.go @@ -76,9 +76,7 @@ func main() { log.Fatal("-to-build required") } - // Bake free-page-hinting into the balloon device at boot. The install bit - // is read from the offline LD store at the boot-time Create call and - // cannot be added on resume, so it must be flipped before builder.Build. + // FPH must be installed at boot — flag is read by Create, not Resume. featureflags.NewBoolFlag("free-page-hinting-install", true) // Suppress other noisy output unless verbose, but keep std log for fatal errors diff --git a/packages/orchestrator/cmd/resume-build/fph_bench.go b/packages/orchestrator/cmd/resume-build/fph_bench.go index ee8525ca13..5e497fd62f 100644 --- a/packages/orchestrator/cmd/resume-build/fph_bench.go +++ b/packages/orchestrator/cmd/resume-build/fph_bench.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "math" "os" "path/filepath" "time" @@ -28,80 +27,75 @@ const fphBenchDrainTimeout = 5 * time.Second type fphBenchSample struct { memfileBytes int64 - hintCount uint64 hintBytes uint64 - reportCount uint64 reportBytes uint64 - resume time.Duration pause time.Duration err error } // fphBench runs the workload N times under FPR-only and N times under FPR+FPH, -// uploading each snapshot to local storage and measuring the resulting memfile -// data size. Smaller is better. +// printing the resulting memfile data size and pause time per iteration. +// Smaller memfile is better. func (r *runner) fphBench(ctx context.Context, opts fphBenchOptions) error { if opts.iterations <= 0 { return errors.New("fph-bench: iterations must be > 0") } + fmt.Printf("FPH bench (%d iterations per arm, workload=%q)\n", opts.iterations, opts.workload) - fmt.Printf("📊 FPH bench (%d iterations per arm)\n", opts.iterations) - fmt.Printf(" Workload: %s\n", opts.workload) - if opts.delay > 0 { - fmt.Printf(" Pause delay: %s (lets continuous FPR settle before drain)\n", opts.delay) - } - fmt.Println() - - noFph := make([]fphBenchSample, 0, opts.iterations) - withFph := make([]fphBenchSample, 0, opts.iterations) + noFph := r.fphBenchArm(ctx, opts, false, "FPR-only ") + withFph := r.fphBenchArm(ctx, opts, true, "FPR + FPH") - for i := range opts.iterations { - if ctx.Err() != nil { - break + avg := func(s []fphBenchSample, f func(fphBenchSample) int64) int64 { + var sum, n int64 + for _, x := range s { + if x.err == nil { + sum += f(x) + n++ + } } - fmt.Printf("[%d/%d] FPR-only ... ", i+1, opts.iterations) - s := r.fphBenchOnce(ctx, opts, false) - noFph = append(noFph, s) - fmt.Println(fphBenchSampleString(s)) - - if ctx.Err() != nil { - break + if n == 0 { + return 0 } - fmt.Printf("[%d/%d] FPR + FPH ... ", i+1, opts.iterations) - s = r.fphBenchOnce(ctx, opts, true) - withFph = append(withFph, s) - fmt.Println(fphBenchSampleString(s)) + return sum / n } + memfile := func(s fphBenchSample) int64 { return s.memfileBytes } + pause := func(s fphBenchSample) int64 { return int64(s.pause) } - fmt.Println() - printFphBenchSummary("FPR-only", noFph) - printFphBenchSummary("FPR + FPH", withFph) - printFphBenchDelta(noFph, withFph) - checkFphActuallyHinted(withFph) + fmt.Printf("FPR-only : memfile %s pause %s\n", fmtMiB(avg(noFph, memfile)), time.Duration(avg(noFph, pause))) + fmt.Printf("FPR + FPH: memfile %s pause %s\n", fmtMiB(avg(withFph, memfile)), time.Duration(avg(withFph, pause))) - return firstErr(noFph, withFph) + for _, g := range [][]fphBenchSample{noFph, withFph} { + for _, s := range g { + if s.err != nil { + return s.err + } + } + } + return nil } -// checkFphActuallyHinted warns if the FPR+FPH arm never observed any FPH -// activity — the drain regressed or the workload froze nothing. -func checkFphActuallyHinted(samples []fphBenchSample) { - ok := successfulSamples(samples) - if len(ok) == 0 { - return - } - for _, s := range ok { - if s.hintCount > 0 { - return +func (r *runner) fphBenchArm(ctx context.Context, opts fphBenchOptions, withFph bool, label string) []fphBenchSample { + out := make([]fphBenchSample, 0, opts.iterations) + for i := range opts.iterations { + if ctx.Err() != nil { + break + } + s := r.fphBenchOnce(ctx, opts, withFph) + out = append(out, s) + if s.err != nil { + fmt.Printf("[%d/%d] %s: %v\n", i+1, opts.iterations, label, s.err) + continue } + fmt.Printf("[%d/%d] %s: memfile %s fpr_freed %s fph_freed %s pause %s\n", + i+1, opts.iterations, label, + fmtMiB(s.memfileBytes), + fmtMiB(int64(s.reportBytes)), + fmtMiB(int64(s.hintBytes)), + s.pause.Round(time.Millisecond)) } - fmt.Println() - fmt.Println(" ⚠️ FPR+FPH arm reported hint_count=0 across all iterations.") - fmt.Println(" Either DrainBalloon isn't actually driving an FPH cycle, or the") - fmt.Println(" workload didn't free anything. Try a heavier workload (the default") - fmt.Println(" frees ~256 MiB) or check the trace span 'drain-balloon'.") + return out } -// fphBenchOnce runs one resume → workload → optional delay → pause cycle. func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph bool) fphBenchSample { if withFph { featureflags.NewBoolFlag("free-page-hinting-install", true) @@ -112,36 +106,35 @@ func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph } buildID := uuid.New().String() + defer cleanupLocalBuild(buildID) + runtime := sandbox.RuntimeMetadata{ TemplateID: r.buildID, TeamID: "local", SandboxID: fmt.Sprintf("fph-bench-%d", time.Now().UnixNano()), ExecutionID: fmt.Sprintf("fph-bench-exec-%d", time.Now().UnixNano()), } - t0 := time.Now() sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil) - resumeDur := time.Since(t0) if err != nil { - return fphBenchSample{resume: resumeDur, err: fmt.Errorf("resume: %w", err)} + return fphBenchSample{err: fmt.Errorf("resume: %w", err)} } defer sbx.Close(context.WithoutCancel(ctx)) if err := runCommandInSandbox(ctx, sbx, opts.workload); err != nil { - return fphBenchSample{resume: resumeDur, err: fmt.Errorf("workload: %w", err)} + return fphBenchSample{err: fmt.Errorf("workload: %w", err)} } - if opts.delay > 0 { select { case <-ctx.Done(): - return fphBenchSample{resume: resumeDur, err: ctx.Err()} + return fphBenchSample{err: ctx.Err()} case <-time.After(opts.delay): } } origMeta, err := r.tmpl.Metadata() if err != nil { - return fphBenchSample{resume: resumeDur, err: fmt.Errorf("metadata: %w", err)} + return fphBenchSample{err: fmt.Errorf("metadata: %w", err)} } newMeta := origMeta newMeta.Template.BuildID = buildID @@ -150,7 +143,7 @@ func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause) pauseDur := time.Since(pauseStart) if err != nil { - return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("pause: %w", err)} + return fphBenchSample{pause: pauseDur, err: fmt.Errorf("pause: %w", err)} } defer snapshot.Close(context.WithoutCancel(ctx)) @@ -158,26 +151,21 @@ func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph upload, err := sandbox.NewUpload(ctx, nil, snapshot, r.storage, storage.CompressConfig{}, nil, "", nil) if err != nil { - return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("upload prepare: %w", err)} + return fphBenchSample{pause: pauseDur, err: fmt.Errorf("upload prepare: %w", err)} } if err := upload.Run(ctx); err != nil { - return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("upload: %w", err)} + return fphBenchSample{pause: pauseDur, err: fmt.Errorf("upload: %w", err)} } memfileBytes, err := readLocalMemfileSize(buildID) if err != nil { - return fphBenchSample{resume: resumeDur, pause: pauseDur, err: fmt.Errorf("memfile size: %w", err)} + return fphBenchSample{pause: pauseDur, err: fmt.Errorf("memfile size: %w", err)} } - cleanupLocalBuild(buildID) - return fphBenchSample{ memfileBytes: memfileBytes, - hintCount: balloon.HintCount, hintBytes: balloon.HintFreed, - reportCount: balloon.ReportCount, reportBytes: balloon.ReportFreed, - resume: resumeDur, pause: pauseDur, } } @@ -187,145 +175,13 @@ func readLocalMemfileSize(buildID string) (int64, error) { if basePath == "" { return 0, errors.New("LOCAL_TEMPLATE_STORAGE_BASE_PATH not set; -fph-bench requires local storage") } - path := filepath.Join(basePath, buildID, storage.MemfileName) - - return cmdutil.GetActualFileSize(path) + return cmdutil.GetActualFileSize(filepath.Join(basePath, buildID, storage.MemfileName)) } func cleanupLocalBuild(buildID string) { - basePath := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH") - if basePath == "" { - return - } - _ = os.RemoveAll(filepath.Join(basePath, buildID)) -} - -func fphBenchSampleString(s fphBenchSample) string { - if s.err != nil { - return fmt.Sprintf("❌ %v", s.err) - } - - return fmt.Sprintf("memfile=%s fpr=%d/%s fph=%d/%s (resume %s, pause %s)", - fmtBytes(s.memfileBytes), - s.reportCount, fmtBytes(int64(s.reportBytes)), - s.hintCount, fmtBytes(int64(s.hintBytes)), - fmtDur(s.resume), fmtDur(s.pause)) -} - -func printFphBenchSummary(label string, samples []fphBenchSample) { - ok := successfulSamples(samples) - if len(ok) == 0 { - fmt.Printf(" %-9s: no successful samples\n", label) - - return - } - - bytesAvg, bytesStd := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return s.memfileBytes })) - pauseAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.pause) })) - fphAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.hintBytes) })) - fprAvg, _ := meanStd(intSlice(ok, func(s fphBenchSample) int64 { return int64(s.reportBytes) })) - - fmt.Printf(" %-9s: memfile %s ± %s fpr_freed %s fph_freed %s pause avg %s (n=%d)\n", - label, - fmtBytes(int64(bytesAvg)), fmtBytes(int64(bytesStd)), - fmtBytes(int64(fprAvg)), - fmtBytes(int64(fphAvg)), - fmtDur(time.Duration(pauseAvg)), - len(ok)) -} - -func printFphBenchDelta(noFph, withFph []fphBenchSample) { - a := successfulSamples(noFph) - b := successfulSamples(withFph) - if len(a) == 0 || len(b) == 0 { - return - } - - noFphAvg, _ := meanStd(intSlice(a, func(s fphBenchSample) int64 { return s.memfileBytes })) - withFphAvg, _ := meanStd(intSlice(b, func(s fphBenchSample) int64 { return s.memfileBytes })) - - delta := noFphAvg - withFphAvg - pct := 0.0 - if noFphAvg > 0 { - pct = delta / noFphAvg * 100 - } - - pauseNoFph, _ := meanStd(intSlice(a, func(s fphBenchSample) int64 { return int64(s.pause) })) - pauseWithFph, _ := meanStd(intSlice(b, func(s fphBenchSample) int64 { return int64(s.pause) })) - pauseDelta := pauseWithFph - pauseNoFph - - fmt.Println() - fmt.Printf(" FPH freed extra: %s (%.1f%% of FPR-only memfile)\n", fmtBytes(int64(delta)), pct) - fmt.Printf(" Pause overhead : %s\n", fmtDur(time.Duration(pauseDelta))) -} - -func successfulSamples(samples []fphBenchSample) []fphBenchSample { - out := samples[:0:0] - for _, s := range samples { - if s.err == nil { - out = append(out, s) - } - } - - return out -} - -func intSlice[T any](items []T, f func(T) int64) []int64 { - out := make([]int64, len(items)) - for i, x := range items { - out[i] = f(x) - } - - return out -} - -func meanStd(xs []int64) (mean, std float64) { - if len(xs) == 0 { - return 0, 0 - } - var sum float64 - for _, x := range xs { - sum += float64(x) - } - mean = sum / float64(len(xs)) - - if len(xs) < 2 { - return mean, 0 - } - var v float64 - for _, x := range xs { - d := float64(x) - mean - v += d * d - } - std = math.Sqrt(v / float64(len(xs)-1)) - - return mean, std -} - -func fmtBytes(n int64) string { - if n < 0 { - return "-" + fmtBytes(-n) - } - switch { - case n < 1<<10: - return fmt.Sprintf("%d B", n) - case n < 1<<20: - return fmt.Sprintf("%.1f KiB", float64(n)/(1<<10)) - case n < 1<<30: - return fmt.Sprintf("%.1f MiB", float64(n)/(1<<20)) - default: - return fmt.Sprintf("%.2f GiB", float64(n)/(1<<30)) + if base := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"); base != "" { + _ = os.RemoveAll(filepath.Join(base, buildID)) } } -func firstErr(groups ...[]fphBenchSample) error { - for _, g := range groups { - for _, s := range g { - if s.err != nil { - return s.err - } - } - } - - return nil -} +func fmtMiB(n int64) string { return fmt.Sprintf("%.1f MiB", float64(n)/(1<<20)) } diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index 2fd3e9bb6b..8a4fa26c0c 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -127,8 +127,7 @@ func main() { isPauseMode := pauseCount > 0 - // Interactive pause modes are incompatible with iterations (fph-bench - // reuses -cmd-pause as its workload and runs its own iteration loop). + // fph-bench reuses -cmd-pause and -iterations. if !*fphBench && *iterations > 0 && (*signalPause != "" || *cmdPause != "" || *cmdSignalPause != "") { log.Fatal("-signal-pause, -cmd-pause, and -cmd-signal-pause are incompatible with -iterations") } @@ -149,13 +148,8 @@ func main() { log.Fatal("-shell can only be used in interactive mode (no -cmd, no pause flags, no -iterations)") } - if *fphBench { - if *cmdPause == "" { - log.Fatal("-fph-bench requires -cmd-pause to specify the workload") - } - if *fphTimeoutMs > 0 { - log.Fatal("-fph-bench manages -fph-timeout-ms internally; do not set it") - } + if *fphBench && (*cmdPause == "" || *fphTimeoutMs > 0) { + log.Fatal("-fph-bench requires -cmd-pause and is incompatible with -fph-timeout-ms") } // Generate new build ID if not specified and pause mode is enabled @@ -192,15 +186,11 @@ func main() { iterations: *iterations, } - fphBenchOpts := fphBenchOptions{ - enabled: *fphBench, - workload: *cmdPause, - iterations: *iterations, - delay: *fphBenchDelay, - } - if *fphBench && fphBenchOpts.iterations <= 0 { - fphBenchOpts.iterations = 3 + benchIters := *iterations + if *fphBench && benchIters <= 0 { + benchIters = 3 } + fphBenchOpts := fphBenchOptions{enabled: *fphBench, workload: *cmdPause, iterations: benchIters, delay: *fphBenchDelay} err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts, fphBenchOpts) cancel() diff --git a/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go b/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go deleted file mode 100644 index 506d17400e..0000000000 --- a/packages/orchestrator/pkg/sandbox/fc/fc_metrics_test.go +++ /dev/null @@ -1,37 +0,0 @@ -//go:build linux - -package fc - -import ( - "encoding/json" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// Confirms the JSON tags on firecrackerBalloonMetrics match the keys FC -// emits, so the reader actually populates the snapshot in production. -func TestFirecrackerMetrics_ParsesBalloonLine(t *testing.T) { - t.Parallel() - - const line = `{ - "net": {}, - "block": {}, - "balloon": { - "free_page_hint_count": 11, - "free_page_hint_freed": 46137344, - "free_page_report_count": 2, - "free_page_report_freed": 8388608 - } - }` - - var m firecrackerMetrics - require.NoError(t, json.Unmarshal([]byte(line), &m)) - - snap := accumulateBalloon(nil, m.Balloon) - assert.Equal(t, uint64(11), snap.HintCount) - assert.Equal(t, uint64(46137344), snap.HintFreed) - assert.Equal(t, uint64(2), snap.ReportCount) - assert.Equal(t, uint64(8388608), snap.ReportFreed) -} diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go index 9130c9844e..c794e23ed8 100644 --- a/packages/orchestrator/pkg/sandbox/fc/process.go +++ b/packages/orchestrator/pkg/sandbox/fc/process.go @@ -137,9 +137,8 @@ type Process struct { client *apiClient - // balloonAccum holds the cumulative virtio-balloon counters across - // all FC metrics flushes. FC's SharedIncMetric resets on each flush, - // so the metrics-reader goroutine sums per-line deltas into here. + // balloonAccum is the cumulative virtio-balloon snapshot summed by the + // metrics-reader goroutine (FC's SharedIncMetric resets per flush). balloonAccum atomic.Pointer[BalloonMetricsSnapshot] } diff --git a/packages/orchestrator/scripts/bench-fph.sh b/packages/orchestrator/scripts/bench-fph.sh index 0b2dd9c43c..0884f1f69a 100755 --- a/packages/orchestrator/scripts/bench-fph.sh +++ b/packages/orchestrator/scripts/bench-fph.sh @@ -1,45 +1,22 @@ #!/usr/bin/env bash # Compare pause memfile size with vs without virtio-balloon free-page-hinting. -# -# Resumes the given build, runs a workload that dirties then frees ~256 MiB in -# the guest, pauses, and reports the resulting memfile data-layer size for -# both arms (FPR-only and FPR + FPH). FPH wins when it converts more freed -# pages into Empty mappings, shrinking the data layer. -# -# Usage: bench-fph.sh [--delay 0s] [--iterations 3] [--workload ""] -# -# Requires: root, FC v1.14+ template, .local-build/ checkout (or set -# LOCAL_TEMPLATE_STORAGE_BASE_PATH). +# Usage: bench-fph.sh [--delay 0s] [--iterations 3] [--workload ] +# Requires root, FC v1.14+ template, and LOCAL_TEMPLATE_STORAGE_BASE_PATH. set -euo pipefail -if [[ $EUID -ne 0 ]]; then - echo "must run as root" >&2 - exit 1 -fi - +if [[ $EUID -ne 0 ]]; then echo "must run as root" >&2; exit 1; fi if [[ $# -lt 1 ]]; then echo "usage: $0 [--delay 0s] [--iterations 3] [--workload ]" >&2 exit 1 fi -BUILD_ID="$1" -shift - +BUILD_ID="$1"; shift DELAY="0s" ITERATIONS="3" -# 256 MiB allocate + touch + free. Python bytearray of this size is mmap'd by -# glibc, so del() returns it directly to the kernel buddy allocator. The sleep -# gives FPR + buddy coalescing a moment to settle. -WORKLOAD='python3 - < Date: Wed, 13 May 2026 17:05:32 -0700 Subject: [PATCH 3/5] fix(fph-bench): satisfy nlreturn lint --- packages/orchestrator/cmd/resume-build/fph_bench.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/orchestrator/cmd/resume-build/fph_bench.go b/packages/orchestrator/cmd/resume-build/fph_bench.go index 5e497fd62f..bf3ef3ce11 100644 --- a/packages/orchestrator/cmd/resume-build/fph_bench.go +++ b/packages/orchestrator/cmd/resume-build/fph_bench.go @@ -56,6 +56,7 @@ func (r *runner) fphBench(ctx context.Context, opts fphBenchOptions) error { if n == 0 { return 0 } + return sum / n } memfile := func(s fphBenchSample) int64 { return s.memfileBytes } @@ -71,6 +72,7 @@ func (r *runner) fphBench(ctx context.Context, opts fphBenchOptions) error { } } } + return nil } @@ -84,6 +86,7 @@ func (r *runner) fphBenchArm(ctx context.Context, opts fphBenchOptions, withFph out = append(out, s) if s.err != nil { fmt.Printf("[%d/%d] %s: %v\n", i+1, opts.iterations, label, s.err) + continue } fmt.Printf("[%d/%d] %s: memfile %s fpr_freed %s fph_freed %s pause %s\n", @@ -93,6 +96,7 @@ func (r *runner) fphBenchArm(ctx context.Context, opts fphBenchOptions, withFph fmtMiB(int64(s.hintBytes)), s.pause.Round(time.Millisecond)) } + return out } @@ -175,6 +179,7 @@ func readLocalMemfileSize(buildID string) (int64, error) { if basePath == "" { return 0, errors.New("LOCAL_TEMPLATE_STORAGE_BASE_PATH not set; -fph-bench requires local storage") } + return cmdutil.GetActualFileSize(filepath.Join(basePath, buildID, storage.MemfileName)) } From 65cbf19cbce1124ab47f7b76d3a7a186bdd25b3c Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Thu, 14 May 2026 16:48:37 -0700 Subject: [PATCH 4/5] refactor(fph): merge install + drain timeout into one JSON flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces `free-page-hinting-install` (bool) and the prior `free-page-hinting-timeout-ms` (int) with a single `free-page-hinting-config` JSON flag keyed by `enabled`, `pause`, `build` (matching SnapshotUseCase). Lets operators install FPH on the balloon but disable the pre-pause drain per use case — e.g. keep it on for normal pause and off for template build, where it was observed to grow the memfile. --- .../cmd/resume-build/fph_bench.go | 10 +++-- .../orchestrator/cmd/resume-build/main.go | 8 ++-- packages/orchestrator/pkg/sandbox/sandbox.go | 6 +-- packages/shared/pkg/featureflags/flags.go | 39 +++++++++++++------ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/packages/orchestrator/cmd/resume-build/fph_bench.go b/packages/orchestrator/cmd/resume-build/fph_bench.go index bf3ef3ce11..f9bd0b941a 100644 --- a/packages/orchestrator/cmd/resume-build/fph_bench.go +++ b/packages/orchestrator/cmd/resume-build/fph_bench.go @@ -9,6 +9,7 @@ import ( "time" "github.com/google/uuid" + "github.com/launchdarkly/go-sdk-common/v3/ldvalue" "github.com/e2b-dev/infra/packages/orchestrator/cmd/internal/cmdutil" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox" @@ -102,11 +103,12 @@ func (r *runner) fphBenchArm(ctx context.Context, opts fphBenchOptions, withFph func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph bool) fphBenchSample { if withFph { - featureflags.NewBoolFlag("free-page-hinting-install", true) - featureflags.NewIntFlag("free-page-hinting-timeout-ms", int(fphBenchDrainTimeout/time.Millisecond)) + featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{ + "enabled": true, + "pause": int(fphBenchDrainTimeout / time.Millisecond), + })) } else { - featureflags.NewBoolFlag("free-page-hinting-install", false) - featureflags.NewIntFlag("free-page-hinting-timeout-ms", 0) + featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.Null()) } buildID := uuid.New().String() diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index 8a4fa26c0c..417413b150 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -71,7 +71,7 @@ func main() { optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)") shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)") - fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)") + fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-config pause timeout LD flag (0 = use LD default)") reclaim := flag.Bool("reclaim", false, "enable pre-pause reclaim chain (fstrim 500ms, sync 500ms, drop_caches 200ms, compact 1s)") fphBench := flag.Bool("fph-bench", false, "compare pause memfile size with vs without FPH; requires -cmd-pause workload, uses -iterations (default 3), forces FPR on") @@ -80,8 +80,10 @@ func main() { flag.Parse() if *fphTimeoutMs > 0 { - featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs) - featureflags.NewBoolFlag("free-page-hinting-install", true) + featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{ + "enabled": true, + "pause": *fphTimeoutMs, + })) } if *reclaim { diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index 7c9a019c99..2912def24d 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -504,7 +504,7 @@ func (f *Factory) CreateSandbox( }) freePageHinting := fc.FCSupportsFreePageHinting(config.FirecrackerConfig.FirecrackerVersion) && - f.featureFlags.BoolFlag(ctx, featureflags.FreePageHintingInstallFlag, sandboxLDContext(runtime, config)) + featureflags.IsFreePageHintingEnabled(ctx, f.featureFlags, sandboxLDContext(runtime, config)) err = fcHandle.Create( ctx, @@ -1082,8 +1082,8 @@ func (s *Sandbox) Pause( s.bestEffortReclaim(ctx) // Drain free-page-hinting before pause so the snapshot doesn't capture - // pages the guest already considers free. Timeout=0 disables. - if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs, sandboxLDContext(s.Runtime, s.Config))) * time.Millisecond; t > 0 { + // pages the guest already considers free. Timeout per use case; 0 disables. + if t := featureflags.GetFreePageHintingTimeout(ctx, s.featureFlags, string(useCase), sandboxLDContext(s.Runtime, s.Config)); t > 0 { drainCtx, cancel := context.WithTimeout(ctx, t) if err := s.process.DrainBalloon(drainCtx); err != nil { telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err) diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go index a171ae08e2..d8d5279c72 100644 --- a/packages/shared/pkg/featureflags/flags.go +++ b/packages/shared/pkg/featureflags/flags.go @@ -124,12 +124,6 @@ var ( SandboxLabelBasedSchedulingFlag = NewBoolFlag("sandbox-label-based-scheduling", false) OptimisticResourceAccountingFlag = NewBoolFlag("sandbox-placement-optimistic-resource-accounting", false) FreePageReportingFlag = NewBoolFlag("free-page-reporting", false) - // FreePageHintingInstallFlag controls whether FreePageHinting=true is - // configured on the balloon at install time. Just having FPH on the - // balloon doesn't trigger the kernel race fixed in - // https://lore.kernel.org/lkml/20240429125100.7393-1-david@redhat.com/ - // — that race is on the actual hinting flow, gated by FreePageHintingTimeoutMs. - FreePageHintingInstallFlag = NewBoolFlag("free-page-hinting-install", false) NetworkTransformRulesFlag = NewBoolFlag("network-transform-rules", env.IsDevelopment()) ) @@ -170,11 +164,7 @@ var ( BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio) BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight) EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds - // FreePageHintingTimeoutMs gates the pre-pause balloon FPH drain. 0 - // disables. Evaluated with sandbox/kernel-version LD context so operators - // can roll out only on guests with the kernel race fix. - FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0) - HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) + HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10) // BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage @@ -234,6 +224,33 @@ var ( // Example: {"sync":500,"drop_caches":200,"compact_memory":1000,"fstrim":500} var ReclaimConfigFlag = NewJSONFlag("guest-pause-reclaim", ldvalue.Null()) +// FreePageHintingConfig controls virtio-balloon free-page-hinting. +// "enabled" configures FreePageHinting=true on the balloon at install time +// (kernel-side eligibility is targeted separately via the LD context — the +// race fixed in https://lore.kernel.org/lkml/20240429125100.7393-1-david@redhat.com/ +// is on the hinting flow, gated by the per-use-case timeouts below). +// "pause"/"build" are pre-pause drain timeouts in ms keyed by SnapshotUseCase; +// missing/zero/negative disables the drain for that use case. +// Example: {"enabled": true, "pause": 500, "build": 0} +var FreePageHintingConfig = NewJSONFlag("free-page-hinting-config", ldvalue.Null()) + +// IsFreePageHintingEnabled reports whether FPH should be configured on the +// balloon at install time. +func IsFreePageHintingEnabled(ctx context.Context, ff *Client, contexts ...ldcontext.Context) bool { + return ff.JSONFlag(ctx, FreePageHintingConfig, contexts...).GetByKey("enabled").BoolValue() +} + +// GetFreePageHintingTimeout returns the pre-pause FPH drain timeout for the +// given SnapshotUseCase. Zero means disabled. +func GetFreePageHintingTimeout(ctx context.Context, ff *Client, useCase string, contexts ...ldcontext.Context) time.Duration { + ms := ff.JSONFlag(ctx, FreePageHintingConfig, contexts...).GetByKey(useCase).IntValue() + if ms <= 0 { + return 0 + } + + return time.Duration(ms) * time.Millisecond +} + type ReclaimConfig struct { Sync time.Duration DropCaches time.Duration From dcc9dcd2be7ce29ee4287956ecb7415928fdd5d7 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Thu, 14 May 2026 16:48:43 -0700 Subject: [PATCH 5/5] fix(create-build): gate FreePageReporting on Firecracker version Mirror prod's gating from `pkg/template/server/create_template.go`: balloon installation fails on FC <1.14, so the local CLI must not hardcode `FreePageReporting: true`. --- packages/orchestrator/cmd/create-build/main.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/cmd/create-build/main.go b/packages/orchestrator/cmd/create-build/main.go index 8fe6a832b8..4d293a4e92 100644 --- a/packages/orchestrator/cmd/create-build/main.go +++ b/packages/orchestrator/cmd/create-build/main.go @@ -18,6 +18,7 @@ import ( "time" "github.com/launchdarkly/go-sdk-common/v3/ldlog" + "github.com/launchdarkly/go-sdk-common/v3/ldvalue" "go.opentelemetry.io/otel/metric/noop" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -39,6 +40,7 @@ import ( "github.com/e2b-dev/infra/packages/orchestrator/pkg/template/build/metrics" artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry" "github.com/e2b-dev/infra/packages/shared/pkg/dockerhub" + "github.com/e2b-dev/infra/packages/shared/pkg/fcversion" "github.com/e2b-dev/infra/packages/shared/pkg/featureflags" templatemanager "github.com/e2b-dev/infra/packages/shared/pkg/grpc/template-manager" "github.com/e2b-dev/infra/packages/shared/pkg/logger" @@ -77,7 +79,9 @@ func main() { } // FPH must be installed at boot — flag is read by Create, not Resume. - featureflags.NewBoolFlag("free-page-hinting-install", true) + featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{ + "enabled": true, + })) // Suppress other noisy output unless verbose, but keep std log for fatal errors if !*verbose { @@ -354,6 +358,13 @@ func doBuild( }) } + // Mirror prod gating in pkg/template/server/create_template.go: balloon + // is rejected on FC <1.14, so we can't unconditionally request FPR. + fcInfo, err := fcversion.New(fc) + if err != nil { + return fmt.Errorf("invalid firecracker version %q: %w", fc, err) + } + tmpl := config.TemplateConfig{ Version: templates.TemplateV2LatestVersion, TemplateID: templateID, @@ -366,7 +377,7 @@ func doBuild( ReadyCmd: readyCmd, KernelVersion: kernel, FirecrackerVersion: fc, - FreePageReporting: true, + FreePageReporting: fcInfo.HasFreePageReporting(), TeamID: "local", Steps: steps, }