Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions packages/orchestrator/cmd/create-build/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"time"

"github.com/launchdarkly/go-sdk-common/v3/ldlog"
"github.com/launchdarkly/go-sdk-common/v3/ldvalue"
"go.opentelemetry.io/otel/metric/noop"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
Expand All @@ -39,6 +40,7 @@ import (
"github.com/e2b-dev/infra/packages/orchestrator/pkg/template/build/metrics"
artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry"
"github.com/e2b-dev/infra/packages/shared/pkg/dockerhub"
"github.com/e2b-dev/infra/packages/shared/pkg/fcversion"
"github.com/e2b-dev/infra/packages/shared/pkg/featureflags"
templatemanager "github.com/e2b-dev/infra/packages/shared/pkg/grpc/template-manager"
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
Expand Down Expand Up @@ -76,6 +78,11 @@ func main() {
log.Fatal("-to-build required")
}

// FPH must be installed at boot — flag is read by Create, not Resume.
featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{
"enabled": true,
}))

// Suppress other noisy output unless verbose, but keep std log for fatal errors
if !*verbose {
cmdutil.SuppressNoisyLogsKeepStdLog()
Expand Down Expand Up @@ -351,6 +358,13 @@ func doBuild(
})
}

// Mirror prod gating in pkg/template/server/create_template.go: balloon
// is rejected on FC <1.14, so we can't unconditionally request FPR.
fcInfo, err := fcversion.New(fc)
if err != nil {
return fmt.Errorf("invalid firecracker version %q: %w", fc, err)
}

tmpl := config.TemplateConfig{
Version: templates.TemplateV2LatestVersion,
TemplateID: templateID,
Expand All @@ -363,6 +377,8 @@ func doBuild(
ReadyCmd: readyCmd,
KernelVersion: kernel,
FirecrackerVersion: fc,
FreePageReporting: fcInfo.HasFreePageReporting(),
TeamID: "local",
Steps: steps,
}

Expand Down
194 changes: 194 additions & 0 deletions packages/orchestrator/cmd/resume-build/fph_bench.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package main

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"time"

"github.com/google/uuid"
"github.com/launchdarkly/go-sdk-common/v3/ldvalue"

"github.com/e2b-dev/infra/packages/orchestrator/cmd/internal/cmdutil"
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox"
"github.com/e2b-dev/infra/packages/shared/pkg/featureflags"
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
)

type fphBenchOptions struct {
enabled bool
workload string
iterations int
delay time.Duration
}

const fphBenchDrainTimeout = 5 * time.Second

type fphBenchSample struct {
memfileBytes int64
hintBytes uint64
reportBytes uint64
pause time.Duration
err error
}

// fphBench runs the workload N times under FPR-only and N times under FPR+FPH,
// printing the resulting memfile data size and pause time per iteration.
// Smaller memfile is better.
func (r *runner) fphBench(ctx context.Context, opts fphBenchOptions) error {
if opts.iterations <= 0 {
return errors.New("fph-bench: iterations must be > 0")
}
fmt.Printf("FPH bench (%d iterations per arm, workload=%q)\n", opts.iterations, opts.workload)

noFph := r.fphBenchArm(ctx, opts, false, "FPR-only ")
withFph := r.fphBenchArm(ctx, opts, true, "FPR + FPH")

avg := func(s []fphBenchSample, f func(fphBenchSample) int64) int64 {
var sum, n int64
for _, x := range s {
if x.err == nil {
sum += f(x)
n++
}
}
if n == 0 {
return 0
}

return sum / n
}
memfile := func(s fphBenchSample) int64 { return s.memfileBytes }
pause := func(s fphBenchSample) int64 { return int64(s.pause) }

fmt.Printf("FPR-only : memfile %s pause %s\n", fmtMiB(avg(noFph, memfile)), time.Duration(avg(noFph, pause)))
fmt.Printf("FPR + FPH: memfile %s pause %s\n", fmtMiB(avg(withFph, memfile)), time.Duration(avg(withFph, pause)))

for _, g := range [][]fphBenchSample{noFph, withFph} {
for _, s := range g {
if s.err != nil {
return s.err
}
}
}

return nil
}

func (r *runner) fphBenchArm(ctx context.Context, opts fphBenchOptions, withFph bool, label string) []fphBenchSample {
out := make([]fphBenchSample, 0, opts.iterations)
for i := range opts.iterations {
if ctx.Err() != nil {
break
}
s := r.fphBenchOnce(ctx, opts, withFph)
out = append(out, s)
if s.err != nil {
fmt.Printf("[%d/%d] %s: %v\n", i+1, opts.iterations, label, s.err)

continue
}
fmt.Printf("[%d/%d] %s: memfile %s fpr_freed %s fph_freed %s pause %s\n",
i+1, opts.iterations, label,
fmtMiB(s.memfileBytes),
fmtMiB(int64(s.reportBytes)),
fmtMiB(int64(s.hintBytes)),
s.pause.Round(time.Millisecond))
}

return out
}

func (r *runner) fphBenchOnce(ctx context.Context, opts fphBenchOptions, withFph bool) fphBenchSample {
if withFph {
featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{
"enabled": true,
"pause": int(fphBenchDrainTimeout / time.Millisecond),
}))
} else {
featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.Null())
}

buildID := uuid.New().String()
defer cleanupLocalBuild(buildID)

runtime := sandbox.RuntimeMetadata{
TemplateID: r.buildID,
TeamID: "local",
SandboxID: fmt.Sprintf("fph-bench-%d", time.Now().UnixNano()),
ExecutionID: fmt.Sprintf("fph-bench-exec-%d", time.Now().UnixNano()),
}
t0 := time.Now()
sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil)
if err != nil {
return fphBenchSample{err: fmt.Errorf("resume: %w", err)}
}
defer sbx.Close(context.WithoutCancel(ctx))

if err := runCommandInSandbox(ctx, sbx, opts.workload); err != nil {
return fphBenchSample{err: fmt.Errorf("workload: %w", err)}
}
if opts.delay > 0 {
select {
case <-ctx.Done():
return fphBenchSample{err: ctx.Err()}
case <-time.After(opts.delay):
}
}

origMeta, err := r.tmpl.Metadata()
if err != nil {
return fphBenchSample{err: fmt.Errorf("metadata: %w", err)}
}
newMeta := origMeta
newMeta.Template.BuildID = buildID

pauseStart := time.Now()
snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause)
pauseDur := time.Since(pauseStart)
if err != nil {
return fphBenchSample{pause: pauseDur, err: fmt.Errorf("pause: %w", err)}
}
defer snapshot.Close(context.WithoutCancel(ctx))

balloon, _ := sbx.FlushAndReadBalloonMetrics(ctx)
Comment thread
cursor[bot] marked this conversation as resolved.

upload, err := sandbox.NewUpload(ctx, nil, snapshot, r.storage, storage.CompressConfig{}, nil, "", nil)
if err != nil {
return fphBenchSample{pause: pauseDur, err: fmt.Errorf("upload prepare: %w", err)}
}
if err := upload.Run(ctx); err != nil {
return fphBenchSample{pause: pauseDur, err: fmt.Errorf("upload: %w", err)}
}

memfileBytes, err := readLocalMemfileSize(buildID)
if err != nil {
return fphBenchSample{pause: pauseDur, err: fmt.Errorf("memfile size: %w", err)}
}

return fphBenchSample{
memfileBytes: memfileBytes,
hintBytes: balloon.HintFreed,
reportBytes: balloon.ReportFreed,
pause: pauseDur,
}
}

func readLocalMemfileSize(buildID string) (int64, error) {
basePath := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH")
if basePath == "" {
return 0, errors.New("LOCAL_TEMPLATE_STORAGE_BASE_PATH not set; -fph-bench requires local storage")
}

return cmdutil.GetActualFileSize(filepath.Join(basePath, buildID, storage.MemfileName))
}

func cleanupLocalBuild(buildID string) {
if base := os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"); base != "" {
_ = os.RemoveAll(filepath.Join(base, buildID))
}
}

func fmtMiB(n int64) string { return fmt.Sprintf("%.1f MiB", float64(n)/(1<<20)) }
43 changes: 34 additions & 9 deletions packages/orchestrator/cmd/resume-build/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,21 @@ func main() {
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)")

// Enables the pre-pause reclaim chain with sensible per-step caps.
fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-config pause timeout LD flag (0 = use LD default)")
reclaim := flag.Bool("reclaim", false, "enable pre-pause reclaim chain (fstrim 500ms, sync 500ms, drop_caches 200ms, compact 1s)")

fphBench := flag.Bool("fph-bench", false, "compare pause memfile size with vs without FPH; requires -cmd-pause workload, uses -iterations (default 3), forces FPR on")
fphBenchDelay := flag.Duration("fph-bench-delay", 0, "wait this long between workload completion and pause (lets FPR settle)")

flag.Parse()

if *fphTimeoutMs > 0 {
featureflags.NewJSONFlag("free-page-hinting-config", ldvalue.FromJSONMarshal(map[string]any{
"enabled": true,
"pause": *fphTimeoutMs,
}))
}

if *reclaim {
featureflags.NewJSONFlag("guest-pause-reclaim", ldvalue.FromJSONMarshal(map[string]int{
"sync": 500,
Expand Down Expand Up @@ -119,8 +129,8 @@ func main() {

isPauseMode := pauseCount > 0

// Interactive pause modes are incompatible with iterations.
if *iterations > 0 && (*signalPause != "" || *cmdPause != "" || *cmdSignalPause != "") {
// fph-bench reuses -cmd-pause and -iterations.
if !*fphBench && *iterations > 0 && (*signalPause != "" || *cmdPause != "" || *cmdSignalPause != "") {
log.Fatal("-signal-pause, -cmd-pause, and -cmd-signal-pause are incompatible with -iterations")
}

Expand All @@ -140,6 +150,10 @@ func main() {
log.Fatal("-shell can only be used in interactive mode (no -cmd, no pause flags, no -iterations)")
}

if *fphBench && (*cmdPause == "" || *fphTimeoutMs > 0) {
log.Fatal("-fph-bench requires -cmd-pause and is incompatible with -fph-timeout-ms")
}

// Generate new build ID if not specified and pause mode is enabled
outputBuildID := *toBuild
if isPauseMode && outputBuildID == "" {
Expand Down Expand Up @@ -174,7 +188,13 @@ func main() {
iterations: *iterations,
}

err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts)
benchIters := *iterations
if *fphBench && benchIters <= 0 {
benchIters = 3
}
fphBenchOpts := fphBenchOptions{enabled: *fphBench, workload: *cmdPause, iterations: benchIters, delay: *fphBenchDelay}

err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts, fphBenchOpts)
cancel()

if err != nil {
Expand Down Expand Up @@ -989,7 +1009,7 @@ func (r *runner) benchmark(ctx context.Context, n int) error {
return lastErr
}

func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions) error {
func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions, fphBenchOpts fphBenchOptions) error {
// Silence other loggers unless verbose mode
var l logger.Logger
if !verbose {
Expand Down Expand Up @@ -1135,10 +1155,11 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe

token := "local"
sbxCfg := sandbox.NewConfig(sandbox.Config{
BaseTemplateID: buildID,
Vcpu: 1,
RamMB: 512,
Envd: sandbox.EnvdMetadata{Vars: map[string]string{}, AccessToken: &token, Version: "1.0.0"},
BaseTemplateID: buildID,
Vcpu: 1,
RamMB: 512,
FreePageReporting: fphBenchOpts.enabled,
Envd: sandbox.EnvdMetadata{Vars: map[string]string{}, AccessToken: &token, Version: "1.0.0"},
FirecrackerConfig: fc.Config{
KernelVersion: meta.Template.KernelVersion,
FirecrackerVersion: meta.Template.FirecrackerVersion,
Expand All @@ -1158,6 +1179,10 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe
sbxConfig: sbxCfg,
}

if fphBenchOpts.enabled {
return r.fphBench(ctx, fphBenchOpts)
}

if runOpts.enabled() {
return r.cmdMode(ctx, runOpts)
}
Expand Down
2 changes: 1 addition & 1 deletion packages/orchestrator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ require (
github.com/gin-contrib/size v1.0.2
github.com/gin-gonic/gin v1.12.0
github.com/go-git/go-billy/v5 v5.9.0
github.com/go-openapi/runtime v0.29.2
github.com/go-openapi/strfmt v0.25.0
github.com/google/go-containerregistry v0.21.5
github.com/google/nftables v0.3.0
Expand Down Expand Up @@ -168,7 +169,6 @@ require (
github.com/go-openapi/jsonpointer v0.22.4 // indirect
github.com/go-openapi/jsonreference v0.21.3 // indirect
github.com/go-openapi/loads v0.23.2 // indirect
github.com/go-openapi/runtime v0.29.2 // indirect
github.com/go-openapi/spec v0.22.1 // indirect
github.com/go-openapi/swag v0.25.4 // indirect
github.com/go-openapi/swag/cmdutils v0.25.4 // indirect
Expand Down
Loading
Loading