Skip to content
45 changes: 32 additions & 13 deletions packages/orchestrator/pkg/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -1094,7 +1094,7 @@ func (s *Sandbox) Pause(
}

// Start POSTPROCESSING
memfileDiff, memfileDiffHeader, err := pauseProcessMemory(
memfileDiff, memfileDiffHeader, memfileDiffStats, err := pauseProcessMemory(
ctx,
buildID,
originalMemfile.Header(),
Expand All @@ -1107,7 +1107,7 @@ func (s *Sandbox) Pause(
}
cleanup.AddNoContext(ctx, memfileDiff.Close)

rootfsDiff, rootfsDiffHeader, err := pauseProcessRootfs(
rootfsDiff, rootfsDiffHeader, rootfsDiffStats, err := pauseProcessRootfs(
ctx,
buildID,
originalRootfs.Header(),
Expand Down Expand Up @@ -1135,8 +1135,10 @@ func (s *Sandbox) Pause(
Metafile: metadataFileLink,
MemfileDiff: memfileDiff,
MemfileDiffHeader: memfileDiffHeader,
MemfileDiffStats: memfileDiffStats,
RootfsDiff: rootfsDiff,
RootfsDiffHeader: rootfsDiffHeader,
RootfsDiffStats: rootfsDiffStats,

BuildID: buildID,

Expand All @@ -1161,15 +1163,17 @@ func pauseProcessMemory(
diffMetadata *header.DiffMetadata,
cacheDir string,
fc *fc.Process,
) (d build.Diff, h *header.Header, e error) {
) (d build.Diff, h *header.Header, s SnapshotDiffStats, e error) {
ctx, span := tracer.Start(ctx, "process-memory")
defer span.End()

header, err := diffMetadata.ToDiffHeader(ctx, originalHeader, buildID)
if err != nil {
return nil, nil, fmt.Errorf("failed to create memfile header: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to create memfile header: %w", err)
}

stats := diffStatsFromMetadata(diffMetadata, originalHeader)

memfileDiffPath := build.GenerateDiffCachePath(cacheDir, buildID.String(), build.Memfile)

cache, err := fc.ExportMemory(
Expand All @@ -1179,7 +1183,7 @@ func pauseProcessMemory(
diffMetadata.BlockSize,
)
if err != nil {
return nil, nil, fmt.Errorf("failed to export memory: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to export memory: %w", err)
}

diff, err := build.NewLocalDiffFromCache(
Expand All @@ -1188,10 +1192,10 @@ func pauseProcessMemory(
)
if err != nil {
// Close the cache even if the diff creation fails.
return nil, nil, fmt.Errorf("failed to create local diff from cache: %w", errors.Join(err, cache.Close()))
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to create local diff from cache: %w", errors.Join(err, cache.Close()))
}

return diff, header, nil
return diff, header, stats, nil
}

func pauseProcessRootfs(
Expand All @@ -1200,37 +1204,52 @@ func pauseProcessRootfs(
originalHeader *header.Header,
diffCreator DiffCreator,
cacheDir string,
) (d build.Diff, h *header.Header, e error) {
) (d build.Diff, h *header.Header, s SnapshotDiffStats, e error) {
ctx, span := tracer.Start(ctx, "process-rootfs")
defer span.End()

rootfsDiffFile, err := build.NewLocalDiffFile(cacheDir, buildId.String(), build.Rootfs)
if err != nil {
return nil, nil, fmt.Errorf("failed to create rootfs diff: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to create rootfs diff: %w", err)
}

rootfsDiffMetadata, err := diffCreator.process(ctx, rootfsDiffFile.File)
if err != nil {
err = errors.Join(err, rootfsDiffFile.Close())

return nil, nil, fmt.Errorf("error creating diff: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("error creating diff: %w", err)
}
telemetry.ReportEvent(ctx, "exported rootfs")

rootfsDiff, err := rootfsDiffFile.CloseToDiff(int64(originalHeader.Metadata.BlockSize))
if err != nil {
return nil, nil, fmt.Errorf("failed to convert rootfs diff file to local diff: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to convert rootfs diff file to local diff: %w", err)
}
telemetry.ReportEvent(ctx, "converted rootfs diff file to local diff")

rootfsHeader, err := rootfsDiffMetadata.ToDiffHeader(ctx, originalHeader, buildId)
if err != nil {
err = errors.Join(err, rootfsDiff.Close())

return nil, nil, fmt.Errorf("failed to create rootfs header: %w", err)
return nil, nil, SnapshotDiffStats{}, fmt.Errorf("failed to create rootfs header: %w", err)
}

return rootfsDiff, rootfsHeader, nil
stats := diffStatsFromMetadata(rootfsDiffMetadata, originalHeader)

return rootfsDiff, rootfsHeader, stats, nil
}

func diffStatsFromMetadata(d *header.DiffMetadata, original *header.Header) SnapshotDiffStats {
if d == nil || original == nil {
return SnapshotDiffStats{}
}
bs := int64(original.Metadata.BlockSize)

return SnapshotDiffStats{
DirtyBytes: int64(d.Dirty.GetCardinality()) * bs,
EmptyBytes: int64(d.Empty.GetCardinality()) * bs,
TotalBytes: int64(original.Metadata.Size),
}
Comment thread
ValentaTomas marked this conversation as resolved.
Outdated
}

// createCgroup creates a cgroup for sandbox resource accounting.
Expand Down
8 changes: 8 additions & 0 deletions packages/orchestrator/pkg/sandbox/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,19 @@ import (
"github.com/e2b-dev/infra/packages/shared/pkg/storage/header"
)

type SnapshotDiffStats struct {
DirtyBytes int64
EmptyBytes int64
TotalBytes int64
}

type Snapshot struct {
MemfileDiff build.Diff
MemfileDiffHeader *header.Header
MemfileDiffStats SnapshotDiffStats
RootfsDiff build.Diff
RootfsDiffHeader *header.Header
RootfsDiffStats SnapshotDiffStats
Snapfile template.File
Metafile template.File
BuildID uuid.UUID
Expand Down
58 changes: 58 additions & 0 deletions packages/orchestrator/pkg/sandbox/snapshot_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package sandbox

import (
"context"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"

"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
)

var (
snapshotDiffBytes = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotDiffBytes))
snapshotDiffRatioBp = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotDiffRatioBp))
snapshotTotalBytes = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotTotalBytes))
)

type SnapshotUseCase string

const (
SnapshotUseCasePause SnapshotUseCase = "pause"
SnapshotUseCaseBuild SnapshotUseCase = "build"
)

func RecordSnapshotDiffMetrics(ctx context.Context, snap *Snapshot, useCase SnapshotUseCase) {
if snap == nil {
return
}
uc := attribute.String("use_case", string(useCase))
emitSnapshotDiff(ctx, "memfile", snap.MemfileDiffStats, uc)
emitSnapshotDiff(ctx, "rootfs", snap.RootfsDiffStats, uc)
}

func emitSnapshotDiff(ctx context.Context, fileType string, s SnapshotDiffStats, uc attribute.KeyValue) {
ft := attribute.String("file_type", fileType)
snapshotTotalBytes.Record(ctx, s.TotalBytes, metric.WithAttributes(ft, uc))
for kind, b := range map[string]int64{"dirty": s.DirtyBytes, "empty": s.EmptyBytes} {
attrs := metric.WithAttributes(ft, attribute.String("kind", kind), uc)
snapshotDiffBytes.Record(ctx, b, attrs)
snapshotDiffRatioBp.Record(ctx, ratioBp(b, s.TotalBytes), attrs)
}
}

func ratioBp(num, denom int64) int64 {
if denom <= 0 {
return 0
}
bp := num * 10000 / denom
if bp < 0 {
return 0
}
if bp > 10000 {
return 10000
}

return bp
}
1 change: 1 addition & 0 deletions packages/orchestrator/pkg/server/sandboxes.go
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ func (s *Server) snapshotAndCacheSandbox(
if err != nil {
return nil, fmt.Errorf("error snapshotting sandbox: %w", err)
}
sandbox.RecordSnapshotDiffMetrics(ctx, snapshot, sandbox.SnapshotUseCasePause)

err = s.templateCache.AddSnapshot(
ctx,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func (lb *LayerExecutor) PauseAndUpload(
if err != nil {
return fmt.Errorf("error processing vm: %w", err)
}
sandbox.RecordSnapshotDiffMetrics(ctx, snapshot, sandbox.SnapshotUseCaseBuild)

// Add snapshot to template cache so it can be used immediately
err = lb.templateCache.AddSnapshot(
Expand Down
13 changes: 13 additions & 0 deletions packages/shared/pkg/telemetry/meters.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ const (
SandboxFCBlockRateLimiterEventCount HistogramType = "orchestrator.sandbox.fc.block.rate_limiter_event_count"
SandboxFCBlockIOEngineThrottled HistogramType = "orchestrator.sandbox.fc.block.io_engine_throttled"
SandboxFCBlockRemainingReqs HistogramType = "orchestrator.sandbox.fc.block.remaining_reqs"

// Snapshot diff histograms (file_type=memfile|rootfs, kind=dirty|empty, use_case=pause|build).
SnapshotDiffBytes HistogramType = "orchestrator.sandbox.snapshot.diff.bytes"
SnapshotDiffRatioBp HistogramType = "orchestrator.sandbox.snapshot.diff.ratio_bp"
SnapshotTotalBytes HistogramType = "orchestrator.sandbox.snapshot.total.bytes"
)

const (
Expand Down Expand Up @@ -352,6 +357,10 @@ var histogramDesc = map[HistogramType]string{
SandboxFCBlockRateLimiterEventCount: "Distribution of Firecracker VMM block rate limiter events per metrics flush",
SandboxFCBlockIOEngineThrottled: "Distribution of Firecracker VMM block ops throttled by io_uring engine per metrics flush",
SandboxFCBlockRemainingReqs: "Distribution of Firecracker VMM block queue remaining-request events per metrics flush",

SnapshotDiffBytes: "Per-snapshot dirty/empty bytes per file",
SnapshotDiffRatioBp: "Per-snapshot dirty/empty as fraction of total mapped size, in basis points (10000=100%)",
SnapshotTotalBytes: "Per-snapshot total mapped size of the file",
}

var histogramUnits = map[HistogramType]string{
Expand Down Expand Up @@ -382,6 +391,10 @@ var histogramUnits = map[HistogramType]string{
SandboxFCBlockRateLimiterEventCount: "{event}",
SandboxFCBlockIOEngineThrottled: "{op}",
SandboxFCBlockRemainingReqs: "{event}",

SnapshotDiffBytes: "{By}",
SnapshotDiffRatioBp: "{1}",
SnapshotTotalBytes: "{By}",
}

func GetHistogram(meter metric.Meter, name HistogramType) (metric.Int64Histogram, error) {
Expand Down
Loading