diff --git a/.dockerignore b/.dockerignore index df466067e1..8c08126b3c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,4 @@ .gopath bundles vendor/pkg +balena-benchmarking diff --git a/balena-benchmarking/README.md b/balena-benchmarking/README.md new file mode 100644 index 0000000000..1c29e47fc7 --- /dev/null +++ b/balena-benchmarking/README.md @@ -0,0 +1,34 @@ +# balenaEngine Benchmarking + +This directory contains some scripts we use to benchmark balenaEngine. They are +not super stable or ready for public consumption, but they are good enough for +informing decisions when working on improvements. + +Currently, there's actually just one script here. + +## `delta-benchmarks.sh` + +This script collects some metrics on the generation of deltas. Namely, it +measures how long it takes, how much memory it uses, and how large the +resulting deltas are. + +The script does this for a list of references (branches, tags, commits) defined +in the `branches` variable at its start, and for each of the test cases defined +in `testCases`. You can customize these two variables as you need. + +You need to run this as the superuser (`root`), from the root of the +balena-engine repository. Something like this should work: + +```sh +sudo ./balena-benchmarking/delta-benchmarks.sh +``` + +All required images will be pulled into +`./balena-benchmarking/balenad-data-root`. If you have already pulled all +images, you can run a bit faster by using this: + +```sh +sudo SKIP_PULL=y ./balena-benchmarking/delta-benchmarks.sh +``` + +Results are written to `./balena-benchmarking/delta.csv`. diff --git a/balena-benchmarking/delta-benchmarks.sh b/balena-benchmarking/delta-benchmarks.sh new file mode 100755 index 0000000000..6c4220cb22 --- /dev/null +++ b/balena-benchmarking/delta-benchmarks.sh @@ -0,0 +1,185 @@ +#!/bin/bash + +# Environment variables affecting the script behavior: +# +# - SKIP_PULL: if set to 'y', images will not be pulled before running the +# benchmarks.
Useful to spare some time if you are sure you already have all
+#   needed images locally.
+
+# Abort on the first failing command.
+set -e
+
+# Branches to benchmark.
+branches=(
+    "master"
+    "lmb/librsync-memory"
+)
+
+# Test cases to benchmark (test case name, basis image, target image).
+testCases=(
+    "busybox-1.36.0-1.36.1 busybox:1.36.0 busybox:1.36.1"
+    "busybox-1.25.0-1.36.1 busybox:1.25.0 busybox:1.36.1"
+    "debian-10.0-11.7 debian:10.0 debian:11.7"
+    "debian-11.6-11.7 debian:11.6 debian:11.7"
+    "debian-slim-11.7 debian:11.7-slim debian:11.7"
+    "debian-11.7-slim debian:11.7 debian:11.7-slim"
+    "ubuntu-18.04-23.04 ubuntu:18.04 ubuntu:23.04"
+    "alpine-3.7-3.18 alpine:3.7 alpine:3.18"
+    "audio-aarch64-0.5.5-0.5.6 bh.cr/balenalabs/audio-aarch64/0.5.5 bh.cr/balenalabs/audio-aarch64/0.5.6"
+    "audio-amd64-0.5.5-0.5.6 bh.cr/balenalabs/audio-amd64/0.5.5 bh.cr/balenalabs/audio-amd64/0.5.6"
+    "browser-aarch64-2.3.7-2.4.7 bh.cr/balenalabs/browser-aarch64/2.3.7 bh.cr/balenalabs/browser-aarch64/2.4.7"
+    "browser-amd64-2.3.7-2.4.7 bh.cr/balenalabs/browser-amd64/2.3.7 bh.cr/balenalabs/browser-amd64/2.4.7"
+    "nodered-aarch64-2.4.0-2.4.1 bh.cr/balenalabs/balena-node-red-aarch64/2.4.0 bh.cr/balenalabs/balena-node-red-aarch64/2.4.1"
+    "ca-priv-amd64-0.0.12-0.0.13 bh.cr/balena/ca-private-amd64/0.0.12 bh.cr/balena/ca-private-amd64/0.0.13"
+    "ca-priv-amd64-0.0.13-0.0.12 bh.cr/balena/ca-private-amd64/0.0.13 bh.cr/balena/ca-private-amd64/0.0.12"
+    "unzoner-armv7hf-1.2.0-1.2.23 bh.cr/belodetek/unzoner-armv7hf/1.2.0 bh.cr/belodetek/unzoner-armv7hf/1.2.23"
+    "unzoner-armv7hf-1.2.23-1.2.0 bh.cr/belodetek/unzoner-armv7hf/1.2.23 bh.cr/belodetek/unzoner-armv7hf/1.2.0"
+    # TODO maybe: https://gitlab.com/nvidia/container-images/l4t-base
+)
+
+# Where the benchmark engine keeps its images, where it writes its PID, and
+# the tag given to every generated delta image.
+balenadDataRoot="./balena-benchmarking/balenad-data-root"
+balenadPIDFile="/var/run/balena-engine.pid"
+deltaTag="balena-engine-delta-benchmark-image"
+
+# Exit with an error unless the current directory contains a Makefile, which
+# is used as the marker for the root of the balena-engine repository.
+function assertRunningFromRepoRoot() {
+    if [ ! -f "Makefile" ]; then
+        echo "Please run from the root of the balena-engine repository."
+        exit 1
+    fi
+}
+
+# Build balenaEngine from branch $1.
+function buildBalenaEngine() {
+    echo
+    echo "BUILDING BALENA ENGINE FROM BRANCH $1"
+    echo
+    git checkout "$1"
+    make dynbinary
+}
+
+# Start balenad in the background on $balenadDataRoot and block until it has
+# written its PID file and answers `balena-engine info`. A PID file left from
+# an earlier run is handled by killing that instance first.
+function startBalenad() {
+    echo "Starting balenad..."
+    mkdir -p "$balenadDataRoot"
+
+    if [ -f "$balenadPIDFile" ]; then
+        killBalenad
+    fi
+
+    balenad --data-root "$balenadDataRoot" --pidfile "$balenadPIDFile" &> /dev/null &
+    echo -n "Waiting for balenad to start... "
+
+    while [ ! -f "$balenadPIDFile" ]; do
+        sleep 1
+    done
+
+    # Poll the engine itself. The command must be run directly: wrapping it in
+    # `[ ... ]` turns it into a (malformed) test expression that never
+    # executes balena-engine, so the loop would not wait at all.
+    while ! balena-engine info &> /dev/null; do
+        sleep 1
+    done
+    echo " done! (PID = $(cat "$balenadPIDFile"))"
+}
+
+# Stop the balenad whose PID is recorded in $balenadPIDFile: first with the
+# default signal and, if the PID file is still there five seconds later, with
+# SIGKILL. Does nothing when there is no PID file.
+function killBalenad() {
+    echo "Killing balenad..."
+
+    if [ ! -f "$balenadPIDFile" ]; then
+        return
+    fi
+    # The PID file may be stale (process already gone); under `set -e` a
+    # failing kill must not abort the whole benchmark run.
+    kill "$(cat "$balenadPIDFile")" 2> /dev/null || true
+    sleep 5
+    if [ -f "$balenadPIDFile" ]; then
+        echo "balenaEngine still running, killing with -KILL"
+        kill -KILL "$(cat "$balenadPIDFile")" 2> /dev/null || true
+        # A process stopped with SIGKILL cannot remove its own PID file.
+        # Remove it here so the next startBalenad does not mistake it for a
+        # running engine.
+        rm -f "$balenadPIDFile"
+    fi
+}
+
+# Pull the basis and target image of every test case into $balenadDataRoot,
+# using an engine built from master. Skipped entirely when SKIP_PULL is 'y'.
+function pullAllImages() {
+    if [ "$SKIP_PULL" == "y" ]; then
+        return
+    fi
+
+    echo
+    echo "PULLING ALL IMAGES"
+    echo
+
+    buildBalenaEngine "master"
+
+    startBalenad
+
+    local tcBasis tcTarget
+    for testCase in "${testCases[@]}"; do
+        # Fields are: name, basis image, target image; the name is not needed.
+        read -r _ tcBasis tcTarget <<< "$testCase"
+
+        balena-engine pull "$tcBasis"
+        balena-engine pull "$tcTarget"
+    done
+
+    killBalenad
+}
+
+# Print the peak resident memory of the running balenad, as the number found
+# in the VmHWM line of /proc/<pid>/status (the caller treats it as kB).
+function balenadMaxMemory() {
+    if [ ! -f "$balenadPIDFile" ]; then
+        # stdout is captured by the caller, so report the problem on stderr.
+        echo "balenad not running!" >&2
+        exit 1
+    fi
+
+    # Read the high water mark (VmHWM) of the balenad process.
+    awk '/VmHWM/ {print $2}' "/proc/$(cat "$balenadPIDFile")/status"
+}
+
+# Make the freshly built binaries take precedence over any installed ones.
+export PATH="$(pwd)/bundles/dynbinary-daemon:$PATH"
+
+assertRunningFromRepoRoot
+
+# Remember the current branch so we can switch back to it later.
+originalBranch=$(git rev-parse --abbrev-ref HEAD)
+echo "Running from this branch: $originalBranch"
+
+pullAllImages
+
+# The CSV file where results will be stored.
+csvResults="./balena-benchmarking/delta.csv"
+tmpResults="./balena-benchmarking/delta.tmp"
+
+# Safety net for aborted runs: stop the engine (if one is up) and return to
+# the branch the user started from, instead of leaving the repository on
+# whichever branch was being benchmarked when the script died.
+function restoreEnvironment() {
+    killBalenad || true
+    echo "Restoring original branch $originalBranch..."
+    git checkout "$originalBranch"
+}
+trap restoreEnvironment EXIT
+
+# Initialize the CSV files with headers.
+echo "Case,Branch,BasisSize,DeltaSize,DeltaTime,DeltaMem" > "$csvResults"
+rm -f "$tmpResults"
+
+for branch in "${branches[@]}"; do
+    echo "Running benchmarks for branch $branch"
+
+    buildBalenaEngine "$branch"
+
+    for testCase in "${testCases[@]}"; do
+        # Fields are: test case name, basis image, target image.
+        read -r tcName tcBasis tcTarget <<< "$testCase"
+
+        echo "Running benchmark for $branch / $tcName"
+
+        # A fresh engine per test case, so that the memory high water mark
+        # read below reflects this delta only.
+        startBalenad
+
+        # baselineMemInKB=$(balenadMaxMemory)
+
+        # Time the delta generation with the external `time` command (the
+        # backslash bypasses the shell keyword). Its exit status is checked
+        # explicitly: piping straight into `tail` would hide a failure, and
+        # the inspect below would then measure a stale "$deltaTag" image left
+        # over from a previous test case.
+        if ! timeOutput=$(\time -f%e balena-engine image delta "$tcBasis" "$tcTarget" --tag "$deltaTag" 2>&1); then
+            echo "Delta generation failed for $branch / $tcName:" >&2
+            echo "$timeOutput" >&2
+            exit 1
+        fi
+        # The elapsed time is the last line of the captured output.
+        deltaTimeInSecs=$(tail -n 1 <<< "$timeOutput")
+
+        usedMemInKB=$(balenadMaxMemory)
+        usedMemInBytes=$((usedMemInKB * 1024))
+        basisSizeInBytes=$(balena-engine inspect "$tcBasis" --format "{{.Size}}")
+        deltaSizeInBytes=$(balena-engine inspect "$deltaTag" --format "{{.Size}}")
+
+        # Collect data.
+        echo "$tcName,$branch,$basisSizeInBytes,$deltaSizeInBytes,$deltaTimeInSecs,$usedMemInBytes" >> "$tmpResults"
+
+        # Thanks Engine, you may go now.
+        killBalenad
+    done
+
+    echo "Done with branch $branch"
+done
+
+# Sort the rows so that results for the same test case end up next to each
+# other, one row per branch, below the header written earlier.
+echo "Preparing final results..."
+sort "$tmpResults" >> "$csvResults"
+rm -f "$tmpResults"
+
+# Everything went fine: disarm the safety net and restore the branch here.
+trap - EXIT
+
+# Switch back to the original branch.
+echo "Restoring original branch $originalBranch..."
+git checkout "$originalBranch"
+
+echo "Done with everything!"
diff --git a/daemon/images/image_delta.go b/daemon/images/image_delta.go index 05c4051f76..1e61ad804a 100644 --- a/daemon/images/image_delta.go +++ b/daemon/images/image_delta.go @@ -65,7 +65,10 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima defer progressReader.Close() sigStart := time.Now() - srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC) + + bufioReader := bufio.NewReaderSize(progressReader, 65536) + numBlocks := int(srcDataLen/512) + 1 + srcSig, err := librsync.SignatureWithBlockCount(bufioReader, ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC, numBlocks) if err != nil { return err } diff --git a/vendor.conf b/vendor.conf index ebff4f8499..9a2e722ded 100644 --- a/vendor.conf +++ b/vendor.conf @@ -195,7 +195,7 @@ github.com/willf/bitset 559910e8471e48d76d9e5a1ba158 github.com/urfave/cli c71fbcefd21552b70cd625b2c54466006e258ad7 # v1.22.1 # balena deltas -github.com/balena-os/librsync-go 7b435f8f590637e8ba24e72f7cfc2d62f17e3848 # v0.8.5 +github.com/balena-os/librsync-go 114605554123d206908a21541aad3dc1fbb6ac0e # lmb/prealloc-sig-stuff github.com/balena-os/circbuf 2d080deeceffbd01dea0fcfa165dce64d2d6c9fc # v0.1.3 # runc diff --git a/vendor/github.com/balena-os/librsync-go/signature.go b/vendor/github.com/balena-os/librsync-go/signature.go index e47ac79025..a88b6d51b2 100644 --- a/vendor/github.com/balena-os/librsync-go/signature.go +++ b/vendor/github.com/balena-os/librsync-go/signature.go @@ -37,6 +37,13 @@ func CalcStrongSum(data []byte, sigType MagicNumber, strongLen uint32) ([]byte, } func Signature(input io.Reader, output io.Writer, blockLen, strongLen uint32, sigType MagicNumber) (*SignatureType, error) { + return SignatureWithBlockCount(input, output, blockLen, strongLen, sigType, 0) +} + +// SignatureWithBlockCount is a version of Signature that allows the caller to +// pass in the expected number of blocks in the Signature. 
This is used to +// pre-allocate the internal data structures. +func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strongLen uint32, sigType MagicNumber, blockCount int) (*SignatureType, error) { var maxStrongLen uint32 switch sigType { @@ -68,7 +75,8 @@ func Signature(input io.Reader, output io.Writer, blockLen, strongLen uint32, si block := make([]byte, blockLen) var ret SignatureType - ret.weak2block = make(map[uint32]int) + ret.weak2block = make(map[uint32]int, blockCount) + ret.strongSigs = make([][]byte, 0, blockCount) ret.sigType = sigType ret.strongLen = strongLen ret.blockLen = blockLen