Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,24 @@ CONTRACT
use as little disk space as possible (see the --depth and --git-gc flags),
but this is not part of the contract.

SYNC PHASES

git-sync operates in two phases:

Initial sync:
git-sync retries until its first successful sync with the remote
repo. During this phase, the retry interval is controlled by
--init-period (falling back to --period if unset) and the failure
limit is controlled by --init-max-failures (falling back to
--max-failures when --init-max-failures is 0). Separate settings for
this phase make it possible to tolerate transient connectivity issues
at startup while still giving up eventually.

Steady state:
Once the first sync succeeds, git-sync polls the remote at the
--period interval and tolerates failures up to --max-failures before
aborting. --init-period and --init-max-failures no longer apply.

OPTIONS

Many options can be specified as either a commandline flag or an environment
Expand Down Expand Up @@ -365,6 +383,19 @@ OPTIONS
Enable the pprof debug endpoints on git-sync's HTTP endpoint at
/debug/pprof. Requires --http-bind to be specified.

--init-max-failures <int>, $GITSYNC_INIT_MAX_FAILURES
The number of consecutive failures allowed before aborting during
the initial sync phase (before the first successful sync). Once
the initial sync succeeds, --max-failures applies instead. Set
to 0 (the default) to disable this separate limit and fall
through to --max-failures for the entire run.

--init-period <duration>, $GITSYNC_INIT_PERIOD
How long to wait between sync attempts until the first successful
sync. Once the initial sync succeeds, --period is used instead.
This must be at least 10ms if set. If not specified, --period is
used for all sync attempts.

--link <string>, $GITSYNC_LINK
The path at which to create a symlink which points to the
current git directory, at the currently synced hash. This may be
Expand Down
86 changes: 83 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@ func main() {
flErrorFile := pflag.String("error-file",
envString("", "GITSYNC_ERROR_FILE", "GIT_SYNC_ERROR_FILE"),
"the path (absolute or relative to --root) to an optional file into which errors will be written (defaults to disabled)")
flInitMaxFailures := pflag.Int("init-max-failures",
Comment thread
knQzx marked this conversation as resolved.
Outdated
envInt(0, "GITSYNC_INIT_MAX_FAILURES"),
"the number of consecutive failures allowed before aborting during the initial sync phase; 0 disables the check (falls through to --max-failures)")
flInitPeriod := pflag.Duration("init-period",
envDuration(0, "GITSYNC_INIT_PERIOD"),
"how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used")
flPeriod := pflag.Duration("period",
envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"),
"how long to wait between syncs, must be >= 10ms; --wait overrides this")
Expand Down Expand Up @@ -469,6 +475,12 @@ func main() {
if *flPeriod < 10*time.Millisecond {
fatalConfigErrorf(log, true, "invalid flag: --period must be at least 10ms")
}
if *flInitPeriod != 0 && *flInitPeriod < 10*time.Millisecond {
Comment thread
knQzx marked this conversation as resolved.
Outdated
fatalConfigErrorf(log, true, "invalid flag: --init-period must be at least 10ms")
}
if *flInitMaxFailures < 0 {
fatalConfigErrorf(log, true, "invalid flag: --init-max-failures must not be negative")
}

if *flDeprecatedChmod != 0 {
fatalConfigErrorf(log, true, "deprecated flag: --change-permissions is no longer supported")
Expand Down Expand Up @@ -912,20 +924,35 @@ func main() {

failCount := 0
syncCount := uint64(0)
initialSyncDone := false
for {
start := time.Now()
ctx, cancel := context.WithTimeout(context.Background(), *flSyncTimeout)
Comment thread
thockin marked this conversation as resolved.

if changed, hash, err := git.SyncRepo(ctx, refreshCreds); err != nil {
failCount++
updateSyncMetrics(metricKeyError, start)
if *flMaxFailures >= 0 && failCount >= *flMaxFailures {
if isInitFailuresExceeded(*flInitMaxFailures, initialSyncDone, failCount) {
Comment thread
knQzx marked this conversation as resolved.
Outdated
log.Error(err, "too many initial sync failures, aborting", "initMaxFailures", *flInitMaxFailures, "failCount", failCount)
os.Exit(1)
}
// During the initial sync phase, --init-max-failures (if set) is
// the authoritative limit; --max-failures applies once init is
// done, or when --init-max-failures is not set.
useMainLimit := initialSyncDone || *flInitMaxFailures == 0
if useMainLimit && *flMaxFailures >= 0 && failCount >= *flMaxFailures {
// Exit after too many retries, maybe the error is not recoverable.
log.Error(err, "too many failures, aborting", "failCount", failCount)
os.Exit(1)
}
log.Error(err, "error syncing repo, will retry", "failCount", failCount)
} else {
if !initialSyncDone {
initialSyncDone = true
if *flInitPeriod != 0 {
log.V(0).Info("initial sync complete, switching to normal period", "initPeriod", flInitPeriod.String(), "period", flPeriod.String())
}
}
// this might have been called before, but also might not have
setRepoReady()
// We treat the first loop as a sync, including sending hooks.
Expand Down Expand Up @@ -989,12 +1016,14 @@ func main() {
log.DeleteErrorFile()
}

log.V(3).Info("next sync", "waitTime", flPeriod.String(), "syncCount", syncCount)
// Use init-period for retries before the first successful sync.
waitTime := chooseWaitTime(*flPeriod, *flInitPeriod, initialSyncDone)
Comment thread
knQzx marked this conversation as resolved.
Outdated
log.V(3).Info("next sync", "waitTime", waitTime.String(), "syncCount", syncCount)
cancel()

// Sleep until the next sync. If syncSig is set then the sleep may
// be interrupted by that signal.
t := time.NewTimer(*flPeriod)
t := time.NewTimer(waitTime)
select {
case <-t.C:
case <-sigChan:
Expand Down Expand Up @@ -1144,6 +1173,26 @@ func setRepoReady() {
repoReady = true
}

// chooseWaitTime picks how long to sleep before the next sync attempt. While
// the first successful sync is still pending and an init period has been
// configured (non-zero), that init period is used; in every other case the
// regular period is returned.
func chooseWaitTime(period, initPeriod time.Duration, initialSyncDone bool) time.Duration {
	// The regular period applies once the initial sync is done, or when no
	// separate init period was requested.
	if initialSyncDone || initPeriod == 0 {
		return period
	}
	return initPeriod
}

// isInitFailuresExceeded reports whether the initial-sync failure limit has
// been reached, i.e. the first successful sync has not happened yet and
// failCount is at least initMaxFailures. Note that reaching the limit exactly
// (failCount == initMaxFailures) already counts as exceeded. A non-positive
// initMaxFailures disables the check, and the check never fires once
// initialSyncDone is true.
func isInitFailuresExceeded(initMaxFailures int, initialSyncDone bool, failCount int) bool {
	// The limit is only in force while it is configured (> 0) and the initial
	// sync is still pending.
	limitActive := initMaxFailures > 0 && !initialSyncDone
	return limitActive && failCount >= initMaxFailures
}

// Do no work, but don't do something that triggers go's runtime into thinking
// it is deadlocked.
func sleepForever() {
Expand Down Expand Up @@ -2397,6 +2446,24 @@ CONTRACT
use as little disk space as possible (see the --depth and --git-gc flags),
but this is not part of the contract.

SYNC PHASES

git-sync operates in two phases:

Initial sync:
git-sync retries until its first successful sync with the remote
repo. During this phase, the retry interval is controlled by
--init-period (falling back to --period if unset) and the failure
limit is controlled by --init-max-failures (falling back to
--max-failures when --init-max-failures is 0). Separate settings for
this phase make it possible to tolerate transient connectivity issues
at startup while still giving up eventually.

Steady state:
Once the first sync succeeds, git-sync polls the remote at the
--period interval and tolerates failures up to --max-failures before
aborting. --init-period and --init-max-failures no longer apply.

OPTIONS

Many options can be specified as either a commandline flag or an environment
Expand Down Expand Up @@ -2556,6 +2623,19 @@ OPTIONS
Enable the pprof debug endpoints on git-sync's HTTP endpoint at
/debug/pprof. Requires --http-bind to be specified.

--init-max-failures <int>, $GITSYNC_INIT_MAX_FAILURES
The number of consecutive failures allowed before aborting during
the initial sync phase (before the first successful sync). Once
the initial sync succeeds, --max-failures applies instead. Set
to 0 (the default) to disable this separate limit and fall
Comment thread
knQzx marked this conversation as resolved.
Outdated
through to --max-failures for the entire run.

--init-period <duration>, $GITSYNC_INIT_PERIOD
How long to wait between sync attempts until the first successful
sync. Once the initial sync succeeds, --period is used instead.
This must be at least 10ms if set. If not specified, --period is
used for all sync attempts.

--link <string>, $GITSYNC_LINK
The path at which to create a symlink which points to the
current git directory, at the currently synced hash. This may be
Expand Down
101 changes: 101 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,107 @@ func TestTouch(t *testing.T) {
}
}

// TestChooseWaitTime checks that chooseWaitTime returns the init period only
// while the initial sync is still pending and an init period is configured,
// and the regular period in every other combination.
func TestChooseWaitTime(t *testing.T) {
	const (
		period     = 10 * time.Second
		initPeriod = 500 * time.Millisecond
	)

	type testCase struct {
		name            string
		period          time.Duration
		initPeriod      time.Duration
		initialSyncDone bool
		expected        time.Duration
	}

	cases := []testCase{
		{name: "no init-period, not done", period: period, initPeriod: 0, initialSyncDone: false, expected: period},
		{name: "no init-period, done", period: period, initPeriod: 0, initialSyncDone: true, expected: period},
		{name: "init-period set, not done", period: period, initPeriod: initPeriod, initialSyncDone: false, expected: initPeriod},
		{name: "init-period set, done", period: period, initPeriod: initPeriod, initialSyncDone: true, expected: period},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			if got := chooseWaitTime(tc.period, tc.initPeriod, tc.initialSyncDone); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}

// TestIsInitFailuresExceeded checks the initial-sync failure limit: it is
// disabled for non-positive limits and after the initial sync is done, and it
// trips as soon as the failure count reaches the configured limit.
func TestIsInitFailuresExceeded(t *testing.T) {
	type testCase struct {
		name            string
		initMaxFailures int
		initialSyncDone bool
		failCount       int
		expected        bool
	}

	cases := []testCase{
		{name: "disabled (zero)", initMaxFailures: 0, initialSyncDone: false, failCount: 100, expected: false},
		{name: "disabled (negative)", initMaxFailures: -1, initialSyncDone: false, failCount: 100, expected: false},
		{name: "limit set, already done", initMaxFailures: 3, initialSyncDone: true, failCount: 100, expected: false},
		{name: "limit set, not done, under limit", initMaxFailures: 5, initialSyncDone: false, failCount: 3, expected: false},
		{name: "limit set, not done, at limit", initMaxFailures: 3, initialSyncDone: false, failCount: 3, expected: true},
		{name: "limit set, not done, over limit", initMaxFailures: 3, initialSyncDone: false, failCount: 5, expected: true},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			if got := isInitFailuresExceeded(tc.initMaxFailures, tc.initialSyncDone, tc.failCount); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}

func TestHasGitLockFile(t *testing.T) {
testCases := map[string]struct {
inputFilePath []string
Expand Down
49 changes: 49 additions & 0 deletions test_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3683,6 +3683,55 @@ function e2e::exechook_git_archive() {
assert_tgz_archive "$ROOT/link/archive.tgz"
}

##############################################
# Test init-period uses faster interval for initial sync
#
# Runs git-sync with a very long --period (100s) and a short
# --init-period (100ms), checks that the first commit is synced, and then
# checks that a second commit is NOT picked up within a few seconds.
##############################################
function e2e::init_period_faster_initial_sync() {
# Create the commit that the initial sync is expected to pick up.
echo "${FUNCNAME[0]} 1" > "$REPO/file"
git -C "$REPO" commit -qam "${FUNCNAME[0]} 1"

# Launch git-sync in the background (note the trailing "&") so the
# assertions below can run while it keeps syncing.
GIT_SYNC \
--period=100s \
--init-period=100ms \
--repo="file://$REPO" \
--root="$ROOT" \
--link="link" \
&
# With init-period=100ms, sync should happen quickly even though
# period=100s. If init-period were not working, this would time out.
wait_for_sync "${MAXWAIT}"
assert_link_exists "$ROOT/link"
assert_file_exists "$ROOT/link/file"
assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1"
assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1

# After initial sync, period should switch to the normal 100s.
# Make a new commit and verify it does NOT sync quickly (because
# we're now using the slow period).
echo "${FUNCNAME[0]} 2" > "$REPO/file"
git -C "$REPO" commit -qam "${FUNCNAME[0]} 2"
# Wait a bit - should NOT have synced since normal period is 100s
sleep 3
# The link must still show the first commit's content and the good-sync
# metric must still be 1.
assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1"
assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1
}

##############################################
# Test init-max-failures aborts after N failed attempts
#
# --max-failures=-1 is passed alongside --init-max-failures=3, so
# --init-max-failures is the only failure limit that can make git-sync
# give up here. The repo URL is presumably a path that does not exist, so
# that no sync attempt can succeed (NOTE(review): confirm).
##############################################
function e2e::init_max_failures_exceeded() {
# git-sync runs in the foreground here; assert_fail expects the command
# to exit unsuccessfully.
assert_fail \
GIT_SYNC \
--period=100ms \
--init-max-failures=3 \
--max-failures=-1 \
--repo="file:///does/not/exist" \
--root="$ROOT" \
--link="link"
# Nothing should have been published under the link.
assert_file_absent "$ROOT/link/file"
}

#
# main
#
Expand Down