Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,24 @@ CONTRACT
use as little disk space as possible (see the --depth and --git-gc flags),
but this is not part of the contract.

SYNC PHASES

git-sync operates in two phases:

Initial sync:
git-sync retries until its first successful sync with the remote
repo. During this phase, the retry interval is controlled by
--init-period (falling back to --period if unset) and the failure
limit is controlled by --init-max-failures (falling back to
--max-failures when unset). This phase is useful for tolerating
transient connectivity issues at startup while still giving up
eventually.

Steady state:
Once the first sync succeeds, git-sync polls the remote at the
--period interval and tolerates failures up to --max-failures before
aborting. --init-period and --init-max-failures no longer apply.

OPTIONS

Many options can be specified as either a commandline flag or an environment
Expand Down Expand Up @@ -365,6 +383,20 @@ OPTIONS
Enable the pprof debug endpoints on git-sync's HTTP endpoint at
/debug/pprof. Requires --http-bind to be specified.

--init-max-failures <int>, $GITSYNC_INIT_MAX_FAILURES
The number of consecutive failures allowed before aborting during
the initial sync phase (before the first successful sync). Once
the initial sync succeeds, --max-failures applies instead.
Setting this to a negative value will retry forever during the
initial sync. If this flag is not set, --max-failures applies
to the initial sync phase as well.

--init-period <duration>, $GITSYNC_INIT_PERIOD
How long to wait between sync attempts until the first successful
sync. Once the initial sync succeeds, --period is used instead.
This must be at least 10ms if set. If not specified (or set to 0),
--period is used for all sync attempts.

--link <string>, $GITSYNC_LINK
The path at which to create a symlink which points to the
current git directory, at the currently synced hash. This may be
Expand Down
75 changes: 70 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ func main() {
flErrorFile := pflag.String("error-file",
envString("", "GITSYNC_ERROR_FILE", "GIT_SYNC_ERROR_FILE"),
"the path (absolute or relative to --root) to an optional file into which errors will be written (defaults to disabled)")
flInitPeriod := pflag.Duration("init-period",
envDuration(0, "GITSYNC_INIT_PERIOD"),
"how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used")
flPeriod := pflag.Duration("period",
envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"),
"how long to wait between syncs, must be >= 10ms; --wait overrides this")
Expand All @@ -198,6 +201,9 @@ func main() {
flMaxFailures := pflag.Int("max-failures",
envInt(0, "GITSYNC_MAX_FAILURES", "GIT_SYNC_MAX_FAILURES"),
"the number of consecutive failures allowed before aborting (-1 will retry forever")
flInitMaxFailures := pflag.Int("init-max-failures",
envInt(0, "GITSYNC_INIT_MAX_FAILURES"),
"the number of consecutive failures allowed before aborting during the initial sync phase; a negative value retries forever; if unset, --max-failures applies instead")
flTouchFile := pflag.String("touch-file",
envString("", "GITSYNC_TOUCH_FILE", "GIT_SYNC_TOUCH_FILE"),
"the path (absolute or relative to --root) to an optional file which will be touched whenever a sync completes (defaults to disabled)")
Expand Down Expand Up @@ -376,6 +382,11 @@ func main() {

pflag.Parse()

// Was --init-max-failures explicitly set (CLI or env)? envInt applies
// the env value as the pflag default, so pflag.Changed alone misses it.
_, initMaxFailuresEnv := os.LookupEnv("GITSYNC_INIT_MAX_FAILURES")
initMaxFailuresSet := initMaxFailuresEnv || pflag.Lookup("init-max-failures").Changed

// Handle print-and-exit cases.
if *flVersion {
fmt.Fprintln(os.Stdout, version.VERSION)
Expand Down Expand Up @@ -469,6 +480,11 @@ func main() {
if *flPeriod < 10*time.Millisecond {
fatalConfigErrorf(log, true, "invalid flag: --period must be at least 10ms")
}
if *flInitPeriod == 0 {
*flInitPeriod = *flPeriod
} else if *flInitPeriod < 10*time.Millisecond {
fatalConfigErrorf(log, true, "invalid flag: --init-period must be at least 10ms")
}

if *flDeprecatedChmod != 0 {
fatalConfigErrorf(log, true, "deprecated flag: --change-permissions is no longer supported")
Expand Down Expand Up @@ -912,20 +928,37 @@ func main() {

failCount := 0
syncCount := uint64(0)
initialSyncDone := false
waitTime := *flInitPeriod
// getMaxFailures returns the effective max-failure limit for the current
// phase. During the initial sync phase, --init-max-failures (if set)
// overrides --max-failures; otherwise --max-failures applies.
getMaxFailures := func() int {
if !initialSyncDone && initMaxFailuresSet {
return *flInitMaxFailures
}
return *flMaxFailures
}
for {
start := time.Now()
ctx, cancel := context.WithTimeout(context.Background(), *flSyncTimeout)
Comment thread
thockin marked this conversation as resolved.

if changed, hash, err := git.SyncRepo(ctx, refreshCreds); err != nil {
failCount++
updateSyncMetrics(metricKeyError, start)
if *flMaxFailures >= 0 && failCount >= *flMaxFailures {
// Exit after too many retries, maybe the error is not recoverable.
log.Error(err, "too many failures, aborting", "failCount", failCount)
if maxFails := getMaxFailures(); maxFails >= 0 && failCount >= maxFails {
log.Error(err, "too many failures, aborting", "failCount", failCount, "maxFailures", maxFails)
os.Exit(1)
}
log.Error(err, "error syncing repo, will retry", "failCount", failCount)
} else {
if !initialSyncDone {
initialSyncDone = true
waitTime = *flPeriod
if *flInitPeriod != *flPeriod {
log.V(0).Info("initial sync complete, switching to normal period", "initPeriod", flInitPeriod.String(), "period", flPeriod.String())
}
}
// this might have been called before, but also might not have
setRepoReady()
// We treat the first loop as a sync, including sending hooks.
Expand Down Expand Up @@ -989,12 +1022,12 @@ func main() {
log.DeleteErrorFile()
}

log.V(3).Info("next sync", "waitTime", flPeriod.String(), "syncCount", syncCount)
log.V(3).Info("next sync", "waitTime", waitTime.String(), "syncCount", syncCount)
cancel()

// Sleep until the next sync. If syncSig is set then the sleep may
// be interrupted by that signal.
t := time.NewTimer(*flPeriod)
t := time.NewTimer(waitTime)
select {
case <-t.C:
case <-sigChan:
Expand Down Expand Up @@ -2397,6 +2430,24 @@ CONTRACT
use as little disk space as possible (see the --depth and --git-gc flags),
but this is not part of the contract.

SYNC PHASES

git-sync operates in two phases:

Initial sync:
git-sync retries until its first successful sync with the remote
repo. During this phase, the retry interval is controlled by
--init-period (falling back to --period if unset) and the failure
limit is controlled by --init-max-failures (falling back to
--max-failures when unset). This phase is useful for tolerating
transient connectivity issues at startup while still giving up
eventually.

Steady state:
Once the first sync succeeds, git-sync polls the remote at the
--period interval and tolerates failures up to --max-failures before
aborting. --init-period and --init-max-failures no longer apply.

OPTIONS

Many options can be specified as either a commandline flag or an environment
Expand Down Expand Up @@ -2556,6 +2607,20 @@ OPTIONS
Enable the pprof debug endpoints on git-sync's HTTP endpoint at
/debug/pprof. Requires --http-bind to be specified.

--init-max-failures <int>, $GITSYNC_INIT_MAX_FAILURES
The number of consecutive failures allowed before aborting during
the initial sync phase (before the first successful sync). Once
the initial sync succeeds, --max-failures applies instead.
Setting this to a negative value will retry forever during the
initial sync. If this flag is not set, --max-failures applies
to the initial sync phase as well.

--init-period <duration>, $GITSYNC_INIT_PERIOD
How long to wait between sync attempts until the first successful
sync. Once the initial sync succeeds, --period is used instead.
This must be at least 10ms if set. If not specified, --period is
used for all sync attempts.

--link <string>, $GITSYNC_LINK
The path to at which to create a symlink which points to the
current git directory, at the currently synced hash. This may be
Expand Down
49 changes: 49 additions & 0 deletions test_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3683,6 +3683,55 @@ function e2e::exechook_git_archive() {
assert_tgz_archive "$ROOT/link/archive.tgz"
}

##############################################
# Test init-period uses faster interval for initial sync
##############################################
function e2e::init_period_faster_initial_sync() {
    # Prepare the commit that the initial sync is expected to pick up.
    echo "${FUNCNAME[0]} 1" > "$REPO/file"
    git -C "$REPO" commit -qam "${FUNCNAME[0]} 1"

    # Hide the repo so that the first sync attempts fail.  git-sync makes its
    # first attempt immediately at startup, so --init-period only comes into
    # play once an attempt has failed; with a healthy repo the first attempt
    # would succeed and the init period would never be exercised.
    mv "$REPO" "$REPO.hidden"

    # --max-failures=-1 keeps git-sync retrying instead of aborting on the
    # first failed attempt.
    GIT_SYNC \
        --period=100s \
        --init-period=100ms \
        --max-failures=-1 \
        --repo="file://$REPO" \
        --root="$ROOT" \
        --link="link" \
        &

    # Give git-sync time to fail at least once, then bring the repo back.
    sleep 1
    assert_file_absent "$ROOT/link/file"
    mv "$REPO.hidden" "$REPO"

    # Retries are spaced by init-period=100ms, so the sync should land
    # quickly.  If the 100s period were used for retries instead, this wait
    # would time out.
    wait_for_sync "${MAXWAIT}"
    assert_link_exists "$ROOT/link"
    assert_file_exists "$ROOT/link/file"
    assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1"
    assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1

    # After the initial sync, the wait should switch to the normal 100s
    # period.  Make a new commit and verify it does NOT sync quickly.
    echo "${FUNCNAME[0]} 2" > "$REPO/file"
    git -C "$REPO" commit -qam "${FUNCNAME[0]} 2"
    # Wait a bit - should NOT have synced since the normal period is 100s.
    sleep 3
    assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1"
    assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1
}

##############################################
# Test init-max-failures aborts after N failed attempts
##############################################
function e2e::init_max_failures_exceeded() {
    # The repo URL points at a path that does not exist, so every sync attempt
    # fails.  --max-failures=-1 would retry forever on its own; the run is
    # expected to exit non-zero because --init-max-failures=3 governs the
    # phase before the first successful sync.
    local -a init_fail_args=(
        --period=100ms
        --init-max-failures=3
        --max-failures=-1
        --repo="file:///does/not/exist"
        --root="$ROOT"
        --link="link"
    )

    assert_fail GIT_SYNC "${init_fail_args[@]}"

    # Nothing should have been published under the link.
    assert_file_absent "$ROOT/link/file"
}

#
# main
#
Expand Down
Loading