Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/main/java/io/cryostat/agent/ConfigModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,12 @@ public abstract class ConfigModule {
"cryostat.agent.registration.circuit-breaker-duration";
public static final String CRYOSTAT_AGENT_REGISTRATION_MIN_COOLDOWN_MS =
"cryostat.agent.registration.min-cooldown-ms";
public static final String CRYOSTAT_AGENT_REGISTRATION_COOLDOWN_JITTER_FACTOR =
"cryostat.agent.registration.cooldown-jitter-factor";
public static final String CRYOSTAT_AGENT_REGISTRATION_RETRY_BACKOFF_JITTER_FACTOR =
"cryostat.agent.registration.retry-backoff-jitter-factor";
public static final String CRYOSTAT_AGENT_REGISTRATION_MIN_INTERVAL =
"cryostat.agent.registration.min-interval";
public static final String CRYOSTAT_AGENT_EXIT_SIGNALS = "cryostat.agent.exit.signals";
public static final String CRYOSTAT_AGENT_EXIT_DEREGISTRATION_TIMEOUT_MS =
"cryostat.agent.exit.deregistration.timeout-ms";
Expand Down Expand Up @@ -1012,6 +1018,32 @@ public static Duration provideCryostatAgentRegistrationMinCooldownMs(SmallRyeCon
return Duration.ofMillis(cooldownMs);
}

@Provides
@Singleton
@Named(CRYOSTAT_AGENT_REGISTRATION_COOLDOWN_JITTER_FACTOR)
public static double provideCryostatAgentRegistrationCooldownJitterFactor(
SmallRyeConfig config) {
return config.getValue(CRYOSTAT_AGENT_REGISTRATION_COOLDOWN_JITTER_FACTOR, double.class);
}

@Provides
@Singleton
@Named(CRYOSTAT_AGENT_REGISTRATION_RETRY_BACKOFF_JITTER_FACTOR)
public static double provideCryostatAgentRegistrationRetryBackoffJitterFactor(
SmallRyeConfig config) {
return config.getValue(
CRYOSTAT_AGENT_REGISTRATION_RETRY_BACKOFF_JITTER_FACTOR, double.class);
}

@Provides
@Singleton
@Named(CRYOSTAT_AGENT_REGISTRATION_MIN_INTERVAL)
public static Duration provideCryostatAgentRegistrationMinInterval(SmallRyeConfig config) {
String intervalStr =
config.getValue(CRYOSTAT_AGENT_REGISTRATION_MIN_INTERVAL, String.class);
return Duration.parse(intervalStr);
}

@Provides
@Singleton
@Named(CRYOSTAT_AGENT_HARVESTER_PERIOD_MS)
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/io/cryostat/agent/MainModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,12 @@ public static Registration provideRegistration(
Duration circuitBreakerOpenDuration,
@Named(ConfigModule.CRYOSTAT_AGENT_REGISTRATION_MIN_COOLDOWN_MS)
Duration minCooldownDuration,
@Named(ConfigModule.CRYOSTAT_AGENT_REGISTRATION_COOLDOWN_JITTER_FACTOR)
double cooldownJitterFactor,
@Named(ConfigModule.CRYOSTAT_AGENT_REGISTRATION_RETRY_BACKOFF_JITTER_FACTOR)
double retryBackoffJitterFactor,
@Named(ConfigModule.CRYOSTAT_AGENT_REGISTRATION_MIN_INTERVAL)
Duration minRegistrationInterval,
SecureRandom random) {
Logger log = LoggerFactory.getLogger(Registration.class);
return new Registration(
Expand Down Expand Up @@ -919,6 +925,9 @@ public static Registration provideRegistration(
circuitBreakerThreshold,
circuitBreakerOpenDuration,
minCooldownDuration,
cooldownJitterFactor,
retryBackoffJitterFactor,
minRegistrationInterval,
random);
}

Expand Down
78 changes: 72 additions & 6 deletions src/main/java/io/cryostat/agent/Registration.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ public class Registration {
private final int circuitBreakerThreshold;
private final Duration circuitBreakerOpenDuration;
private final Duration minCooldownDuration;
private final double cooldownJitterFactor;
private final double retryBackoffJitterFactor;
private final Duration minRegistrationInterval;
private final Random random;

private final PluginInfo pluginInfo = new PluginInfo();
Expand All @@ -80,7 +83,6 @@ public class Registration {
private ScheduledFuture<?> registrationCheckTask;

private final AtomicInteger consecutiveFailures = new AtomicInteger(0);
private static final double JITTER_FACTOR = 0.1;
private volatile CircuitState circuitState = CircuitState.CLOSED;
private volatile Instant circuitOpenedAt = null;

Expand All @@ -91,6 +93,9 @@ public class Registration {
private volatile ScheduledFuture<?> cooldownExitTask = null;
private final Object cooldownLock = new Object();

private volatile Instant lastRegistrationAttempt = Instant.MIN;
private final Object registrationLock = new Object();

private enum CircuitState {
CLOSED,
OPEN,
Expand All @@ -102,7 +107,7 @@ private enum CircuitState {
CryostatClient cryostat,
CallbackResolver callbackResolver,
WebServer webServer,
io.cryostat.agent.util.AppNameResolver appNameResolver,
AppNameResolver appNameResolver,
String instanceId,
String jvmId,
String appName,
Expand All @@ -118,6 +123,9 @@ private enum CircuitState {
int circuitBreakerThreshold,
Duration circuitBreakerOpenDuration,
Duration minCooldownDuration,
double cooldownJitterFactor,
double retryBackoffJitterFactor,
Duration minRegistrationInterval,
Random random) {
this.executor = executor;
this.cryostat = cryostat;
Expand All @@ -139,6 +147,9 @@ private enum CircuitState {
this.circuitBreakerThreshold = circuitBreakerThreshold;
this.circuitBreakerOpenDuration = circuitBreakerOpenDuration;
this.minCooldownDuration = minCooldownDuration;
this.cooldownJitterFactor = cooldownJitterFactor;
this.retryBackoffJitterFactor = retryBackoffJitterFactor;
this.minRegistrationInterval = minRegistrationInterval;
this.random = random;
}

Expand Down Expand Up @@ -242,7 +253,42 @@ public void addRegistrationListener(Consumer<RegistrationEvent> listener) {
this.listeners.add(listener);
}

/**
* Determine when the next registration attempt is allowed. This prevents rapid-fire
* registration attempts from external triggers.
*
* @return the instant when registration may next be attempted
*/
private Instant shouldAttemptRegistrationAt() {
synchronized (registrationLock) {
Instant now = Instant.now();
Instant nextAllowed = lastRegistrationAttempt.plus(minRegistrationInterval);

if (now.isBefore(nextAllowed)) {
Duration remaining = Duration.between(now, nextAllowed);
log.debug(
"Skipping registration attempt - minimum interval not met. Last attempt:"
+ " {}, next allowed: {} (in {})",
lastRegistrationAttempt,
nextAllowed,
remaining);
return nextAllowed;
}

lastRegistrationAttempt = now;
return now;
}
}

void tryRegister() {
Instant shouldAttemptRegistrationAt = shouldAttemptRegistrationAt();
if (Instant.now().isBefore(shouldAttemptRegistrationAt)) {
long delay = Duration.between(Instant.now(), shouldAttemptRegistrationAt).toMillis();
executor.schedule(
() -> notify(RegistrationEvent.State.REFRESHING), delay, TimeUnit.MILLISECONDS);
return;
}

if (currentRegistration != null && !currentRegistration.isDone()) {
log.warn("Cancelling previous registration attempt");
currentRegistration.cancel(true);
Expand Down Expand Up @@ -391,7 +437,7 @@ private long calculateBackoffMs() {
return registrationRetryMs;
}

double jitter = 1.0 + (random.nextDouble() * 2 - 1) * JITTER_FACTOR;
double jitter = 1.0 + (random.nextDouble() * 2 - 1) * retryBackoffJitterFactor;
long backoff =
(long)
(registrationRetryMs
Expand All @@ -403,6 +449,24 @@ private long calculateBackoffMs() {
return backoff;
}

/**
* Calculate cooldown duration with jitter to prevent thundering herd problem. Adds random
* variation based on the configured jitter factor to the base duration so that multiple agents
* don't all exit cooldown simultaneously.
*
* @param baseDuration Base cooldown duration (e.g., PT30S)
* @return Duration with jitter applied
*/
Duration calculateCooldownWithJitter(Duration baseDuration) {
long baseMs = baseDuration.toMillis();
// Add jitter: range is (1 - jitterFactor) to (1 + jitterFactor) times base duration
// For jitterFactor=0.2, this gives 0.8x to 1.2x base duration
double jitterRange = cooldownJitterFactor * 2;
double jitterFactor = (1.0 - cooldownJitterFactor) + (random.nextDouble() * jitterRange);
long jitteredMs = (long) (baseMs * jitterFactor);
return Duration.ofMillis(jitteredMs);
}

/**
* Check if the Agent is currently in cooldown period.
*
Expand All @@ -425,9 +489,11 @@ private void enterCooldown(Duration duration) {
cooldownExitTask.cancel(false);
}

cooldownUntil = Instant.now().plus(duration);
Duration jitteredDuration = calculateCooldownWithJitter(duration);
cooldownUntil = Instant.now().plus(jitteredDuration);
log.debug(
"Entering cooldown for {} after {} consecutive failures",
"Entering cooldown for {} (base: {}) after {} consecutive failures",
jitteredDuration,
duration,
consecutiveFailures.get());
notify(RegistrationEvent.State.COOLDOWN);
Expand All @@ -450,7 +516,7 @@ private void enterCooldown(Duration duration) {

cooldownExitTask =
executor.schedule(
this::exitCooldown, duration.toMillis(), TimeUnit.MILLISECONDS);
this::exitCooldown, jitteredDuration.toMillis(), TimeUnit.MILLISECONDS);
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/META-INF/microprofile-config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ cryostat.agent.registration.backoff-multiplier=2.0
cryostat.agent.registration.circuit-breaker-threshold=10
cryostat.agent.registration.circuit-breaker-duration=PT30S
cryostat.agent.registration.min-cooldown-ms=30000
cryostat.agent.registration.cooldown-jitter-factor=0.2
cryostat.agent.registration.retry-backoff-jitter-factor=0.1
cryostat.agent.registration.min-interval=PT10S
cryostat.agent.exit.deregistration.timeout-ms=10000

cryostat.agent.publish.context=
Expand Down
Loading
Loading