Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
9073427
Tentacle script abandonment (re-landed on main)
jimmyp Jun 2, 2026
4f3fc4d
Address review: remove abandon capability check, drop abandon metrics…
jimmyp Jun 2, 2026
e797a67
Apply review: concise AbandonScript summary on ITentacleClient
jimmyp Jun 2, 2026
06517d6
Address review: V1/K8s abandon throws + orchestrator is abandon-aware…
jimmyp Jun 2, 2026
ffa5b82
Test at the right boundary: drop duplicate/wrong-level abandon tests
jimmyp Jun 2, 2026
d03dcfb
Add AbandonScriptAsync to IAsyncScriptServiceV2 (unblocks server test…
jimmyp Jun 2, 2026
65a6db5
Fix race in abandon integration tests: assert ExecuteScript's result,…
jimmyp Jun 3, 2026
96babdd
Reword abandon wait/cleanup comments in SilentProcessRunner
jimmyp Jun 4, 2026
23059e4
Address PR review feedback on abandon
jimmyp Jun 4, 2026
7ee0fdb
Keep grandchild cleanup best-effort (can't assert a reparented non-ch…
jimmyp Jun 4, 2026
86ef03a
Gate abandon on the advertised capability, not just "is V2", + test r…
jimmyp Jun 5, 2026
d4e85ba
Restore @jimmyp's applied suggestions clobbered by my force-push
jimmyp Jun 5, 2026
5fd82a5
Put SupportsAbandon on ScriptServiceVersion (ScriptServiceVersion2Wit…
jimmyp Jun 5, 2026
21b8d2a
Address review: ScriptServiceVersion.IsV2, split escalation test, V1 …
jimmyp Jun 5, 2026
d195d65
Make the escalation split meaningful: each test asserts a distinct ef…
jimmyp Jun 5, 2026
a2e283b
Merge remote-tracking branch 'origin/main' into jimpelletier/eft-3295…
jimmyp Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

477 changes: 477 additions & 0 deletions docs/superpowers/specs/2026-05-21-tentacle-script-abandon-design.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,17 @@ public static bool HasScriptServiceV2(this CapabilitiesResponseV2 capabilities)

return capabilities.SupportedCapabilities.Contains(nameof(IScriptServiceV2));
}

public static bool HasAbandonScript(this CapabilitiesResponseV2 capabilities)
{
if (capabilities?.SupportedCapabilities?.Any() != true)
{
return false;
}

// Both sides nameof IScriptServiceV2.AbandonScript, so the strings match and a rename
// on either side can't silently drift the capability check.
return capabilities.SupportedCapabilities.Contains(nameof(IScriptServiceV2.AbandonScript));
Comment on lines +26 to +28

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the name ever changes, that would be a bug, since we would break compatibility.

No action needed, but the comment is perhaps misleading, as it almost suggests we can change it.

}
}
}
14 changes: 13 additions & 1 deletion source/Octopus.Tentacle.Client/ITentacleClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Octopus.Tentacle.Client.Scripts.Models;
using Octopus.Tentacle.Contracts;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.ScriptServiceV2;

namespace Octopus.Tentacle.Client
{
Expand All @@ -31,7 +32,8 @@ Task<ScriptExecutionResult> ExecuteScript(
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
ITentacleClientTaskLog logger,
CancellationToken scriptExecutionCancellationToken);
CancellationToken scriptExecutionCancellationToken,
TimeSpan? abandonAfterCancellationPendingFor = null);

/// <summary>
/// Start the script.
Expand Down Expand Up @@ -59,6 +61,16 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext, ITentacleClientTaskLog logger);

/// <summary>
/// Abandon a running script. This attempts cancellation but, if necessary, leaves the script
/// running in the OS with Tentacle no longer watching or managing it.
/// </summary>
/// <param name="scriptTicket">The ticket of the script to abandon</param>
/// <param name="logger">Used to output user orientated log messages</param>
/// <param name="cancellationToken">Cancels the RPC call</param>
/// <returns>The current status snapshot of the script at the time abandon was processed</returns>
Task<ScriptStatusResponseV2> AbandonScript(ScriptTicket scriptTicket, ITentacleClientTaskLog logger, CancellationToken cancellationToken);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
10 changes: 9 additions & 1 deletion source/Octopus.Tentacle.Client/ScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co

return await scriptExecutor.CancelScript(commandContext);
}


// Abandons the script described by the command context, using the executor that matches the
// script service version recorded in that context.
public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    var executor = CreateScriptExecutorFactory()
        .CreateScriptExecutor(commandContext.ScripServiceVersionUsed);

    var abandonResult = await executor.AbandonScript(commandContext);
    return abandonResult;
}

public async Task<ScriptStatus?> CompleteScript(CommandContext commandContext, CancellationToken cancellationToken)
{
var scriptExecutorFactory = CreateScriptExecutorFactory();
Expand Down
7 changes: 7 additions & 0 deletions source/Octopus.Tentacle.Client/Scripts/IScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext);

/// <summary>
/// Abandon the script. Signals Tentacle to stop waiting for the script to cancel and to make itself
/// available to run more scripts with the same isolation mutex.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command</param>
Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ async Task<KubernetesScriptStatusResponseV1> CancelScriptAction(CancellationToke
return Map(kubernetesScriptStatusResponseV1);
}

// Always throws: KubernetesScriptServiceV1 has no abandon verb.
// The orchestrator only escalates to abandon when the Tentacle advertised the abandon
// capability (K8s agents never do), so a call arriving here indicates a bug in the caller.
public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // Deliberately not async: the exception is thrown synchronously rather than via a faulted Task.
    throw new NotSupportedException("KubernetesScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(KubernetesScriptServiceV1Executor)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,20 @@ sealed class ObservingScriptOrchestrator
readonly OnScriptStatusResponseReceived onScriptStatusResponseReceived;
readonly OnScriptCompleted onScriptCompleted;
readonly IScriptExecutor scriptExecutor;
readonly TimeSpan? abandonAfterCancellationPendingFor;

public ObservingScriptOrchestrator(
IScriptObserverBackoffStrategy scriptObserverBackOffStrategy,
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
IScriptExecutor scriptExecutor)
IScriptExecutor scriptExecutor,
TimeSpan? abandonAfterCancellationPendingFor = null)
{
this.scriptExecutor = scriptExecutor;
this.scriptObserverBackOffStrategy = scriptObserverBackOffStrategy;
this.onScriptStatusResponseReceived = onScriptStatusResponseReceived;
this.onScriptCompleted = onScriptCompleted;
this.abandonAfterCancellationPendingFor = abandonAfterCancellationPendingFor;
}

public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand command, CancellationToken scriptExecutionCancellationToken)
Expand Down Expand Up @@ -80,12 +83,28 @@ async Task<ScriptOperationExecutionResult> ObserveUntilComplete(
var iteration = 0;
var cancellationIteration = 0;
var lastResult = startScriptResult;
var stopwatch = new Stopwatch();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rename to something like cancelling duration


while (lastResult.ScriptStatus.State != ProcessState.Complete)
{
if (scriptExecutionCancellationToken.IsCancellationRequested)
{
lastResult = await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
// Record when cancellation first fired so we can escalate to abandon after the threshold.
if (!stopwatch.IsRunning)
{
stopwatch.Start();
}
Comment thread
jimmyp marked this conversation as resolved.

// Only escalate to abandon when the Tentacle advertised the abandon capability. Old V2
// Tentacles (pre-abandon) and V1/Kubernetes don't, so we keep cancelling rather than
// calling a verb they don't have.
var shouldAbandon = lastResult.ContextForNextCommand.ScripServiceVersionUsed.SupportsAbandon

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should drop the capabilities into the CommandContext to avoid needing to have ScriptServiceVersion2WithAbandon since if we kept adding capabilities that would get longer and longer.

The CommandContext is stored in V2 messages, so we would need to consider the upgrade case (I think just upgrading to an empty set of capabilities would be sufficient)

If we do decide to keep ScriptServiceVersion2WithAbandon for backwards compatibility we are stuck with it.

&& abandonAfterCancellationPendingFor.HasValue
&& stopwatch.Elapsed >= abandonAfterCancellationPendingFor.Value;

lastResult = shouldAbandon
? await scriptExecutor.AbandonScript(lastResult.ContextForNextCommand).ConfigureAwait(false)
: await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ public IScriptExecutor CreateScriptExecutor(ScriptServiceVersion scriptServiceTo
logger);
}

if (scriptServiceToUse == ScriptServiceVersion.ScriptServiceVersion2)
if (scriptServiceToUse.IsV2)
{
return new ScriptServiceV2Executor(
scriptServiceToUse,
allClients.ScriptServiceV2,
rpcCallExecutor,
clientOperationMetricsBuilder,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co
return Map(response);
}

// Always throws: ScriptServiceV1 has no abandon verb.
// The orchestrator only escalates to abandon when the Tentacle advertised the abandon
// capability, so a call arriving here indicates a bug in the caller.
public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // Deliberately not async: the exception is thrown synchronously rather than via a faulted Task.
    throw new NotSupportedException("ScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Threading.Tasks;
using Halibut;
using Halibut.ServiceModel;
using Octopus.Tentacle.Client.Capabilities;
using Octopus.Tentacle.Client.EventDriven;
using Octopus.Tentacle.Client.Execution;
using Octopus.Tentacle.Client.Observability;
Expand All @@ -23,15 +24,18 @@ class ScriptServiceV2Executor : IScriptExecutor
readonly TimeSpan onCancellationAbandonCompleteScriptAfter;
readonly ITentacleClientTaskLog logger;
readonly TentacleClientOptions clientOptions;
readonly ScriptServiceVersion scriptServiceVersion;

public ScriptServiceV2Executor(
ScriptServiceVersion scriptServiceVersion,
IAsyncClientScriptServiceV2 clientScriptServiceV2,
RpcCallExecutor rpcCallExecutor,
ClientOperationMetricsBuilder clientOperationMetricsBuilder,
TimeSpan onCancellationAbandonCompleteScriptAfter,
TentacleClientOptions clientOptions,
ITentacleClientTaskLog logger)
{
this.scriptServiceVersion = scriptServiceVersion;
this.clientScriptServiceV2 = clientScriptServiceV2;
this.rpcCallExecutor = rpcCallExecutor;
this.clientOperationMetricsBuilder = clientOperationMetricsBuilder;
Expand All @@ -58,11 +62,11 @@ StartScriptCommandV2 Map(ExecuteScriptCommand command)
shellScriptCommand.Files.ToArray());
}

static ScriptOperationExecutionResult Map(ScriptStatusResponseV2 scriptStatusResponse)
ScriptOperationExecutionResult Map(ScriptStatusResponseV2 scriptStatusResponse)
{
return new (
new ScriptStatus(scriptStatusResponse.State, scriptStatusResponse.ExitCode, scriptStatusResponse.Logs),
new CommandContext(scriptStatusResponse.Ticket, scriptStatusResponse.NextLogSequence, ScriptServiceVersion.ScriptServiceVersion2));
new CommandContext(scriptStatusResponse.Ticket, scriptStatusResponse.NextLogSequence, scriptServiceVersion));
}

public async Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand executeScriptCommand,
Expand Down Expand Up @@ -115,7 +119,7 @@ void OnErrorAction(Exception ex)
if (!startScriptCallIsConnecting || startScriptCallIsBeingRetried)
{
// We want to cancel the potentially started script, and wait till it finishes. By returning a result, the outer orchestration will take care of this.
return ScriptOperationExecutionResult.CreateScriptStartedResult(command.ScriptTicket, ScriptServiceVersion.ScriptServiceVersion2);
return ScriptOperationExecutionResult.CreateScriptStartedResult(command.ScriptTicket, scriptServiceVersion);
}

// If the StartScript call was not in-flight or being retried then we know the script has not started executing on Tentacle
Expand Down Expand Up @@ -173,6 +177,27 @@ async Task<ScriptStatusResponseV2> CancelScriptAction(CancellationToken ct)
return Map(scriptStatusResponseV2);
}

// Sends the AbandonScript RPC for the script identified by the command context (ticket plus
// next log sequence) and maps the returned status into a ScriptOperationExecutionResult.
public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(AbandonScript)}");

    // Like CancelScript, abandon must not be cancelled — it stops the script on Tentacle.
    var neverCancelled = CancellationToken.None;

    // The single RPC attempt handed to the call executor.
    async Task<ScriptStatusResponseV2> SendAbandon(CancellationToken ct)
    {
        return await clientScriptServiceV2.AbandonScriptAsync(
            new AbandonScriptCommandV2(commandContext.ScriptTicket, commandContext.NextLogSequence),
            new HalibutProxyRequestOptions(ct));
    }

    var abandonResponse = await rpcCallExecutor.Execute(
        retriesEnabled: clientOptions.RpcRetrySettings.RetriesEnabled,
        RpcCall.Create<IScriptServiceV2>(nameof(IScriptServiceV2.AbandonScript)),
        SendAbandon,
        logger,
        clientOperationMetricsBuilder,
        neverCancelled).ConfigureAwait(false);

    return Map(abandonResponse);
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(CompleteScript)}");
Expand Down
11 changes: 10 additions & 1 deletion source/Octopus.Tentacle.Client/Scripts/ScriptServiceVersion.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
namespace Octopus.Tentacle.Client.Scripts
namespace Octopus.Tentacle.Client.Scripts
{
    /// <summary>
    /// Identifies which script service (and which optional verbs) the client uses when talking to a Tentacle.
    /// Instances compare by <see cref="Value"/> (record value equality).
    /// </summary>
    /// <param name="Value">The name of the script service version; also what <see cref="ToString"/> returns.</param>
    public record ScriptServiceVersion(string Value)
    {
        // The well-known instances are readonly so that no caller can reassign them and change
        // version selection for the whole process.
        public static readonly ScriptServiceVersion ScriptServiceVersion1 = new(nameof(ScriptServiceVersion1));
        public static readonly ScriptServiceVersion ScriptServiceVersion2 = new(nameof(ScriptServiceVersion2));
        public static readonly ScriptServiceVersion ScriptServiceVersion2WithAbandon = new(nameof(ScriptServiceVersion2WithAbandon));
        public static readonly ScriptServiceVersion KubernetesScriptServiceVersion1 = new(nameof(KubernetesScriptServiceVersion1));

        /// <summary>
        /// True for both V2 variants — they talk to the same ScriptServiceV2; ...WithAbandon just also
        /// advertised the abandon verb.
        /// </summary>
        public bool IsV2 => Value == nameof(ScriptServiceVersion2) || Value == nameof(ScriptServiceVersion2WithAbandon);

        /// <summary>
        /// Only a V2 Tentacle that advertised the AbandonScript capability supports abandon. Old V2
        /// Tentacles predate the verb, so they select ScriptServiceVersion2, not ...WithAbandon.
        /// </summary>
        public bool SupportsAbandon => Value == nameof(ScriptServiceVersion2WithAbandon);

        /// <summary>Returns <see cref="Value"/> rather than the default record formatting.</summary>
        public override string ToString() => Value;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ ScriptServiceVersion DetermineShellScriptServiceVersionToUse(CapabilitiesRespons
logger.Verbose(clientOptions.RpcRetrySettings.RetriesEnabled
? $"RPC call retries are enabled. Retry timeout {rpcCallExecutor.RetryTimeout.TotalSeconds} seconds"
: "RPC call retries are disabled.");
return ScriptServiceVersion.ScriptServiceVersion2;
// Old V2 Tentacles are V2 but predate the abandon verb; only pick the abandon-capable
// variant when the Tentacle actually advertised AbandonScript.
return tentacleCapabilities.HasAbandonScript()
? ScriptServiceVersion.ScriptServiceVersion2WithAbandon
: ScriptServiceVersion.ScriptServiceVersion2;
}

logger.Verbose("RPC call retries are enabled but will not be used for Script Execution as a compatible ScriptService was not found. Please upgrade Tentacle to enable this feature.");
Expand Down
28 changes: 26 additions & 2 deletions source/Octopus.Tentacle.Client/TentacleClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using Octopus.Tentacle.Contracts.Capabilities;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.Observability;
using Octopus.Tentacle.Contracts.ScriptServiceV2;
using ITentacleClientObserver = Octopus.Tentacle.Contracts.Observability.ITentacleClientObserver;

namespace Octopus.Tentacle.Client
Expand Down Expand Up @@ -168,7 +169,8 @@ public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand exec
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
ITentacleClientTaskLog logger,
CancellationToken scriptExecutionCancellationToken)
CancellationToken scriptExecutionCancellationToken,
TimeSpan? abandonAfterCancellationPendingFor = null)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(ExecuteScript)}");
activity?.AddTag("octopus.tentacle.script.files", string.Join(",", executeScriptCommand.Files.Select(f => f.Name)));
Expand All @@ -188,7 +190,8 @@ public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand exec
var orchestrator = new ObservingScriptOrchestrator(scriptObserverBackOffStrategy,
onScriptStatusResponseReceived,
onScriptCompleted,
scriptExecutor);
scriptExecutor,
abandonAfterCancellationPendingFor);

var result = await orchestrator.ExecuteScript(executeScriptCommand, scriptExecutionCancellationToken);

Expand Down Expand Up @@ -260,6 +263,27 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co
return await scriptExecutor.CancelScript(commandContext);
}

public async Task<ScriptStatusResponseV2> AbandonScript(ScriptTicket scriptTicket, ITentacleClientTaskLog logger, CancellationToken cancellationToken)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(AbandonScript)}");
activity?.AddTag("octopus.tentacle.script.ticket", scriptTicket.TaskId);

async Task<ScriptStatusResponseV2> AbandonScriptAction(CancellationToken ct)
{
var request = new AbandonScriptCommandV2(scriptTicket, lastLogSequence: 0);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think giving a lastLogSequence of 0 will ask for ALL logs to be returned from the very beginning of the script. I think cancel above is using the CommandContext to get the "tail" of the logs rather than all the logs. It also goes via the scriptExecutor. Why is this not following the same pattern?

return await allClients.ScriptServiceV2.AbandonScriptAsync(request, new HalibutProxyRequestOptions(ct));
}

return await rpcCallExecutor.Execute(
retriesEnabled: clientOptions.RpcRetrySettings.RetriesEnabled,
RpcCall.Create<IScriptServiceV2>(nameof(IScriptServiceV2.AbandonScript)),
AbandonScriptAction,
logger,
// Abandon is a one-shot RPC; like CancelScript we don't track operation metrics for it.
ClientOperationMetricsBuilder.Start(),
cancellationToken).ConfigureAwait(false);
}

public async Task<ScriptStatus?> CompleteScript(CommandContext commandContext, ITentacleClientTaskLog logger, CancellationToken scriptExecutionCancellationToken)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ public ScriptBuilder CreateFile(string file)
return this;
}

/// <summary>
/// Appends, to both the bash and the Windows script bodies, a command that writes the
/// process ID of the shell running the script to <paramref name="file"/>.
/// </summary>
/// <param name="file">Path of the file to write the PID to. It is interpolated inside single quotes in both scripts.</param>
/// <returns>This builder, so calls can be chained.</returns>
public ScriptBuilder WritePidToFile(string file)
{
// bash: $$ expands to the PID of the shell executing the script.
bashScript.AppendLine($@"
echo $$ > '{file}'
");
// Windows: $PID is used as the current process ID (PowerShell syntax — presumably the Windows script runs under PowerShell; confirm).
windowsScript.AppendLine($@"
$PID | Out-File -FilePath '{file}' -Encoding ASCII
");
return this;
}

public ScriptBuilder WaitForFileToExist(string fileToWaitFor)
{
bashScript.AppendLine($@"
Expand Down
Loading