Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
9073427
Tentacle script abandonment (re-landed on main)
jimmyp Jun 2, 2026
4f3fc4d
Address review: remove abandon capability check, drop abandon metrics…
jimmyp Jun 2, 2026
e797a67
Apply review: concise AbandonScript summary on ITentacleClient
jimmyp Jun 2, 2026
06517d6
Address review: V1/K8s abandon throws + orchestrator is abandon-aware…
jimmyp Jun 2, 2026
ffa5b82
Test at the right boundary: drop duplicate/wrong-level abandon tests
jimmyp Jun 2, 2026
d03dcfb
Add AbandonScriptAsync to IAsyncScriptServiceV2 (unblocks server test…
jimmyp Jun 2, 2026
65a6db5
Fix race in abandon integration tests: assert ExecuteScript's result,…
jimmyp Jun 3, 2026
96babdd
Reword abandon wait/cleanup comments in SilentProcessRunner
jimmyp Jun 4, 2026
23059e4
Address PR review feedback on abandon
jimmyp Jun 4, 2026
7ee0fdb
Keep grandchild cleanup best-effort (can't assert a reparented non-ch…
jimmyp Jun 4, 2026
86ef03a
Gate abandon on the advertised capability, not just "is V2", + test r…
jimmyp Jun 5, 2026
d4e85ba
Restore @jimmyp's applied suggestions clobbered by my force-push
jimmyp Jun 5, 2026
5fd82a5
Put SupportsAbandon on ScriptServiceVersion (ScriptServiceVersion2Wit…
jimmyp Jun 5, 2026
21b8d2a
Address review: ScriptServiceVersion.IsV2, split escalation test, V1 …
jimmyp Jun 5, 2026
d195d65
Make the escalation split meaningful: each test asserts a distinct ef…
jimmyp Jun 5, 2026
a2e283b
Merge remote-tracking branch 'origin/main' into jimpelletier/eft-3295…
jimmyp Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

477 changes: 477 additions & 0 deletions docs/superpowers/specs/2026-05-21-tentacle-script-abandon-design.md

Large diffs are not rendered by default.

14 changes: 13 additions & 1 deletion source/Octopus.Tentacle.Client/ITentacleClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Octopus.Tentacle.Client.Scripts.Models;
using Octopus.Tentacle.Contracts;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.ScriptServiceV2;

namespace Octopus.Tentacle.Client
{
Expand All @@ -31,7 +32,8 @@ Task<ScriptExecutionResult> ExecuteScript(
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
ITentacleClientTaskLog logger,
CancellationToken scriptExecutionCancellationToken);
CancellationToken scriptExecutionCancellationToken,
TimeSpan? abandonAfterCancellationPendingFor = null);

/// <summary>
/// Start the script.
Expand Down Expand Up @@ -59,6 +61,16 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext, ITentacleClientTaskLog logger);

/// <summary>
/// Abandon a running script. Cancellation is attempted first; if necessary, the script is left
/// running in the OS with Tentacle no longer watching or managing it.
/// </summary>
/// <param name="scriptTicket">The ticket of the script to abandon</param>
/// <param name="logger">Used to output user orientated log messages</param>
/// <param name="cancellationToken">Cancels the RPC call</param>
/// <returns>The current status snapshot of the script at the time abandon was processed</returns>
Task<ScriptStatusResponseV2> AbandonScript(ScriptTicket scriptTicket, ITentacleClientTaskLog logger, CancellationToken cancellationToken);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
10 changes: 9 additions & 1 deletion source/Octopus.Tentacle.Client/ScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co

return await scriptExecutor.CancelScript(commandContext);
}


/// <summary>
/// Abandons the script described by <paramref name="commandContext"/>, delegating to the
/// script executor created for the script service version recorded in that context.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command</param>
/// <returns>The result returned by the version-specific executor's AbandonScript</returns>
public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // Pick the executor matching the script service version the earlier commands used.
    var versionUsed = commandContext.ScripServiceVersionUsed;
    var executorForVersion = CreateScriptExecutorFactory().CreateScriptExecutor(versionUsed);

    var abandonResult = await executorForVersion.AbandonScript(commandContext);
    return abandonResult;
}

public async Task<ScriptStatus?> CompleteScript(CommandContext commandContext, CancellationToken cancellationToken)
{
var scriptExecutorFactory = CreateScriptExecutorFactory();
Expand Down
7 changes: 7 additions & 0 deletions source/Octopus.Tentacle.Client/Scripts/IScriptExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ Task<ScriptOperationExecutionResult> StartScript(ExecuteScriptCommand command,
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> CancelScript(CommandContext commandContext);

/// <summary>
/// Abandon the script. Signals Tentacle to stop waiting for the script to cancel, making the
/// Tentacle available to run more scripts with the same isolation mutex.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command</param>
/// <returns>The result, which includes the CommandContext for the next command</returns>
Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext);

/// <summary>
/// Complete the script.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ async Task<KubernetesScriptStatusResponseV1> CancelScriptAction(CancellationToke
return Map(kubernetesScriptStatusResponseV1);
}

public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
// KubernetesScriptServiceV1 has no abandon verb. The orchestrator checks
// ScriptServiceVersion.SupportsAbandon and won't escalate here, so reaching this is a bug —
// throw rather than quietly cancel. Hung pods are recovered by deleting the pod instead.
Comment thread
jimmyp marked this conversation as resolved.
Outdated
=> throw new NotSupportedException("KubernetesScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(KubernetesScriptServiceV1Executor)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,20 @@ sealed class ObservingScriptOrchestrator
readonly OnScriptStatusResponseReceived onScriptStatusResponseReceived;
readonly OnScriptCompleted onScriptCompleted;
readonly IScriptExecutor scriptExecutor;
readonly TimeSpan? abandonAfterCancellationPendingFor;

public ObservingScriptOrchestrator(
IScriptObserverBackoffStrategy scriptObserverBackOffStrategy,
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
IScriptExecutor scriptExecutor)
IScriptExecutor scriptExecutor,
TimeSpan? abandonAfterCancellationPendingFor = null)
{
this.scriptExecutor = scriptExecutor;
this.scriptObserverBackOffStrategy = scriptObserverBackOffStrategy;
this.onScriptStatusResponseReceived = onScriptStatusResponseReceived;
this.onScriptCompleted = onScriptCompleted;
this.abandonAfterCancellationPendingFor = abandonAfterCancellationPendingFor;
}

public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand command, CancellationToken scriptExecutionCancellationToken)
Expand Down Expand Up @@ -80,12 +83,27 @@ async Task<ScriptOperationExecutionResult> ObserveUntilComplete(
var iteration = 0;
var cancellationIteration = 0;
var lastResult = startScriptResult;
var stopwatch = new Stopwatch();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rename to something like cancelling duration


while (lastResult.ScriptStatus.State != ProcessState.Complete)
{
if (scriptExecutionCancellationToken.IsCancellationRequested)
{
lastResult = await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
// Record when cancellation first fired so we can escalate to abandon after the threshold.
if (!stopwatch.IsRunning)
{
stopwatch.Start();
}
Comment thread
jimmyp marked this conversation as resolved.

// Only escalate to abandon when the script service can actually abandon. On versions
// that can't (V1, Kubernetes) we keep cancelling rather than calling a verb they don't have.
var shouldAbandon = lastResult.ContextForNextCommand.ScripServiceVersionUsed.SupportsAbandon

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should drop the capabilities into the CommandContext to avoid needing to have ScriptServiceVersion2WithAbandon since if we kept adding capabilities that would get longer and longer.

The CommandContext is stored in V2 messages, so we would need to consider the upgrade case (I think just upgrading to an empty set of capabilities would be sufficient)

If we do decide to keep ScriptServiceVersion2WithAbandon for backwards compatibility we are stuck with it.

&& abandonAfterCancellationPendingFor.HasValue
&& stopwatch.Elapsed >= abandonAfterCancellationPendingFor.Value;

lastResult = shouldAbandon
? await scriptExecutor.AbandonScript(lastResult.ContextForNextCommand).ConfigureAwait(false)
: await scriptExecutor.CancelScript(lastResult.ContextForNextCommand).ConfigureAwait(false);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co
return Map(response);
}

/// <summary>
/// Not supported: ScriptServiceV1 has no abandon verb, so this always throws.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command (unused)</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    // The orchestrator checks ScriptServiceVersion.SupportsAbandon before escalating, so it
    // should never call this. Getting here indicates a bug, which is why this throws
    // instead of quietly falling back to a cancel.
    throw new NotSupportedException("ScriptServiceV1 cannot abandon a script; it has no abandon verb. Cancel the script instead.");
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Threading.Tasks;
using Halibut;
using Halibut.ServiceModel;
using Octopus.Tentacle.Client.Capabilities;
using Octopus.Tentacle.Client.EventDriven;
using Octopus.Tentacle.Client.Execution;
using Octopus.Tentacle.Client.Observability;
Expand Down Expand Up @@ -173,6 +174,27 @@ async Task<ScriptStatusResponseV2> CancelScriptAction(CancellationToken ct)
return Map(scriptStatusResponseV2);
}

/// <summary>
/// Sends an AbandonScript request to ScriptServiceV2 for the script identified by
/// <paramref name="commandContext"/> and maps the returned status.
/// </summary>
/// <param name="commandContext">The CommandContext from the previous command; supplies the script ticket and next log sequence</param>
/// <returns>The mapped result of the abandon call</returns>
public async Task<ScriptOperationExecutionResult> AbandonScript(CommandContext commandContext)
{
    using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(AbandonScript)}");

    var abandonResponse = await rpcCallExecutor.Execute(
        retriesEnabled: clientOptions.RpcRetrySettings.RetriesEnabled,
        RpcCall.Create<IScriptServiceV2>(nameof(IScriptServiceV2.AbandonScript)),
        SendAbandonRequest,
        logger,
        clientOperationMetricsBuilder,
        // Like CancelScript, abandon must not be cancelled — it stops the script on Tentacle.
        CancellationToken.None).ConfigureAwait(false);

    return Map(abandonResponse);

    // Builds the abandon command from the context and issues the RPC.
    async Task<ScriptStatusResponseV2> SendAbandonRequest(CancellationToken ct)
    {
        var abandonCommand = new AbandonScriptCommandV2(commandContext.ScriptTicket, commandContext.NextLogSequence);
        var requestOptions = new HalibutProxyRequestOptions(ct);
        return await clientScriptServiceV2.AbandonScriptAsync(abandonCommand, requestOptions);
    }
}

public async Task<ScriptStatus?> CompleteScript(CommandContext lastStatusResponse, CancellationToken scriptExecutionCancellationToken)
{
using var activity = TentacleClient.ActivitySource.StartActivity($"{nameof(ScriptServiceV2Executor)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
namespace Octopus.Tentacle.Client.Scripts
namespace Octopus.Tentacle.Client.Scripts
{
public record ScriptServiceVersion(string Value)
{
public static ScriptServiceVersion ScriptServiceVersion1 = new(nameof(ScriptServiceVersion1));
public static ScriptServiceVersion ScriptServiceVersion2 = new(nameof(ScriptServiceVersion2));
public static ScriptServiceVersion KubernetesScriptServiceVersion1 = new(nameof(KubernetesScriptServiceVersion1));

// Only ScriptServiceV2 has the AbandonScript verb, so the orchestrator checks this before it
// escalates a stuck cancel to abandon and never calls abandon where it can't work. This assumes
// a V2 tentacle advertises AbandonScript (true from this build forward); the server doesn't enable
// abandon against older tentacles, so we don't re-check the capability per call here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can remove this comment

public bool SupportsAbandon => Value == nameof(ScriptServiceVersion2);
Comment thread
jimmyp marked this conversation as resolved.
Outdated

public override string ToString() => Value;
}
}
28 changes: 26 additions & 2 deletions source/Octopus.Tentacle.Client/TentacleClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using Octopus.Tentacle.Contracts.Capabilities;
using Octopus.Tentacle.Contracts.Logging;
using Octopus.Tentacle.Contracts.Observability;
using Octopus.Tentacle.Contracts.ScriptServiceV2;
using ITentacleClientObserver = Octopus.Tentacle.Contracts.Observability.ITentacleClientObserver;

namespace Octopus.Tentacle.Client
Expand Down Expand Up @@ -168,7 +169,8 @@ public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand exec
OnScriptStatusResponseReceived onScriptStatusResponseReceived,
OnScriptCompleted onScriptCompleted,
ITentacleClientTaskLog logger,
CancellationToken scriptExecutionCancellationToken)
CancellationToken scriptExecutionCancellationToken,
TimeSpan? abandonAfterCancellationPendingFor = null)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(ExecuteScript)}");
activity?.AddTag("octopus.tentacle.script.files", string.Join(",", executeScriptCommand.Files.Select(f => f.Name)));
Expand All @@ -188,7 +190,8 @@ public async Task<ScriptExecutionResult> ExecuteScript(ExecuteScriptCommand exec
var orchestrator = new ObservingScriptOrchestrator(scriptObserverBackOffStrategy,
onScriptStatusResponseReceived,
onScriptCompleted,
scriptExecutor);
scriptExecutor,
abandonAfterCancellationPendingFor);

var result = await orchestrator.ExecuteScript(executeScriptCommand, scriptExecutionCancellationToken);

Expand Down Expand Up @@ -260,6 +263,27 @@ public async Task<ScriptOperationExecutionResult> CancelScript(CommandContext co
return await scriptExecutor.CancelScript(commandContext);
}

public async Task<ScriptStatusResponseV2> AbandonScript(ScriptTicket scriptTicket, ITentacleClientTaskLog logger, CancellationToken cancellationToken)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(AbandonScript)}");
activity?.AddTag("octopus.tentacle.script.ticket", scriptTicket.TaskId);

async Task<ScriptStatusResponseV2> AbandonScriptAction(CancellationToken ct)
{
var request = new AbandonScriptCommandV2(scriptTicket, lastLogSequence: 0);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think giving a lastLogSequence of 0 will ask for ALL logs to be returned from the very beginning of the script. I think cancel above is using the CommandContext to get the "tail" of the logs rather than all the logs. It also goes via the scriptExecutor. Why is this not following the same pattern?

return await allClients.ScriptServiceV2.AbandonScriptAsync(request, new HalibutProxyRequestOptions(ct));
}

return await rpcCallExecutor.Execute(
retriesEnabled: clientOptions.RpcRetrySettings.RetriesEnabled,
RpcCall.Create<IScriptServiceV2>(nameof(IScriptServiceV2.AbandonScript)),
AbandonScriptAction,
logger,
// Abandon is a one-shot RPC; like CancelScript we don't track operation metrics for it.
ClientOperationMetricsBuilder.Start(),
cancellationToken).ConfigureAwait(false);
}

public async Task<ScriptStatus?> CompleteScript(CommandContext commandContext, ITentacleClientTaskLog logger, CancellationToken scriptExecutionCancellationToken)
{
using var activity = ActivitySource.StartActivity($"{nameof(TentacleClient)}.{nameof(CompleteScript)}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public interface IAsyncClientScriptServiceV2
Task<ScriptStatusResponseV2> StartScriptAsync(StartScriptCommandV2 command, HalibutProxyRequestOptions proxyRequestOptions);
Task<ScriptStatusResponseV2> GetStatusAsync(ScriptStatusRequestV2 request, HalibutProxyRequestOptions proxyRequestOptions);
Task<ScriptStatusResponseV2> CancelScriptAsync(CancelScriptCommandV2 command, HalibutProxyRequestOptions proxyRequestOptions);
Task<ScriptStatusResponseV2> AbandonScriptAsync(AbandonScriptCommandV2 command, HalibutProxyRequestOptions proxyRequestOptions);
Task CompleteScriptAsync(CompleteScriptCommandV2 command, HalibutProxyRequestOptions proxyRequestOptions);
}
}
1 change: 1 addition & 0 deletions source/Octopus.Tentacle.Contracts/ScriptExitCodes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public static class ScriptExitCodes
public const int UnknownScriptExitCode = -45;
public const int UnknownResultExitCode = -46;
public const int PowerShellNeverStartedExitCode = -47;
public const int AbandonedExitCode = -48;

//Kubernetes Agent
public const int KubernetesScriptPodNotFound = -81;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using System;

namespace Octopus.Tentacle.Contracts.ScriptServiceV2
{
/// <summary>
/// Command passed to <c>AbandonScript</c> on the V2 script service, identifying the script to abandon.
/// </summary>
public class AbandonScriptCommandV2
{
    /// <summary>The ticket of the script to abandon.</summary>
    public ScriptTicket Ticket { get; }

    /// <summary>
    /// Log sequence value supplied by the caller.
    /// NOTE(review): presumably the point after which logs should be returned in the response —
    /// confirm against the script service implementation.
    /// </summary>
    public long LastLogSequence { get; }

    /// <summary>Creates the command from the script ticket and the caller's log sequence.</summary>
    /// <param name="ticket">The ticket of the script to abandon</param>
    /// <param name="lastLogSequence">The log sequence value to send with the command</param>
    public AbandonScriptCommandV2(ScriptTicket ticket, long lastLogSequence)
    {
        Ticket = ticket;
        LastLogSequence = lastLogSequence;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public interface IScriptServiceV2
ScriptStatusResponseV2 StartScript(StartScriptCommandV2 command);
ScriptStatusResponseV2 GetStatus(ScriptStatusRequestV2 request);
ScriptStatusResponseV2 CancelScript(CancelScriptCommandV2 command);
ScriptStatusResponseV2 AbandonScript(AbandonScriptCommandV2 command);
void CompleteScript(CompleteScriptCommandV2 command);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public interface IAsyncScriptServiceV2
Task<ScriptStatusResponseV2> StartScriptAsync(StartScriptCommandV2 command, CancellationToken cancellationToken);
Task<ScriptStatusResponseV2> GetStatusAsync(ScriptStatusRequestV2 request, CancellationToken cancellationToken);
Task<ScriptStatusResponseV2> CancelScriptAsync(CancelScriptCommandV2 command, CancellationToken cancellationToken);
Task<ScriptStatusResponseV2> AbandonScriptAsync(AbandonScriptCommandV2 command, CancellationToken cancellationToken);
Task CompleteScriptAsync(CompleteScriptCommandV2 command, CancellationToken cancellationToken);
}
}
Loading