-
Notifications
You must be signed in to change notification settings - Fork 28
ENGINE-1383 winrm fixes for reboot and panic resolve. #324
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: release-0.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,11 +177,46 @@ func (c Windows) CommandExist(h Host, cmd string) bool { | |
| return h.Execf("where /q %s", cmd) == nil | ||
| } | ||
|
|
||
| // Reboot executes the reboot command | ||
| // Reboot triggers an immediate forced restart by scheduling a SYSTEM-context | ||
| // one-shot task that runs 'shutdown /r /f /t 5', then immediately triggering | ||
| // and deleting it within the 5-second countdown window. | ||
| // | ||
| // Running via a scheduled task bypasses the filtered Administrator token used | ||
| // by WinRM sessions (e.g. AWS EC2) which lacks SeShutdownPrivilege. Issuing | ||
| // 'shutdown /r' directly in the WinRM session is silently ignored in that | ||
| // context. | ||
| // | ||
| // /sc onstart is used instead of /sc once to avoid schtasks writing a | ||
| // stderr warning about the start time being in the past, which rig treats | ||
| // as an error. The task is deleted immediately after triggering (while the | ||
| // 5-second timer counts down) so it does not re-fire on subsequent startups. | ||
| func (c Windows) Reboot(h Host) error { | ||
| if err := h.Exec("shutdown /r /t 5"); err != nil { | ||
| return fmt.Errorf("failed to reboot: %w", err) | ||
| const taskName = "RigReboot" | ||
| // Create a SYSTEM-context ONSTART task that runs 'shutdown /r /f /t 5'. | ||
| // The 5-second delay gives us time to delete the task before the OS | ||
| // actually executes the reboot, preventing it from firing again on the | ||
| // next startup. | ||
| create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 5" /sc onstart /f /ru SYSTEM`, taskName) | ||
| if err := h.Exec(create); err != nil { | ||
| return fmt.Errorf("failed to create reboot task: %w", err) | ||
|
Comment on lines
+194
to
+201
|
||
| } | ||
| run := fmt.Sprintf(`schtasks /run /tn "%s"`, taskName) | ||
| if err := h.Exec(run); err != nil { | ||
| // Tolerate connection-level errors; the OS may kill WinRM as it starts | ||
| // rebooting before the run command returns. | ||
| errMsg := err.Error() | ||
| if !strings.Contains(errMsg, "connection") && !strings.Contains(errMsg, "closed") && !strings.Contains(errMsg, "EOF") { | ||
| return fmt.Errorf("failed to run reboot task: %w", err) | ||
| } | ||
| } | ||
| // Delete the task immediately while the 5-second shutdown timer is still | ||
| // counting down. This prevents it from re-firing on subsequent startups. | ||
| del := fmt.Sprintf(`schtasks /delete /tn "%s" /f`, taskName) | ||
| // Best-effort: ignore delete errors — if the task fires before we can | ||
| // delete it, the caller is expected to delete it after reconnecting. | ||
| _ = h.Exec(del) | ||
|
Comment on lines
+212
to
+217
|
||
| // Allow Windows time to complete shutdown before waitForHost begins polling. | ||
| time.Sleep(15 * time.Second) | ||
| return nil | ||
|
Comment on lines
+218
to
220
|
||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -197,27 +197,18 @@ type Command struct { | |||
| } | ||||
|
|
||||
| // Wait blocks until the command finishes | ||||
| func (c *Command) Wait() (err error) { //nolint:nonamedreturns // needed for panic recovery | ||||
| defer func() { | ||||
| if r := recover(); err == nil && r != nil { | ||||
| if strings.Contains(fmt.Sprint(r), "close of closed channel") { | ||||
| log.Debugf("recovered from a panic in Command.Wait: %v", r) | ||||
| } else { | ||||
| panic(r) | ||||
| } | ||||
| } | ||||
| }() | ||||
|
|
||||
| // Wait blocks until the command finishes | ||||
|
||||
| // Wait blocks until the command finishes |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment calls the scheduled task "one-shot", but the implementation uses `schtasks /sc onstart`, which will run on every startup unless the task is successfully deleted. Either adjust the wording to reflect that it's an ONSTART task deleted as part of the reboot flow, or switch to a truly one-shot schedule (e.g., `/sc once`) if feasible.