Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions clicommand/job_promise_failure.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (

"github.com/buildkite/agent/v3/api"
"github.com/buildkite/agent/v3/internal/redact"
"github.com/buildkite/agent/v3/internal/socket"
"github.com/buildkite/agent/v3/jobapi"
"github.com/buildkite/agent/v3/logger"
"github.com/urfave/cli"
Expand Down Expand Up @@ -113,33 +112,37 @@ var JobPromiseFailureCommand = cli.Command{
return declarePromiseFailureDirectly(ctx, l, cfg, jobID, exitStatus, reason)
}

outcome, err := client.DeclarePromiseFailure(ctx, exitStatus, reason)
if err == nil {
if outcome == jobapi.PromiseFailureDebounced {
// Log at debug to avoid spamming job logs on repeated calls.
l.Debugf("Promised exit status %d already declared for job %s (debounced)", exitStatus, jobID)
} else {
l.Infof("Declared promised exit status %d for job %s", exitStatus, jobID)
}
return nil
result, err := client.DeclarePromiseFailure(ctx, exitStatus, reason)
if err != nil {
// We couldn't reach or use the Job API (or its response was lost).
// Declare directly so the promise still lands; the endpoint is idempotent
// for the same exit status, so a duplicate is safe.
l.Warnf("Couldn't reach the Job API to declare the promised failure; declaring it directly: %v", err)
return declarePromiseFailureDirectly(ctx, l, cfg, jobID, exitStatus, reason)
}

// The Job API returned a definitive HTTP error: the Buildkite API
// rejected the declaration (409, 422) or was unreachable after retries
// (502). Surface it rather than declaring again.
var apiErr socket.APIErr
if errors.As(err, &apiErr) {
return promiseFailureError(apiErr.StatusCode, err)
if !result.Accepted {
return promiseFailureResultError(result)
}

// We couldn't reach the Job API (or its response was lost). Declare
// directly so the promise still lands; the endpoint is idempotent for the
// same exit status, so a duplicate is safe.
l.Warnf("Couldn't reach the Job API to declare the promised failure; declaring it directly: %v", err)
return declarePromiseFailureDirectly(ctx, l, cfg, jobID, exitStatus, reason)
if result.Outcome == jobapi.PromiseFailureDebounced {
// Log at debug to avoid spamming job logs on repeated calls.
l.Debugf("Promised exit status %d already declared for job %s (debounced)", exitStatus, jobID)
} else {
l.Infof("Declared promised exit status %d for job %s", exitStatus, jobID)
}
return nil
},
}

func promiseFailureResultError(result *jobapi.PromiseFailureResponse) error {
err := errors.New(result.Error)
if result.Error == "" {
err = errors.New("buildkite API rejected the promised failure")
}
return promiseFailureError(result.UpstreamStatus, err)
}

// promiseFailureError wraps err with a human-readable message for the Buildkite
// API status code. The command exits non-zero so the failure is visible to
// scripts, which can append '|| true' to ignore it.
Expand Down
7 changes: 5 additions & 2 deletions internal/job/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/buildkite/agent/v3/internal/socket"
"github.com/buildkite/agent/v3/jobapi"
"github.com/buildkite/agent/v3/logger"
"github.com/buildkite/agent/v3/version"
)

// startJobAPI starts the job API server, iff the OS of the box supports it otherwise it returns a
Expand Down Expand Up @@ -84,8 +85,10 @@ func (e *Executor) declarePromiseFailure(ctx context.Context, exitStatus int, re
// logger.Discard keeps the access token (in HTTP debug dumps) out of the job
// log; retry warnings still go to the shell logger.
apiClient := api.NewClient(logger.Discard, api.Config{
Endpoint: e.shell.Env.GetString("BUILDKITE_AGENT_ENDPOINT", ""),
Token: e.shell.Env.GetString("BUILDKITE_AGENT_ACCESS_TOKEN", ""),
Endpoint: e.shell.Env.GetString("BUILDKITE_AGENT_ENDPOINT", ""),
Token: e.shell.Env.GetString("BUILDKITE_AGENT_ACCESS_TOKEN", ""),
DisableHTTP2: e.shell.Env.GetBool("BUILDKITE_NO_HTTP2", false),
UserAgent: version.UserAgent(),
})

req := &api.JobPromiseFailureRequest{
Expand Down
14 changes: 7 additions & 7 deletions jobapi/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,18 +127,18 @@ func (c *Client) RedactionCreate(ctx context.Context, text string) (string, erro
// given exit status and reason to the Buildkite API, blocking until it
// completes. The server debounces repeated and concurrent calls for the same
// exit status: concurrent callers share one in-flight call, and once it succeeds
// later calls return from the cache. On success it returns the outcome
// (PromiseFailureDeclared or PromiseFailureDebounced). A failed declaration is
// returned as a socket.APIErr carrying the HTTP status code (the Buildkite API's
// status when it responded, otherwise 502).
func (c *Client) DeclarePromiseFailure(ctx context.Context, exitStatus int, reason string) (string, error) {
// later calls return from the cache. If the Job API handles the request, the
// returned response describes whether the Buildkite API accepted or rejected the
// declaration. Errors are reserved for Job API transport/request failures.
func (c *Client) DeclarePromiseFailure(ctx context.Context, exitStatus int, reason string) (*PromiseFailureResponse, error) {
req := PromiseFailureRequest{
ExitStatus: exitStatus,
Reason: reason,
}
var resp PromiseFailureResponse
if err := c.client.Do(ctx, http.MethodPost, promiseFailureURL, &req, &resp); err != nil {
return "", err
return nil, err
}
return resp.Outcome, nil

return &resp, nil
}
39 changes: 26 additions & 13 deletions jobapi/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,17 @@ func (f *fakeServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
f.promised = append(f.promised, req)
// Exit status 99 stands in for a declaration the Buildkite API rejects.
if req.ExitStatus == 99 {
_ = socket.WriteError(w, "a different exit status was already declared", http.StatusConflict)
if err := json.NewEncoder(w).Encode(&PromiseFailureResponse{
Outcome: PromiseFailureDeclared,
Accepted: false,
UpstreamStatus: http.StatusConflict,
Error: "a different exit status was already declared",
}); err != nil {
_ = socket.WriteError(w, fmt.Sprintf("encoding response: %v", err), http.StatusInternalServerError)
}
return
}
if err := json.NewEncoder(w).Encode(&PromiseFailureResponse{Outcome: PromiseFailureDeclared}); err != nil {
if err := json.NewEncoder(w).Encode(&PromiseFailureResponse{Outcome: PromiseFailureDeclared, Accepted: true}); err != nil {
_ = socket.WriteError(w, fmt.Sprintf("encoding response: %v", err), http.StatusInternalServerError)
}
return
Expand Down Expand Up @@ -164,23 +171,29 @@ func TestClientDeclarePromiseFailure(t *testing.T) {

// A successful declaration returns the outcome and no error, and forwards the
// exit status and reason to the server.
outcome, err := cli.DeclarePromiseFailure(ctx, 1, "tests failed")
result, err := cli.DeclarePromiseFailure(ctx, 1, "tests failed")
if err != nil {
t.Fatalf("cli.DeclarePromiseFailure(1) error = %v", err)
}
if outcome != PromiseFailureDeclared {
t.Errorf("cli.DeclarePromiseFailure(1) outcome = %q, want %q", outcome, PromiseFailureDeclared)
if result.Outcome != PromiseFailureDeclared {
t.Errorf("cli.DeclarePromiseFailure(1) outcome = %q, want %q", result.Outcome, PromiseFailureDeclared)
}
if !result.Accepted {
t.Errorf("cli.DeclarePromiseFailure(1) accepted = false, want true")
}

// A rejected declaration surfaces a socket.APIErr carrying the Buildkite API
// status code, so the caller can produce an accurate message.
_, err = cli.DeclarePromiseFailure(ctx, 99, "")
var apiErr socket.APIErr
if !errors.As(err, &apiErr) {
t.Fatalf("cli.DeclarePromiseFailure(99) error = %v, want a socket.APIErr", err)
// A rejected declaration is still a successful Job API response. The response
// carries the Buildkite API status code so the caller can produce an accurate
// message without guessing where the error came from.
result, err = cli.DeclarePromiseFailure(ctx, 99, "")
if err != nil {
t.Fatalf("cli.DeclarePromiseFailure(99) error = %v, want nil", err)
}
if result.Accepted {
t.Errorf("cli.DeclarePromiseFailure(99) accepted = true, want false")
}
if apiErr.StatusCode != http.StatusConflict {
t.Errorf("apiErr.StatusCode = %d, want %d", apiErr.StatusCode, http.StatusConflict)
if result.UpstreamStatus != http.StatusConflict {
t.Errorf("result.UpstreamStatus = %d, want %d", result.UpstreamStatus, http.StatusConflict)
}

want := []PromiseFailureRequest{
Expand Down
10 changes: 10 additions & 0 deletions jobapi/payloads.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,14 @@ const (
type PromiseFailureResponse struct {
// Outcome is PromiseFailureDeclared or PromiseFailureDebounced.
Outcome string `json:"outcome"`

// Accepted reports whether the Buildkite API accepted the promised failure.
Accepted bool `json:"accepted"`

// UpstreamStatus is the Buildkite API status, when one was received.
// Network errors that never received a response leave this as 0.
UpstreamStatus int `json:"upstream_status,omitempty"`

// Error is the Buildkite API declaration error, if Accepted is false.
Error string `json:"error,omitempty"`
}
Loading