Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agent/agent_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ type AgentConfiguration struct {
EnableJobLogTmpfile bool
JobLogPath string
WriteJobLogsToStdout bool
JobLogsOTLP bool
LogFormat string
Shell string
HooksShell string
Expand Down
32 changes: 18 additions & 14 deletions agent/job_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -706,25 +706,29 @@ BUILDKITE_AGENT_JWKS_KEY_ID`
setEnv("BUILDKITE_TRACING_BACKEND", r.conf.AgentConfiguration.TracingBackend)
setEnv("BUILDKITE_TRACING_SERVICE_NAME", r.conf.AgentConfiguration.TracingServiceName)

// Buildkite backend can provide a traceparent property on the job
// which can be propagated to the job tracing if OpenTelemetry is used
//
// https://www.w3.org/TR/trace-context/#traceparent-header
if r.conf.Job.TraceParent != "" {
setEnv("BUILDKITE_TRACING_TRACEPARENT", r.conf.Job.TraceParent)
}
// Buildkite backend may also provide a tracestate property on the job,
// which carries vendor-specific trace context alongside the traceparent.
//
// https://www.w3.org/TR/trace-context/#tracestate-header
if r.conf.Job.TraceState != "" {
setEnv("BUILDKITE_TRACING_TRACESTATE", r.conf.Job.TraceState)
}
if r.conf.AgentConfiguration.TracingPropagateTraceparent {
// Buildkite backend can provide a traceparent property on the job
// which can be propagated to the job tracing if OpenTelemetry is used.
//
// https://www.w3.org/TR/trace-context/#traceparent-header
if r.conf.Job.TraceParent != "" {
setEnv("BUILDKITE_TRACING_TRACEPARENT", r.conf.Job.TraceParent)
}
// Buildkite backend may also provide a tracestate property on the job,
// which carries vendor-specific trace context alongside traceparent.
//
// https://www.w3.org/TR/trace-context/#tracestate-header
if r.conf.Job.TraceState != "" {
setEnv("BUILDKITE_TRACING_TRACESTATE", r.conf.Job.TraceState)
}
setEnv("BUILDKITE_TRACING_PROPAGATE_TRACEPARENT", "true")
}
}

if r.conf.AgentConfiguration.JobLogsOTLP {
setEnv("BUILDKITE_JOB_LOGS_OTLP", "true")
}
Comment on lines +728 to +730

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Prevent jobs from enabling OTLP export

When the agent option is false, this branch does nothing, so any BUILDKITE_JOB_LOGS_OTLP=true supplied in the job environment survives into the bootstrap. The hidden bootstrap flag reads that same env var, and the exporter also honors job-supplied OTEL_EXPORTER_OTLP_*, so a pipeline can enable and redirect job-log export even when the agent operator did not opt in; force this env to the agent-configured boolean or remove/protect it before launching bootstrap.

Useful? React with 👍 / 👎.


setEnv("BUILDKITE_AGENT_DISABLE_WARNINGS_FOR", strings.Join(r.conf.AgentConfiguration.DisableWarningsFor, ","))

// see documentation for BuildkiteMessageMax
Expand Down
7 changes: 7 additions & 0 deletions clicommand/agent_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ type AgentStartConfig struct {

LogFormat string `cli:"log-format"`
WriteJobLogsToStdout bool `cli:"write-job-logs-to-stdout"`
JobLogsOTLP bool `cli:"job-logs-otlp"`
DisableWarningsFor []string `cli:"disable-warnings-for" normalize:"list"`

BuildPath string `cli:"build-path" normalize:"filepath" validate:"required"`
Expand Down Expand Up @@ -437,6 +438,11 @@ var AgentStartCommand = cli.Command{
Usage: "Writes job logs to the agent process' stdout. This simplifies log collection if running agents in Docker (default: false)",
EnvVar: "BUILDKITE_WRITE_JOB_LOGS_TO_STDOUT",
},
cli.BoolFlag{
Name: "job-logs-otlp",
Usage: "Export job logs directly as OpenTelemetry log records using the OTEL_EXPORTER_OTLP_LOGS_* / OTEL_EXPORTER_OTLP_* environment configuration (default: false)",
EnvVar: "BUILDKITE_JOB_LOGS_OTLP",
},
cli.StringFlag{
Name: "shell",
Value: DefaultShell(),
Expand Down Expand Up @@ -1107,6 +1113,7 @@ var AgentStartCommand = cli.Command{
EnableJobLogTmpfile: cfg.EnableJobLogTmpfile,
JobLogPath: cfg.JobLogPath,
WriteJobLogsToStdout: cfg.WriteJobLogsToStdout,
JobLogsOTLP: cfg.JobLogsOTLP,
LogFormat: cfg.LogFormat,
Shell: cfg.Shell,
HooksShell: cfg.HooksShell,
Expand Down
8 changes: 7 additions & 1 deletion clicommand/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ type BootstrapConfig struct {
TracingPropagateTraceparent bool `cli:"tracing-propagate-traceparent"`
TraceContextEncoding string `cli:"trace-context-encoding"`
NoJobAPI bool `cli:"no-job-api"`
JobLogsOTLP bool `cli:"job-logs-otlp"`
DisableWarningsFor []string `cli:"disable-warnings-for" normalize:"list"`
CheckoutAttempts int `cli:"checkout-attempts"`
}
Expand Down Expand Up @@ -361,12 +362,16 @@ var BootstrapCommand = cli.Command{
Usage: "Accept traceparent from Buildkite control plane (default: false)",
EnvVar: "BUILDKITE_TRACING_PROPAGATE_TRACEPARENT",
},

cli.BoolFlag{
Name: "no-job-api",
Usage: "Disables the Job API, which gives commands in jobs some abilities to introspect and mutate the state of the job (default: false)",
EnvVar: "BUILDKITE_AGENT_NO_JOB_API",
},
cli.BoolFlag{
Name: "job-logs-otlp",
EnvVar: "BUILDKITE_JOB_LOGS_OTLP",
Hidden: true,
},
cli.StringSliceFlag{
Name: "disable-warnings-for",
Usage: "A list of warning IDs to disable",
Expand Down Expand Up @@ -501,6 +506,7 @@ var BootstrapCommand = cli.Command{
TracingTraceState: cfg.TracingTraceState,
TracingPropagateTraceparent: cfg.TracingPropagateTraceparent,
JobAPI: !cfg.NoJobAPI,
JobLogsOTLP: cfg.JobLogsOTLP,
DisabledWarnings: cfg.DisableWarningsFor,
Secrets: cfg.Secrets,
CheckoutAttempts: cfg.CheckoutAttempts,
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,13 @@ require (
go.opentelemetry.io/contrib/propagators/jaeger v1.43.0
go.opentelemetry.io/contrib/propagators/ot v1.43.0
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
go.opentelemetry.io/otel/log v0.15.0
go.opentelemetry.io/otel/sdk v1.43.0
go.opentelemetry.io/otel/sdk/log v0.14.0
go.opentelemetry.io/otel/trace v1.43.0
golang.org/x/crypto v0.52.0
golang.org/x/net v0.55.0
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -477,16 +477,26 @@ go.opentelemetry.io/contrib/propagators/ot v1.43.0 h1:Hh1HahlGc81AOE7siqi1tVOlba
go.opentelemetry.io/contrib/propagators/ot v1.43.0/go.mod h1:58MlyS7lghzYvAm5LN9gGmZpCMQEMB5vpZp9SRgOyE4=
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0 h1:OMqPldHt79PqWKOMYIAQs3CxAi7RLgPxwfFSwr4ZxtM=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0/go.mod h1:1biG4qiqTxKiUCtoWDPpL3fB3KxVwCiGw81j3nKMuHE=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak=
go.opentelemetry.io/otel/log v0.15.0 h1:0VqVnc3MgyYd7QqNVIldC3dsLFKgazR6P3P3+ypkyDY=
go.opentelemetry.io/otel/log v0.15.0/go.mod h1:9c/G1zbyZfgu1HmQD7Qj84QMmwTp2QCQsZH1aeoWDE4=
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
go.opentelemetry.io/otel/sdk/log v0.14.0 h1:JU/U3O7N6fsAXj0+CXz21Czg532dW2V4gG1HE/e8Zrg=
go.opentelemetry.io/otel/sdk/log v0.14.0/go.mod h1:imQvII+0ZylXfKU7/wtOND8Hn4OpT3YUoIgqJVksUkM=
go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM=
go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA=
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
Expand Down
9 changes: 6 additions & 3 deletions internal/job/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ type ExecutorConfig struct {
TracingTraceParent string

// W3C tracestate accompanying TracingTraceParent. Plumbed through to the
// bootstrap environment whenever the server provides a value, but only
// attached to the OTel span context when TracingPropagateTraceparent is
// enabled (same opt-in gate as TracingTraceParent).
// bootstrap environment and attached to the OTel span context only when
// TracingPropagateTraceparent is enabled (same opt-in gate as
// TracingTraceParent).
TracingTraceState string

// Accept traceparent context from Buildkite control plane
Expand All @@ -216,6 +216,9 @@ type ExecutorConfig struct {
// Whether to start the JobAPI
JobAPI bool

// Whether to emit visible job process output as OTLP log records.
JobLogsOTLP bool

// The warnings that have been disabled by the user
DisabledWarnings []string

Expand Down
96 changes: 92 additions & 4 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,45 @@ type Executor struct {
// redactors for the job logs. The will be populated with values both from environment variable and through the Job API.
// In order for the latter to happen, a reference is passed into the the Job API server as well
redactors *replacer.Mux

// loggerTee carries the redacted shell logger output (section headers,
// prompts, comments, warnings) to stderr and, when OTLP job logging is
// enabled, mirrors it into the OTLP exporter so the exported records match
// the customer-facing Buildkite job log.
loggerTee *teeWriter
}

// teeWriter writes to a primary writer and, when configured, also to a
// secondary writer. The secondary sink can be attached after construction,
// which lets the executor mirror already-redacted shell logger output into the
// OTLP exporter once it has been initialised.
type teeWriter struct {
mu sync.Mutex
primary io.Writer
secondary io.Writer
}

func (w *teeWriter) Write(p []byte) (int, error) {
// Hold the lock for the whole write so that detaching the secondary sink
// (setSecondary(nil), called before the OTLP exporter is flushed and shut
// down) cannot complete while a write is still in flight. This guarantees
// no control output lands on the OTLP emitter after it has been flushed.
w.mu.Lock()
defer w.mu.Unlock()

n, err := w.primary.Write(p)
if w.secondary != nil {
// The secondary sink is best-effort: failures must not affect the
// primary (customer-facing) job log output.
_, _ = w.secondary.Write(p)
}
return n, err
}

func (w *teeWriter) setSecondary(secondary io.Writer) {
w.mu.Lock()
w.secondary = secondary
w.mu.Unlock()
}

// New returns a new executor instance
Expand Down Expand Up @@ -152,6 +191,34 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) {
// Create an empty env for us to keep track of our env changes in
e.shell.Env = env.FromSlice(os.Environ())

// OTLP job log export lives entirely in the bootstrap process, which is the
// single home for this feature. When OpenTelemetry tracing is enabled, the
// per-command emit context carries the active hook/command span, so log
// records are trace-correlated. With tracing disabled, records are still
// emitted but remain uncorrelated.
if e.JobLogsOTLP {
jobLogger, err := newOTLPJobLogger(ctx, e)
if err != nil {
e.shell.Warningf("Failed to initialize OTLP job log exporter: %v", err)
} else {
e.shell.SetOutputInterceptor(jobLogger.Wrap)
// Mirror the redacted shell logger control output (section headers,
// prompts, comments, warnings) into OTLP so the exported records
// match the downloadable Buildkite job log and the UI stream.
if e.loggerTee != nil {
e.loggerTee.setSecondary(jobLogger.controlWriter())
}
defer func() {
if e.loggerTee != nil {
e.loggerTee.setSecondary(nil)
}
if err := jobLogger.Close(); err != nil {
e.shell.Warningf("Failed to close OTLP job log exporter: %v", err)
}
}()
}
}

// Initialize the job API, iff the experiment is enabled. Noop otherwise
if e.JobAPI {
cleanup, err := e.startJobAPI()
Expand Down Expand Up @@ -357,6 +424,18 @@ type HookConfig struct {
PluginName string
}

func hookLogAttributes(hookCfg HookConfig) map[string]string {
attrs := map[string]string{
"buildkite.phase": "hook",
"buildkite.hook.name": hookCfg.Name,
"buildkite.hook.scope": hookCfg.Scope,
}
if hookCfg.PluginName != "" {
attrs["buildkite.hook.plugin"] = hookCfg.PluginName
}
return attrs
}

func (e *Executor) tracingImplementationSpecificHookScope(scope string) string {
if e.TracingBackend != tracetools.BackendDatadog {
return scope
Expand Down Expand Up @@ -459,7 +538,7 @@ func (e *Executor) runUnwrappedHook(ctx context.Context, _ string, hookCfg HookC
environ.Set("BUILDKITE_HOOK_PATH", hookCfg.Path)
environ.Set("BUILDKITE_HOOK_SCOPE", hookCfg.Scope)

if err := e.shell.Command(hookCfg.Path).Run(ctx, shell.WithExtraEnv(environ)); err != nil {
if err := e.shell.Command(hookCfg.Path).Run(ctx, shell.WithExtraEnv(environ), shell.WithOutputAttributes(hookLogAttributes(hookCfg))); err != nil {
return err
}
// Passing an empty env changes through because in polyglot hook we can't detect
Expand Down Expand Up @@ -568,7 +647,7 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin
r.Break()
return err
}
err = script.Run(ctx, shell.ShowPrompt(false), shell.WithExtraEnv(hookCfg.Env))
err = script.Run(ctx, shell.ShowPrompt(false), shell.WithExtraEnv(hookCfg.Env), shell.WithOutputAttributes(hookLogAttributes(hookCfg)))
if errors.Is(err, syscall.ETXTBSY) {
return err
}
Expand Down Expand Up @@ -1293,7 +1372,11 @@ func (e *Executor) defaultCommandPhase(ctx context.Context) (retErr error) {
e.shell.Promptf("%s", cmdToExec)
}

err = e.shell.Command(cmd[0], cmd[1:]...).Run(ctx, shell.ShowPrompt(false))
err = e.shell.Command(cmd[0], cmd[1:]...).Run(ctx, shell.ShowPrompt(false), shell.WithOutputAttributes(map[string]string{
"buildkite.phase": "command",
"buildkite.hook.name": "command",
"buildkite.hook.scope": "default",
}))
return err
}

Expand Down Expand Up @@ -1392,7 +1475,12 @@ func (e *Executor) setupRedactors(log shell.Logger, environ *env.Environment, st

stdoutRedactor := replacer.New(stdout, needles, redact.Redacted)
e.redactors.Append(stdoutRedactor)
loggerRedactor := replacer.New(stderr, needles, redact.Redacted)
// The shell logger writes through this redactor into loggerTee, whose
// primary sink is stderr. When OTLP job logging is enabled, a secondary
// sink is attached so the same already-redacted control output is mirrored
// into the OTLP exporter.
e.loggerTee = &teeWriter{primary: stderr}
loggerRedactor := replacer.New(e.loggerTee, needles, redact.Redacted)
e.redactors.Append(loggerRedactor)

logger := shell.NewWriterLogger(loggerRedactor, true, e.DisabledWarnings)
Expand Down
42 changes: 42 additions & 0 deletions internal/job/executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/buildkite/agent/v3/tracetools"
"github.com/google/go-cmp/cmp"
"github.com/opentracing/opentracing-go"
"go.opentelemetry.io/otel/trace"
"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/opentracer"
)

Expand Down Expand Up @@ -107,6 +108,47 @@ func TestStartTracing_Datadog(t *testing.T) {
stopper()
}

func TestContextWithTraceparentIfEnabledDoesNotAcceptServerTraceparentWithoutPropagation(t *testing.T) {
t.Parallel()

sh, err := shell.New(shell.WithLogger(shell.DiscardLogger))
if err != nil {
t.Fatalf("shell.New() error = %v", err)
}
e := New(ExecutorConfig{
TracingTraceParent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01",
})
e.shell = sh

ctx := e.contextWithTraceparentIfEnabled(t.Context())
if sc := trace.SpanContextFromContext(ctx); sc.IsValid() {
t.Fatalf("SpanContextFromContext(ctx).IsValid() = true, want false")
}
}

func TestContextWithTraceparentIfEnabledAcceptsServerTraceparentWithPropagation(t *testing.T) {
t.Parallel()

sh, err := shell.New(shell.WithLogger(shell.DiscardLogger))
if err != nil {
t.Fatalf("shell.New() error = %v", err)
}
e := New(ExecutorConfig{
TracingPropagateTraceparent: true,
TracingTraceParent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01",
})
e.shell = sh

ctx := e.contextWithTraceparentIfEnabled(t.Context())
sc := trace.SpanContextFromContext(ctx)
if !sc.IsValid() {
t.Fatalf("SpanContextFromContext(ctx).IsValid() = false, want true")
}
if got, want := sc.TraceID().String(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; got != want {
t.Fatalf("SpanContextFromContext(ctx).TraceID() = %q, want %q", got, want)
}
}

// newCancelTestExecutor returns an Executor whose shell.Env starts empty,
// suitable for exercising Cancel without depending on the host environment.
func newCancelTestExecutor(t *testing.T) *Executor {
Expand Down
Loading