Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions agent/agent_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,8 @@ type AgentConfiguration struct {
Profile string
RedactedVars []string
AcquireJob string
TracingBackend string
TracingServiceName string
TracingPropagateTraceparent bool
OpenTelemetryTracing bool
TelemetryServiceName string
DisableWarningsFor []string
AllowMultipartArtifactUpload bool
ArtifactUploadConcurrency int
Expand Down
16 changes: 6 additions & 10 deletions agent/job_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ func (r *JobRunner) createEnvironment(ctx context.Context) ([]string, error) {
if r.envShellFile != nil {
// Note that some variables in this list might not be defined later,
// when something comes to read the file. See below where they are
// added conditionally, e.g. BUILDKITE_TRACING_BACKEND.
// added conditionally, e.g. BUILDKITE_OPENTELEMETRY_TRACING.
// Docker in particular tolerates undefined vars in an env file
// without complaints.
const agentCfgVars = `BUILDKITE_GIT_CHECKOUT_FLAGS
Expand All @@ -416,11 +416,10 @@ BUILDKITE_SHELL
BUILDKITE_HOOKS_SHELL
BUILDKITE_SSH_KEYSCAN
BUILDKITE_STRICT_SINGLE_HOOKS
BUILDKITE_TRACING_BACKEND
BUILDKITE_TRACING_SERVICE_NAME
BUILDKITE_OPENTELEMETRY_TRACING
BUILDKITE_TELEMETRY_SERVICE_NAME
BUILDKITE_TRACING_TRACEPARENT
BUILDKITE_TRACING_TRACESTATE
BUILDKITE_TRACING_PROPAGATE_TRACEPARENT
BUILDKITE_AGENT_AWS_KMS_KEY
BUILDKITE_AGENT_GCP_KMS_KEY
BUILDKITE_AGENT_JWKS_FILE
Expand Down Expand Up @@ -638,9 +637,9 @@ BUILDKITE_AGENT_JWKS_KEY_ID`
}
setEnv("BUILDKITE_PLUGIN_VALIDATION", fmt.Sprint(enablePluginValidation))

if r.conf.AgentConfiguration.TracingBackend != "" {
setEnv("BUILDKITE_TRACING_BACKEND", r.conf.AgentConfiguration.TracingBackend)
setEnv("BUILDKITE_TRACING_SERVICE_NAME", r.conf.AgentConfiguration.TracingServiceName)
if r.conf.AgentConfiguration.OpenTelemetryTracing {
setEnv("BUILDKITE_OPENTELEMETRY_TRACING", "true")
setEnv("BUILDKITE_TELEMETRY_SERVICE_NAME", r.conf.AgentConfiguration.TelemetryServiceName)

// Buildkite backend can provide a traceparent property on the job
// which can be propagated to the job tracing if OpenTelemetry is used
Expand All @@ -656,9 +655,6 @@ BUILDKITE_AGENT_JWKS_KEY_ID`
if r.conf.Job.TraceState != "" {
setEnv("BUILDKITE_TRACING_TRACESTATE", r.conf.Job.TraceState)
}
if r.conf.AgentConfiguration.TracingPropagateTraceparent {
setEnv("BUILDKITE_TRACING_PROPAGATE_TRACEPARENT", "true")
}
}

setEnv("BUILDKITE_AGENT_DISABLE_WARNINGS_FOR", strings.Join(r.conf.AgentConfiguration.DisableWarningsFor, ","))
Expand Down
10 changes: 2 additions & 8 deletions agent/run_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,14 +433,8 @@ func (r *JobRunner) cleanup(ctx context.Context, wg *sync.WaitGroup, exit core.P

// Write some metrics about the job run
jobMetrics := r.conf.MetricsScope.With(metrics.Tags{"exit_code": strconv.Itoa(exit.Status)})

if exit.Status == 0 {
jobMetrics.Timing("jobs.duration.success", finishedAt.Sub(r.startedAt))
jobMetrics.Count("jobs.success", 1)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we still want a success / failed metric here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#3874 specifically included in the changes:

Replaces the jobs.success and jobs.failed counters with jobs.finished — failure or success can be inferred with the exit_code tag that's applied to the metric

} else {
jobMetrics.Timing("jobs.duration.error", finishedAt.Sub(r.startedAt))
jobMetrics.Count("jobs.failed", 1)
}
jobMetrics.Timing("jobs.duration", finishedAt.Sub(r.startedAt))
jobMetrics.Count("jobs.finished", 1)

// Finish the build in the Buildkite Agent API
// Once we tell the API we're finished it might assign us new work, so make sure everything else is done first.
Expand Down
77 changes: 20 additions & 57 deletions clicommand/agent_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"errors"
"fmt"
"io"
"maps"
"net/url"
"os"
"os/signal"
Expand Down Expand Up @@ -39,7 +38,6 @@ import (
"github.com/buildkite/agent/v4/logger"
"github.com/buildkite/agent/v4/metrics"
"github.com/buildkite/agent/v4/status"
"github.com/buildkite/agent/v4/tracetools"
"github.com/buildkite/agent/v4/version"
"github.com/buildkite/shellwords"
"github.com/lestrrat-go/jwx/v2/jwk"
Expand Down Expand Up @@ -188,15 +186,12 @@ type AgentStartConfig struct {

HealthCheckAddr string `cli:"health-check-addr"`

// Datadog statsd metrics config
MetricsDatadog bool `cli:"metrics-datadog"`
MetricsDatadogHost string `cli:"metrics-datadog-host"`
MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"`
// Metrics config
OpenTelemetryMetrics bool `cli:"opentelemetry-metrics"`

// Tracing config
TracingBackend string `cli:"tracing-backend"`
TracingServiceName string `cli:"tracing-service-name"`
TracingPropagateTraceparent bool `cli:"tracing-propagate-traceparent"`
OpenTelemetryTracing bool `cli:"opentelemetry-tracing"`
TelemetryServiceName string `cli:"telemetry-service-name"`

// Other shared flags
StrictSingleHooks bool `cli:"strict-single-hooks"`
Expand Down Expand Up @@ -237,14 +232,10 @@ func (asc AgentStartConfig) Features(ctx context.Context) []string {
features = append(features, "acquire-job")
}

if asc.TracingBackend == tracetools.BackendOpenTelemetry {
if asc.OpenTelemetryTracing {
features = append(features, "opentelemetry-tracing")
}

if asc.TracingPropagateTraceparent {
features = append(features, "propagate-traceparent")
}

if asc.DisconnectAfterJob {
features = append(features, "disconnect-after-job")
}
Expand Down Expand Up @@ -289,8 +280,8 @@ func (asc AgentStartConfig) Features(ctx context.Context) []string {
features = append(features, "env-godebug")
}

if asc.MetricsDatadog {
features = append(features, "datadog-metrics")
if asc.OpenTelemetryMetrics {
features = append(features, "opentelemetry-metrics")
}

return features
Expand Down Expand Up @@ -622,20 +613,9 @@ var AgentStartCommand = &cli.Command{
Sources: cli.EnvVars("BUILDKITE_ALLOWED_PLUGINS"),
},
&cli.BoolFlag{
Name: "metrics-datadog",
Usage: "Send metrics to DogStatsD for Datadog (default: false)",
Sources: cli.EnvVars("BUILDKITE_METRICS_DATADOG"),
},
&cli.StringFlag{
Name: "metrics-datadog-host",
Usage: "The dogstatsd instance to send metrics to using udp",
Sources: cli.EnvVars("BUILDKITE_METRICS_DATADOG_HOST"),
Value: "127.0.0.1:8125",
},
&cli.BoolFlag{
Name: "metrics-datadog-distributions",
Usage: "Use Datadog Distributions for Timing metrics (default: false)",
Sources: cli.EnvVars("BUILDKITE_METRICS_DATADOG_DISTRIBUTIONS"),
Name: "opentelemetry-metrics",
Usage: "Enable agent metrics export over OpenTelemetry OTLP. Configure OTLP with standard OTEL_EXPORTER_OTLP_* env vars (default: false)",
Sources: cli.EnvVars("BUILDKITE_OPENTELEMETRY_METRICS"),
},
&cli.StringFlag{
Name: "log-format",
Expand Down Expand Up @@ -663,21 +643,15 @@ var AgentStartCommand = &cli.Command{
},
cancelSignalFlag,
cancelCleanupTimeoutFlag,
&cli.StringFlag{
Name: "tracing-backend",
Usage: `Enable tracing for build jobs by specifying a backend. Currently only "opentelemetry" (or empty) is supported`,
Sources: cli.EnvVars("BUILDKITE_TRACING_BACKEND"),
Value: "",
},
&cli.BoolFlag{
Name: "tracing-propagate-traceparent",
Usage: `Enable accepting traceparent context from Buildkite control plane (only supported for OpenTelemetry backend) (default: false)`,
Sources: cli.EnvVars("BUILDKITE_TRACING_PROPAGATE_TRACEPARENT"),
Name: "opentelemetry-tracing",
Usage: "Enable tracing for build jobs with OpenTelemetry OTLP. Configure OTLP with standard OTEL_EXPORTER_OTLP_* env vars (default: false)",
Sources: cli.EnvVars("BUILDKITE_OPENTELEMETRY_TRACING"),
},
&cli.StringFlag{
Name: "tracing-service-name",
Usage: "Service name to use when reporting traces.",
Sources: cli.EnvVars("BUILDKITE_TRACING_SERVICE_NAME"),
Name: "telemetry-service-name",
Usage: "Service name to use when reporting telemetry.",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the flag also change to telemetry-service-name?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, including the env var

Sources: cli.EnvVars("BUILDKITE_TELEMETRY_SERVICE_NAME"),
Value: "buildkite-agent",
},
&cli.StringFlag{
Expand Down Expand Up @@ -892,20 +866,10 @@ var AgentStartCommand = &cli.Command{
}

mc := metrics.NewCollector(l, metrics.CollectorConfig{
Datadog: cfg.MetricsDatadog,
DatadogHost: cfg.MetricsDatadogHost,
DatadogDistributions: cfg.MetricsDatadogDistributions,
Enabled: cfg.OpenTelemetryMetrics,
ServiceName: cfg.TelemetryServiceName,
})

// Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing
if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has {
return fmt.Errorf(
"the given tracing backend %q is not supported. Valid backends are: %q",
cfg.TracingBackend,
slices.Collect(maps.Keys(tracetools.ValidTracingBackends)),
)
}

if experiments.IsEnabled(ctx, experiments.AgentAPI) {
shutdown, err := runAgentAPI(ctx, l, cfg.SocketsPath)
if err != nil {
Expand Down Expand Up @@ -1034,9 +998,8 @@ var AgentStartCommand = &cli.Command{
HooksShell: cfg.HooksShell,
RedactedVars: cfg.RedactedVars,
AcquireJob: cfg.AcquireJob,
TracingBackend: cfg.TracingBackend,
TracingServiceName: cfg.TracingServiceName,
TracingPropagateTraceparent: cfg.TracingPropagateTraceparent,
OpenTelemetryTracing: cfg.OpenTelemetryTracing,
TelemetryServiceName: cfg.TelemetryServiceName,
AllowMultipartArtifactUpload: !cfg.NoMultipartArtifactUpload,
ArtifactUploadConcurrency: cfg.ArtifactUploadConcurrency,
KubernetesExec: cfg.KubernetesExec,
Expand Down
10 changes: 10 additions & 0 deletions clicommand/agent_start_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"runtime"
"slices"
"testing"

"github.com/buildkite/agent/v4/core"
Expand All @@ -13,6 +14,15 @@ import (
"github.com/urfave/cli/v3"
)

// TestAgentStartFeatures_OpenTelemetryTracing verifies that a start config
// with OpenTelemetryTracing enabled reports the "opentelemetry-tracing"
// feature from Features.
func TestAgentStartFeatures_OpenTelemetryTracing(t *testing.T) {
	t.Parallel()

	cfg := AgentStartConfig{OpenTelemetryTracing: true}
	got := cfg.Features(t.Context())

	// Guard clause: nothing more to check once the feature is present.
	if slices.Contains(got, "opentelemetry-tracing") {
		return
	}
	t.Fatalf("Features() = %v, want opentelemetry-tracing", got)
}

func setupHooksPath(t *testing.T) (string, func()) {
t.Helper()

Expand Down
34 changes: 16 additions & 18 deletions clicommand/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/buildkite/agent/v4/internal/process"
"github.com/buildkite/agent/v4/internal/self"
"github.com/buildkite/agent/v4/logger"
"github.com/buildkite/agent/v4/tracetools"
"github.com/urfave/cli/v3"
)

Expand Down Expand Up @@ -108,11 +109,10 @@ type BootstrapConfig struct {
CancelSignalTimeout time.Duration `cli:"cancel-signal-timeout"`
CancelCleanupTimeout time.Duration `cli:"cancel-cleanup-timeout"`
RedactedVars []string `cli:"redacted-vars" normalize:"list"`
TracingBackend string `cli:"tracing-backend"`
TracingServiceName string `cli:"tracing-service-name"`
OpenTelemetryTracing bool `cli:"opentelemetry-tracing"`
TelemetryServiceName string `cli:"telemetry-service-name"`
TracingTraceParent string `cli:"tracing-traceparent"`
TracingTraceState string `cli:"tracing-tracestate"`
TracingPropagateTraceparent bool `cli:"tracing-propagate-traceparent"`
NoJobAPI bool `cli:"no-job-api"`
DisableWarningsFor []string `cli:"disable-warnings-for" normalize:"list"`
CheckoutAttempts int `cli:"checkout-attempts"`
Expand Down Expand Up @@ -336,16 +336,15 @@ var BootstrapCommand = &cli.Command{
Usage: "The specific phases to execute. The order they're defined is irrelevant.",
Sources: cli.EnvVars("BUILDKITE_BOOTSTRAP_PHASES"),
},
&cli.StringFlag{
Name: "tracing-backend",
Usage: "The name of the tracing backend to use.",
Sources: cli.EnvVars("BUILDKITE_TRACING_BACKEND"),
Value: "",
&cli.BoolFlag{
Name: "opentelemetry-tracing",
Usage: "Enable tracing for build jobs with OpenTelemetry OTLP. Configure OTLP with standard OTEL_EXPORTER_OTLP_* env vars (default: false)",
Sources: cli.EnvVars("BUILDKITE_OPENTELEMETRY_TRACING"),
},
&cli.StringFlag{
Name: "tracing-service-name",
Name: "telemetry-service-name",
Usage: "Service name to use when reporting traces.",
Sources: cli.EnvVars("BUILDKITE_TRACING_SERVICE_NAME"),
Sources: cli.EnvVars("BUILDKITE_TELEMETRY_SERVICE_NAME"),
Value: "buildkite-agent",
},
&cli.StringFlag{
Expand All @@ -360,11 +359,6 @@ var BootstrapCommand = &cli.Command{
Sources: cli.EnvVars("BUILDKITE_TRACING_TRACESTATE"),
Value: "",
},
&cli.BoolFlag{
Name: "tracing-propagate-traceparent",
Usage: "Accept traceparent from Buildkite control plane (default: false)",
Sources: cli.EnvVars("BUILDKITE_TRACING_PROPAGATE_TRACEPARENT"),
},

&cli.BoolFlag{
Name: "no-job-api",
Expand Down Expand Up @@ -428,6 +422,11 @@ var BootstrapCommand = &cli.Command{
return fmt.Errorf("failed to parse cancel-signal: %w", err)
}

tracingBackend := tracetools.BackendNone
if cfg.OpenTelemetryTracing {
tracingBackend = tracetools.BackendOpenTelemetry
}

// Configure the bootstrapper
bootstrap := job.New(job.ExecutorConfig{
AgentName: cfg.AgentName,
Expand Down Expand Up @@ -486,11 +485,10 @@ var BootstrapCommand = &cli.Command{
HooksShell: cfg.HooksShell,
StrictSingleHooks: cfg.StrictSingleHooks,
Tag: cfg.Tag,
TracingBackend: cfg.TracingBackend,
TracingServiceName: cfg.TracingServiceName,
TracingBackend: tracingBackend,
TelemetryServiceName: cfg.TelemetryServiceName,
TracingTraceParent: cfg.TracingTraceParent,
TracingTraceState: cfg.TracingTraceState,
TracingPropagateTraceparent: cfg.TracingPropagateTraceparent,
JobAPI: !cfg.NoJobAPI,
DisabledWarnings: cfg.DisableWarningsFor,
Secrets: cfg.Secrets,
Expand Down
4 changes: 2 additions & 2 deletions clicommand/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,8 @@ func setupLoggerAndConfig[T any](ctx context.Context, c *cli.Command, opts ...co
}

if tracingBackend == tracetools.BackendOpenTelemetry {
serviceName := os.Getenv("BUILDKITE_TRACING_SERVICE_NAME")
if sn, err := reflections.GetField(cfg, "TracingServiceName"); err == nil {
serviceName := os.Getenv("BUILDKITE_TELEMETRY_SERVICE_NAME")
if sn, err := reflections.GetField(cfg, "TelemetryServiceName"); err == nil {
if snStr, ok := sn.(string); ok && snStr != "" {
serviceName = snStr
}
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ require (
drjosh.dev/zzglob v0.4.3
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.14.0
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.8.0
github.com/DataDog/datadog-go/v5 v5.8.3
github.com/Khan/genqlient v0.8.1
github.com/aws/aws-sdk-go-v2 v1.42.0
github.com/aws/aws-sdk-go-v2/config v1.32.25
Expand Down Expand Up @@ -55,9 +54,13 @@ require (
go.opentelemetry.io/contrib/propagators/jaeger v1.44.0
go.opentelemetry.io/contrib/propagators/ot v1.44.0
go.opentelemetry.io/otel v1.44.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.39.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.44.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.44.0
go.opentelemetry.io/otel/metric v1.44.0
go.opentelemetry.io/otel/sdk v1.44.0
go.opentelemetry.io/otel/sdk/metric v1.44.0
go.opentelemetry.io/otel/trace v1.44.0
golang.org/x/net v0.56.0
golang.org/x/oauth2 v0.36.0
Expand All @@ -79,7 +82,6 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.12.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.7.2 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/agnivade/levenshtein v1.2.1 // indirect
github.com/alexflint/go-arg v1.5.1 // indirect
github.com/alexflint/go-scalar v1.2.0 // indirect
Expand Down Expand Up @@ -136,13 +138,11 @@ require (
github.com/rivo/uniseg v0.4.7 // indirect
github.com/saracen/zipextra v0.0.0-20250129175152-f1aa42d25216 // indirect
github.com/segmentio/asm v1.2.1 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/vektah/gqlparser/v2 v2.5.32 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.44.0 // indirect
go.opentelemetry.io/otel/metric v1.44.0 // indirect
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.yaml.in/yaml/v2 v2.4.4 // indirect
Expand Down
Loading