From 5beca770e44991fc0f3009719d89c1d0f08a9f1b Mon Sep 17 00:00:00 2001 From: Andrey Lebedev Date: Tue, 24 Feb 2026 18:46:35 +0100 Subject: [PATCH 1/2] OCPBUGS-67161: Replace HTTP backend liveness check with admin socket check Use HAProxy admin socket `show version` command for the liveness probe instead of sending an HTTP request to the backend. This directly tests whether the HAProxy process is alive and responsive, rather than testing through the data plane. The HTTP-based liveness check counts against HAProxy's maxconn limit. When maxconn is reached due to client traffic, the liveness probe HTTP request gets queued or rejected, causing probe failures and unnecessary container restarts even though HAProxy is still running. The admin socket is not subject to maxconn, so the liveness probe remains reliable under high connection load. The readiness probe continues to use the HTTP backend check. Co-Authored-By: Claude Opus 4.6 --- pkg/cmd/infra/router/template.go | 4 +++- pkg/router/metrics/health.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pkg/cmd/infra/router/template.go b/pkg/cmd/infra/router/template.go index 8b7fdabbc..3bd72304e 100644 --- a/pkg/cmd/infra/router/template.go +++ b/pkg/cmd/infra/router/template.go @@ -636,9 +636,11 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error { return err } checkController := metrics.ControllerLive() + adminSocketURL := &url.URL{Scheme: "unix", Path: "/var/lib/haproxy/run/haproxy.sock"} + checkSocket := metrics.AdminSocketAvailable(adminSocketURL) liveChecks := []healthz.HealthChecker{checkController} if !(isTrue(env("ROUTER_BIND_PORTS_BEFORE_SYNC", ""))) { - liveChecks = append(liveChecks, checkBackend) + liveChecks = append(liveChecks, checkSocket) } kubeconfig, _, err := o.Config.KubeConfig() diff --git a/pkg/router/metrics/health.go b/pkg/router/metrics/health.go index 90e91041b..09ae0ebf5 100644 --- a/pkg/router/metrics/health.go +++ 
b/pkg/router/metrics/health.go
@@ -115,3 +115,34 @@ func ProxyProtocolHTTPBackendAvailable(u *url.URL) healthz.HealthChecker {
 		return nil
 	})
 }
+
+// AdminSocketAvailable returns a healthz check named "admin-socket" that sends
+// "show version" to the HAProxy admin socket at u.Path and fails unless a
+// non-empty reply arrives; dialing and socket I/O are each limited to 2s.
+func AdminSocketAvailable(u *url.URL) healthz.HealthChecker {
+	return healthz.NamedCheck("admin-socket", func(_ *http.Request) error {
+		conn, err := net.DialTimeout("unix", u.Path, 2*time.Second)
+		if err != nil {
+			return fmt.Errorf("connecting to admin socket: %w", err)
+		}
+		defer conn.Close()
+
+		// Bound the write and read so an unresponsive HAProxy cannot hang the probe.
+		if err := conn.SetDeadline(time.Now().Add(2 * time.Second)); err != nil {
+			return fmt.Errorf("setting admin socket deadline: %w", err)
+		}
+		if _, err := conn.Write([]byte("show version\n")); err != nil {
+			return fmt.Errorf("writing to admin socket: %w", err)
+		}
+		buf := make([]byte, 10) // any reply byte proves HAProxy answered
+		n, err := conn.Read(buf)
+		if err != nil {
+			return fmt.Errorf("reading from admin socket: %w", err)
+		}
+		if n == 0 {
+			return fmt.Errorf("empty response from admin socket")
+		}
+		log.V(4).Info("probe succeeded", "url", u.String())
+		return nil
+	})
+}

From 4b76ac6210522ab0eb8344d48395c8603af9b249 Mon Sep 17 00:00:00 2001
From: Andrey Lebedev
Date: Tue, 3 Mar 2026 10:51:04 +0100
Subject: [PATCH 2/2] OCPBUGS-67161: Define admin socket URL in a single place

Move the admin socket URL definition to the top of the Run method and
reuse it for the Prometheus collector ScrapeURI default, the liveness
probe, and the ConfigManager connection info. Remove the hardcoded
default from the haproxy metrics package.

Co-Authored-By: Claude Opus 4.6 --- pkg/cmd/infra/router/template.go | 6 +++--- pkg/router/metrics/haproxy/haproxy.go | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/cmd/infra/router/template.go b/pkg/cmd/infra/router/template.go index 3bd72304e..072f6a1f7 100644 --- a/pkg/cmd/infra/router/template.go +++ b/pkg/cmd/infra/router/template.go @@ -563,6 +563,7 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error { var reloadCallbacks []func() + adminSocketURL := &url.URL{Scheme: "unix", Path: "/var/lib/haproxy/run/haproxy.sock"} statsPort := o.StatsPort switch { case o.MetricsType == "haproxy" && statsPort != 0: @@ -607,7 +608,7 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error { collector, err := haproxy.NewPrometheusCollector(haproxy.PrometheusOptions{ // Only template router customizers who alter the image should need this - ScrapeURI: env("ROUTER_METRICS_HAPROXY_SCRAPE_URI", ""), + ScrapeURI: env("ROUTER_METRICS_HAPROXY_SCRAPE_URI", adminSocketURL.String()), // Only template router customizers who alter the image should need this PidFile: env("ROUTER_METRICS_HAPROXY_PID_FILE", ""), Timeout: timeout, @@ -636,7 +637,6 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error { return err } checkController := metrics.ControllerLive() - adminSocketURL := &url.URL{Scheme: "unix", Path: "/var/lib/haproxy/run/haproxy.sock"} checkSocket := metrics.AdminSocketAvailable(adminSocketURL) liveChecks := []healthz.HealthChecker{checkController} if !(isTrue(env("ROUTER_BIND_PORTS_BEFORE_SYNC", ""))) { @@ -737,7 +737,7 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error { return err } cmopts := templateplugin.ConfigManagerOptions{ - ConnectionInfo: "unix:///var/lib/haproxy/run/haproxy.sock", + ConnectionInfo: adminSocketURL.String(), CommitInterval: o.CommitInterval, BlueprintRoutes: blueprintRoutes, BlueprintRoutePoolSize: o.BlueprintRoutePoolSize, diff --git 
a/pkg/router/metrics/haproxy/haproxy.go b/pkg/router/metrics/haproxy/haproxy.go index 22b04ea18..dc753a574 100644 --- a/pkg/router/metrics/haproxy/haproxy.go +++ b/pkg/router/metrics/haproxy/haproxy.go @@ -732,9 +732,6 @@ func NewPrometheusCollector(opts PrometheusOptions) (*Exporter, error) { } func defaultOptions(opts PrometheusOptions) PrometheusOptions { - if len(opts.ScrapeURI) == 0 { - opts.ScrapeURI = "unix:///var/lib/haproxy/run/haproxy.sock" - } if len(opts.PidFile) == 0 { opts.PidFile = "/var/lib/haproxy/run/haproxy.pid" }