Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions api/envoy/data/core/v3/health_check_event.proto
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ message HealthCheckEjectUnhealthy {

// The type of failure that caused this ejection.
HealthCheckFailureType failure_type = 1 [(validate.rules).enum = {defined_only: true}];

// The HTTP status code of the health check response that triggered the ejection.
// Only set when the health checker type is HTTP and the failure type is ``ACTIVE``.
// A value of ``0`` indicates that no HTTP status code was recorded (e.g., network-level failures
// or non-HTTP health checkers).
uint32 http_status_code = 2;
}

message HealthCheckAddHealthy {
Expand All @@ -111,6 +117,12 @@ message HealthCheckFailure {

// Whether this event is the result of the first ever health check on a host.
bool first_check = 2;

// The HTTP status code of the health check response that caused this failure.
// Only set when the health checker type is HTTP and the failure type is ACTIVE.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Only set when the health checker type is HTTP and the failure type is ACTIVE.
// Only set when the health checker type is HTTP and the failure type is ``ACTIVE``.

// A value of 0 indicates that no HTTP status code was recorded (e.g., network-level failures
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// A value of 0 indicates that no HTTP status code was recorded (e.g., network-level failures
// A value of ``0`` indicates that no HTTP status code was recorded (e.g., network-level failures

// or non-HTTP health checkers).
uint32 http_status_code = 3;
}

message DegradedHealthyHost {
Expand Down
8 changes: 8 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,14 @@ removed_config_or_runtime:
and legacy code path.

new_features:
- area: health_check
change: |
Added ``http_status_code`` field to :ref:`HealthCheckEjectUnhealthy
<envoy_v3_api_msg_data.core.v3.HealthCheckEjectUnhealthy>` and :ref:`HealthCheckFailure
<envoy_v3_api_msg_data.core.v3.HealthCheckFailure>` proto messages. When the health checker
type is HTTP and the failure type is ``ACTIVE``, the field is populated with the HTTP response
status code that caused the failure. For non-HTTP health checkers and network-level failures,
the field defaults to ``0``. Fixes :issue:`40221`.
- area: ratelimit
change: |
Added ``is_negative_hits`` boolean to the ``hits_addend``
Expand Down
9 changes: 7 additions & 2 deletions envoy/upstream/health_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,27 @@ class HealthCheckEventLogger {
* @param health_checker_type supplies the type of health checker that generated the event.
* @param host supplies the host that generated the event.
* @param failure_type supplies the type of health check failure.
* @param http_status_code supplies the HTTP status code of the response that triggered the
* ejection, or 0 if not applicable (e.g., non-HTTP checkers or network-level failures).
*/
virtual void logEjectUnhealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type) PURE;
envoy::data::core::v3::HealthCheckFailureType failure_type,
uint64_t http_status_code) PURE;

/**
* Log an unhealthy host event.
* @param health_checker_type supplies the type of health checker that generated the event.
* @param host supplies the host that generated the event.
* @param failure_type supplies the type of health check failure.
* @param first_check whether this is a failure on the first health check for this host.
* @param http_status_code supplies the HTTP status code of the response that caused this
* failure, or 0 if not applicable (e.g., non-HTTP checkers or network-level failures).
*/
virtual void logUnhealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type,
bool first_check) PURE;
bool first_check, uint64_t http_status_code) PURE;

/**
* Log a healthy host addition event.
Expand Down
29 changes: 20 additions & 9 deletions source/common/upstream/health_checker_event_logger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,31 @@ namespace Upstream {
void HealthCheckEventLoggerImpl::logEjectUnhealthy(
envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type) {
createHealthCheckEvent(health_checker_type, *host, [&failure_type](auto& event) {
event.mutable_eject_unhealthy_event()->set_failure_type(failure_type);
});
envoy::data::core::v3::HealthCheckFailureType failure_type, uint64_t http_status_code) {
createHealthCheckEvent(health_checker_type, *host,
[&failure_type, http_status_code](auto& event) {
event.mutable_eject_unhealthy_event()->set_failure_type(failure_type);
if (http_status_code != 0) {
event.mutable_eject_unhealthy_event()->set_http_status_code(
static_cast<uint32_t>(http_status_code));
}
});
}

void HealthCheckEventLoggerImpl::logUnhealthy(
envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type, bool first_check) {
createHealthCheckEvent(health_checker_type, *host, [&first_check, &failure_type](auto& event) {
event.mutable_health_check_failure_event()->set_failure_type(failure_type);
event.mutable_health_check_failure_event()->set_first_check(first_check);
});
envoy::data::core::v3::HealthCheckFailureType failure_type, bool first_check,
uint64_t http_status_code) {
createHealthCheckEvent(
health_checker_type, *host, [&first_check, &failure_type, http_status_code](auto& event) {
event.mutable_health_check_failure_event()->set_failure_type(failure_type);
event.mutable_health_check_failure_event()->set_first_check(first_check);
if (http_status_code != 0) {
event.mutable_health_check_failure_event()->set_http_status_code(
static_cast<uint32_t>(http_status_code));
}
});
}

void HealthCheckEventLoggerImpl::logAddHealthy(
Expand Down
7 changes: 4 additions & 3 deletions source/common/upstream/health_checker_event_logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,16 @@ class HealthCheckEventLoggerImpl : public HealthCheckEventLogger {

void logEjectUnhealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type) override;
envoy::data::core::v3::HealthCheckFailureType failure_type,
uint64_t http_status_code) override;
void logAddHealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host, bool first_check) override;
void logSuccessfulHealthCheck(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) override;
void logUnhealthy(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host,
envoy::data::core::v3::HealthCheckFailureType failure_type,
bool first_check) override;
envoy::data::core::v3::HealthCheckFailureType failure_type, bool first_check,
uint64_t http_status_code) override;
void logDegraded(envoy::data::core::v3::HealthCheckerType health_checker_type,
const HostDescriptionConstSharedPtr& host) override;
void logNoLongerDegraded(envoy::data::core::v3::HealthCheckerType health_checker_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ bool networkHealthCheckFailureType(envoy::data::core::v3::HealthCheckFailureType
} // namespace

HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy(
envoy::data::core::v3::HealthCheckFailureType type, bool retriable) {
envoy::data::core::v3::HealthCheckFailureType type, bool retriable, uint64_t http_status_code) {
// If we are unhealthy, reset the # of healthy to zero.
num_healthy_ = 0;

Expand All @@ -379,7 +379,8 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy(
parent_.decHealthy();
changed_state = HealthTransition::Changed;
if (parent_.event_logger_) {
parent_.event_logger_->logEjectUnhealthy(parent_.healthCheckerType(), host_, type);
parent_.event_logger_->logEjectUnhealthy(parent_.healthCheckerType(), host_, type,
http_status_code);
}
} else {
changed_state = HealthTransition::ChangePending;
Expand All @@ -399,7 +400,8 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy(
changed_state = clearPendingFlag(changed_state);

if ((first_check_ || parent_.always_log_health_check_failures_) && parent_.event_logger_) {
parent_.event_logger_->logUnhealthy(parent_.healthCheckerType(), host_, type, first_check_);
parent_.event_logger_->logUnhealthy(parent_.healthCheckerType(), host_, type, first_check_,
http_status_code);
}

parent_.stats_.failure_.inc();
Expand All @@ -415,8 +417,8 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy(
}

void HealthCheckerImplBase::ActiveHealthCheckSession::handleFailure(
envoy::data::core::v3::HealthCheckFailureType type, bool retriable) {
HealthTransition changed_state = setUnhealthy(type, retriable);
envoy::data::core::v3::HealthCheckFailureType type, bool retriable, uint64_t http_status_code) {
HealthTransition changed_state = setUnhealthy(type, retriable, http_status_code);
// It's possible that the previous call caused this session to be deferred deleted.
if (timeout_timer_ != nullptr) {
timeout_timer_->disableTimer();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,16 @@ class HealthCheckerImplBase : public HealthChecker,
public:
~ActiveHealthCheckSession() override;
HealthTransition setUnhealthy(envoy::data::core::v3::HealthCheckFailureType type,
bool retriable);
bool retriable, uint64_t http_status_code = 0);
void onDeferredDeleteBase();
void start() { onInitialInterval(); }

protected:
ActiveHealthCheckSession(HealthCheckerImplBase& parent, HostSharedPtr host);

void handleSuccess(bool degraded = false);
void handleFailure(envoy::data::core::v3::HealthCheckFailureType type, bool retriable = false);
void handleFailure(envoy::data::core::v3::HealthCheckFailureType type, bool retriable = false,
uint64_t http_status_code = 0);

HostSharedPtr host_;

Expand Down
12 changes: 9 additions & 3 deletions source/extensions/health_checkers/http/health_checker_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,13 @@ HttpHealthCheckerImpl::HttpActiveHealthCheckSession::healthCheckResult() {
void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResponseComplete() {
request_in_flight_ = false;

// Extract the HTTP response code for inclusion in health check events.
const uint64_t response_code =
response_headers_ != nullptr ? Http::Utility::getResponseStatus(*response_headers_) : 0;

// Store the raw HTTP response code on the host for HDS metadata reporting.
if (response_headers_ != nullptr) {
host_->setLastHealthCheckHttpStatus(Http::Utility::getResponseStatus(*response_headers_));
host_->setLastHealthCheckHttpStatus(response_code);
}

switch (healthCheckResult()) {
Expand All @@ -432,10 +436,12 @@ void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResponseComplete() {
handleSuccess(true);
break;
case HealthCheckResult::Failed:
handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/false);
handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/false,
/*http_status_code=*/response_code);
break;
case HealthCheckResult::Retriable:
handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/true);
handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/true,
/*http_status_code=*/response_code);
break;
}

Expand Down
Loading
Loading