diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index abdd8303d7..c5339d411d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -99,3 +99,5 @@ tools/sidecar_mockgen/ @DataDog/libdatadog-php libdd-data-pipeline/src/otlp/ @DataDog/apm-sdk-capabilities-rust libdd-data-pipeline/tests/test_trace_exporter_otlp_export.rs @DataDog/apm-sdk-capabilities-rust libdd-trace-utils/src/otlp_encoder/ @DataDog/apm-sdk-capabilities-rust +datadog-sidecar/src/service/ffe_exposures_flusher.rs @DataDog/libdatadog-php @DataDog/libdatadog-apm @DataDog/feature-flagging-and-experimentation-sdk +datadog-sidecar/src/service/ffe_metrics_flusher.rs @DataDog/libdatadog-php @DataDog/libdatadog-apm @DataDog/feature-flagging-and-experimentation-sdk diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index 7cc6a3267d..9d88a72ef9 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -35,7 +35,7 @@ use datadog_sidecar::shm_remote_config::{path_for_remote_config, RemoteConfigRea use libc::c_char; use libdd_common::tag::Tag; use libdd_common::Endpoint; -use libdd_common_ffi::slice::{AsBytes, CharSlice}; +use libdd_common_ffi::slice::{AsBytes, ByteSlice, CharSlice}; use libdd_common_ffi::{self as ffi, MaybeError}; #[cfg(windows)] use libdd_crashtracker_ffi::Metadata; @@ -1116,6 +1116,73 @@ pub unsafe extern "C" fn ddog_sidecar_send_debugger_datum( ddog_sidecar_send_debugger_data(transport, instance_id, queue_id, vec![*payload]) } +/// Forward a single FFE (Feature Flag Evaluation) exposure batch payload to +/// the sidecar. The sidecar asynchronously POSTs it to the agent EVP proxy +/// at `/evp_proxy/v2/api/v2/exposures`. +/// +/// The payload is produced by `ddog_ffe_flush_exposures()` in `components-rs`. +/// A null or zero-length slice is a no-op (the PHP side indicates "nothing to +/// flush" by returning such a slice). +/// +/// # Safety +/// `payload` must be a valid UTF-8 `CharSlice` (as returned by +/// `ddog_ffe_flush_exposures`) or a default (null, 0) slice. +#[no_mangle] +#[allow(clippy::missing_safety_doc)] +pub unsafe extern "C" fn ddog_sidecar_send_ffe_exposures( + transport: &mut Box, + instance_id: &InstanceId, + queue_id: &QueueId, + payload: CharSlice, +) -> MaybeError { + if payload.is_empty() { + return MaybeError::None; + } + let payload = payload.to_utf8_lossy().into_owned(); + try_c!(blocking::enqueue_actions( + transport, + instance_id, + queue_id, + vec![SidecarAction::FfeExposures(payload)], + )); + MaybeError::None +} + +/// Forward a single FFE (Feature Flag Evaluation) metrics batch payload to +/// the sidecar. The sidecar asynchronously POSTs the OTLP/protobuf bytes to +/// the OTLP HTTP metrics intake at the given `endpoint` URL (typically the +/// value of `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`, default +/// `http://localhost:4318/v1/metrics`). +/// +/// The PHP-side `OtlpMetricEncoder` produces `payload`. A null/empty payload +/// or an empty endpoint is a no-op. +/// +/// # Safety +/// `endpoint` must be a valid UTF-8 `CharSlice`. `payload` must be a valid +/// `ByteSlice` (as returned by the PHP encoder). +#[no_mangle] +#[allow(clippy::missing_safety_doc)] +pub unsafe extern "C" fn ddog_sidecar_send_ffe_metrics( + transport: &mut Box, + instance_id: &InstanceId, + queue_id: &QueueId, + endpoint: CharSlice, + payload: ByteSlice, +) -> MaybeError { + if endpoint.is_empty() || payload.is_empty() { + return MaybeError::None; + } + let endpoint = endpoint.to_utf8_lossy().into_owned(); + let payload = payload.as_slice().to_vec(); + try_c!(blocking::enqueue_actions( + transport, + instance_id, + queue_id, + vec![SidecarAction::FfeMetrics { endpoint, payload }], + )); + MaybeError::None +} + #[no_mangle] #[allow(clippy::missing_safety_doc)] #[allow(improper_ctypes_definitions)] // DebuggerPayload is just a pointer, we hide its internals diff --git a/datadog-sidecar/src/service/ffe_exposures_flusher.rs b/datadog-sidecar/src/service/ffe_exposures_flusher.rs new file mode 100644 index 0000000000..e4fcc80505 --- /dev/null +++ b/datadog-sidecar/src/service/ffe_exposures_flusher.rs @@ -0,0 +1,164 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Forwards FFE (Feature Flag Evaluation) exposure payloads from the PHP tracer +//! to the Datadog Agent's EVP proxy. +//! +//! Protocol matches dd-trace-go / dd-trace-rb / dd-trace-py / dd-trace-js / +//! dd-trace-dotnet: `POST /evp_proxy/v2/api/v2/exposures` with the header +//! `X-Datadog-EVP-Subdomain: event-platform-intake`. No agent capability gate. + +use http::uri::PathAndQuery; +use http::Method; +use libdd_capabilities::Bytes; +use libdd_capabilities_impl::HttpClientCapability; +use libdd_common::Endpoint; +use tracing::{debug, warn}; + +/// EVP proxy path for FFE exposure intake. +pub(crate) const EVP_EXPOSURES_PATH: &str = "/evp_proxy/v2/api/v2/exposures"; + +/// EVP subdomain that routes requests to event-platform intake. +pub(crate) const EVP_SUBDOMAIN_HEADER: &str = "X-Datadog-EVP-Subdomain"; +pub(crate) const EVP_SUBDOMAIN_VALUE: &str = "event-platform-intake"; + +const USER_AGENT: &str = concat!("ddtrace-php-sidecar/", env!("CARGO_PKG_VERSION")); + +/// Build the FFE exposure endpoint from a session's agent base endpoint. +/// Overrides only the path (`/evp_proxy/v2/api/v2/exposures`), preserving +/// scheme, authority, api_key (agentless), timeout, and test_token. +pub(crate) fn exposure_endpoint(base: &Endpoint) -> Option { + let mut parts = base.url.clone().into_parts(); + parts.path_and_query = Some(PathAndQuery::from_static(EVP_EXPOSURES_PATH)); + let url = http::Uri::from_parts(parts).ok()?; + Some(Endpoint { + url, + ..base.clone() + }) +} + +/// POST a single FFE exposure payload to the agent EVP proxy. +/// Fire-and-forget: non-2xx responses and network errors are logged at `debug` +/// and dropped (matches dd-trace-go behaviour). +pub(crate) async fn send_payload( + client: &C, + endpoint: &Endpoint, + payload: String, +) { + let builder = match endpoint.to_request_builder(USER_AGENT) { + Ok(b) => b, + Err(e) => { + debug!("ffe_exposures_flusher: failed to build request: {e:?}"); + return; + } + }; + + let req = match builder + .method(Method::POST) + .header("Content-Type", "application/json") + .header(EVP_SUBDOMAIN_HEADER, EVP_SUBDOMAIN_VALUE) + .body(Bytes::from(payload)) + { + Ok(r) => r, + Err(e) => { + debug!("ffe_exposures_flusher: failed to construct request body: {e:?}"); + return; + } + }; + + match client.request(req).await { + Ok(resp) => { + let status = resp.status(); + if !status.is_success() { + // dd-trace-go logs a readable error body on non-2xx. + let body_preview = truncate(resp.body().as_ref(), 256); + warn!("ffe_exposures_flusher: non-2xx response {status}: {body_preview}"); + } else { + debug!("ffe_exposures_flusher: sent exposure batch, status={status}"); + } + } + Err(e) => { + debug!("ffe_exposures_flusher: request failed: {e:?}"); + } + } +} + +fn truncate(bytes: &[u8], cap: usize) -> String { + let take = bytes.len().min(cap); + String::from_utf8_lossy(&bytes[..take]).into_owned() +} + +#[cfg(test)] +mod tests { + use super::*; + use httpmock::MockServer; + use libdd_capabilities_impl::NativeCapabilities; + + fn endpoint_for(server: &MockServer) -> Endpoint { + Endpoint { + url: server.url("/").parse().unwrap(), + ..Endpoint::default() + } + } + + /// V3: POST hits `/evp_proxy/v2/api/v2/exposures` with the correct + /// subdomain header and application/json content type. Body round-trips. + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn posts_to_evp_proxy() { + let server = MockServer::start_async().await; + let mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST) + .path(EVP_EXPOSURES_PATH) + .header(EVP_SUBDOMAIN_HEADER, EVP_SUBDOMAIN_VALUE) + .header("content-type", "application/json"); + then.status(202); + }) + .await; + + let base = endpoint_for(&server); + let ep = exposure_endpoint(&base).unwrap(); + let client = NativeCapabilities::new_client(); + + let payload = + r#"{"context":{"service":"svc","env":"prod","version":"1"},"exposures":[]}"#.to_owned(); + send_payload(&client, &ep, payload.clone()).await; + + mock.assert_async().await; + + // Verify the endpoint was hit exactly once. + let calls = mock.calls_async().await; + assert_eq!(calls, 1); + } + + /// Non-2xx responses are logged and dropped; no panic, no retry. + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn non_2xx_does_not_panic() { + let server = MockServer::start_async().await; + let _mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST).path(EVP_EXPOSURES_PATH); + then.status(500).body("intake overloaded"); + }) + .await; + + let base = endpoint_for(&server); + let ep = exposure_endpoint(&base).unwrap(); + let client = NativeCapabilities::new_client(); + send_payload(&client, &ep, "{}".to_owned()).await; + } + + #[test] + fn endpoint_preserves_authority_overrides_path() { + let base = Endpoint { + url: "http://agent.internal:8126/v0.4/traces".parse().unwrap(), + ..Endpoint::default() + }; + let ep = exposure_endpoint(&base).unwrap(); + assert_eq!(ep.url.scheme_str(), Some("http")); + assert_eq!(ep.url.authority().unwrap().as_str(), "agent.internal:8126"); + assert_eq!(ep.url.path(), EVP_EXPOSURES_PATH); + } +} diff --git a/datadog-sidecar/src/service/ffe_metrics_flusher.rs b/datadog-sidecar/src/service/ffe_metrics_flusher.rs new file mode 100644 index 0000000000..d516e63ed2 --- /dev/null +++ b/datadog-sidecar/src/service/ffe_metrics_flusher.rs @@ -0,0 +1,148 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Forwards FFE (Feature Flag Evaluation) metric payloads from the PHP tracer +//! to a user-configured OTLP HTTP metrics intake. +//! +//! Unlike `ffe_exposures_flusher`, which targets the Datadog Agent's EVP proxy, this +//! flusher targets an OpenTelemetry-compatible OTLP metrics endpoint +//! (typically configured via `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`). The +//! endpoint is supplied per-call by the PHP tracer, not derived from the +//! sidecar session's agent base. PHP encodes the metric series as OTLP/protobuf +//! and the sidecar performs the HTTP POST. + +use http::Method; +use libdd_capabilities::Bytes; +use libdd_capabilities_impl::HttpClientCapability; +use libdd_common::Endpoint; +use tracing::{debug, warn}; + +const USER_AGENT: &str = concat!("ddtrace-php-sidecar/", env!("CARGO_PKG_VERSION")); + +/// Build an `Endpoint` for an OTLP metrics intake from a fully-qualified URL. +/// +/// Production callers supply the URL via the FFI (typically the value of +/// `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`; the OpenTelemetry spec default is +/// `http://localhost:4318/v1/metrics`). +/// Returns `None` if the URL is unparseable. The OTLP endpoint is unrelated +/// to the Agent base, so we don't preserve any session fields here. +pub(crate) fn otlp_metrics_endpoint(url: &str) -> Option { + let url = url.parse().ok()?; + Some(Endpoint { + url, + ..Endpoint::default() + }) +} + +/// POST a single OTLP/protobuf metrics payload to the configured intake. +/// Fire-and-forget: non-2xx responses and network errors are logged and +/// dropped (matches dd-trace-go/py OTLP exporter behavior). +pub(crate) async fn send_payload( + client: &C, + endpoint: &Endpoint, + payload: Vec, +) { + let builder = match endpoint.to_request_builder(USER_AGENT) { + Ok(b) => b, + Err(e) => { + debug!("ffe_metrics_flusher: failed to build request: {e:?}"); + return; + } + }; + + let req = match builder + .method(Method::POST) + .header("Content-Type", "application/x-protobuf") + .body(Bytes::from(payload)) + { + Ok(r) => r, + Err(e) => { + debug!("ffe_metrics_flusher: failed to construct request body: {e:?}"); + return; + } + }; + + match client.request(req).await { + Ok(resp) => { + let status = resp.status(); + if !status.is_success() { + let body_preview = truncate(resp.body().as_ref(), 256); + warn!("ffe_metrics_flusher: non-2xx response {status}: {body_preview}"); + } else { + debug!("ffe_metrics_flusher: sent metric batch, status={status}"); + } + } + Err(e) => { + debug!("ffe_metrics_flusher: request failed: {e:?}"); + } + } +} + +fn truncate(bytes: &[u8], cap: usize) -> String { + let take = bytes.len().min(cap); + String::from_utf8_lossy(&bytes[..take]).into_owned() +} + +#[cfg(test)] +mod tests { + use super::*; + use httpmock::MockServer; + use libdd_capabilities_impl::NativeCapabilities; + + /// POST hits the configured OTLP metrics path with application/x-protobuf. + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn posts_protobuf_to_configured_endpoint() { + let server = MockServer::start_async().await; + let mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST) + .path("/v1/metrics") + .header("content-type", "application/x-protobuf"); + then.status(202); + }) + .await; + + let url = server.url("/v1/metrics"); + let ep = otlp_metrics_endpoint(&url).unwrap(); + let client = NativeCapabilities::new_client(); + + // Tiny but valid protobuf-shaped bytes; the flusher does not inspect + // payload content, it just relays. + let payload = vec![0x0a, 0x00]; + send_payload(&client, &ep, payload).await; + + mock.assert_async().await; + assert_eq!(mock.calls_async().await, 1); + } + + /// Non-2xx responses are logged and dropped; no panic, no retry. + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn non_2xx_does_not_panic() { + let server = MockServer::start_async().await; + let _mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST).path("/v1/metrics"); + then.status(500).body("intake overloaded"); + }) + .await; + + let url = server.url("/v1/metrics"); + let ep = otlp_metrics_endpoint(&url).unwrap(); + let client = NativeCapabilities::new_client(); + send_payload(&client, &ep, vec![0u8; 4]).await; + } + + #[test] + fn default_endpoint_is_parseable() { + let ep = otlp_metrics_endpoint("http://localhost:4318/v1/metrics").unwrap(); + assert_eq!(ep.url.scheme_str(), Some("http")); + assert_eq!(ep.url.path(), "/v1/metrics"); + } + + #[test] + fn invalid_url_returns_none() { + assert!(otlp_metrics_endpoint("not a url").is_none()); + } +} diff --git a/datadog-sidecar/src/service/mod.rs b/datadog-sidecar/src/service/mod.rs index bc5930fc78..506f28a3ed 100644 --- a/datadog-sidecar/src/service/mod.rs +++ b/datadog-sidecar/src/service/mod.rs @@ -28,6 +28,8 @@ pub mod agent_info; pub mod blocking; mod debugger_diagnostics_bookkeeper; pub mod exception_hash_rate_limiter; +pub(crate) mod ffe_exposures_flusher; +pub(crate) mod ffe_metrics_flusher; mod instance_id; mod queue_id; mod remote_configs; @@ -82,4 +84,20 @@ pub enum SidecarAction { Telemetry(TelemetryActions), AddTelemetryMetricPoint((String, f64, Vec)), PhpComposerTelemetryFile(PathBuf), + /// FFE exposure payload (a JSON batch envelope produced by + /// `ddog_ffe_flush_exposures`). The sidecar forwards it to the agent's + /// EVP proxy at `/evp_proxy/v2/api/v2/exposures` with the + /// `X-Datadog-EVP-Subdomain: event-platform-intake` header. + FfeExposures(String), + /// FFE evaluation-metric payload (OTLP/protobuf encoded by the PHP-side + /// `OtlpMetricEncoder`). The sidecar POSTs it as `application/x-protobuf` + /// to the user-configured OTLP metrics endpoint (typically + /// `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`, default + /// `http://localhost:4318/v1/metrics`). The endpoint travels with the + /// payload because OTLP collectors are not the Datadog Agent and the + /// sidecar has no session-level OTLP base. + FfeMetrics { + endpoint: String, + payload: Vec, + }, } diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index e4841081a6..e9ef0f0f4c 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ b/datadog-sidecar/src/service/sidecar_server.rs @@ -35,6 +35,8 @@ use crate::service::debugger_diagnostics_bookkeeper::{ DebuggerDiagnosticsBookkeeper, DebuggerDiagnosticsBookkeeperStats, }; use crate::service::exception_hash_rate_limiter::EXCEPTION_HASH_LIMITER; +use crate::service::ffe_exposures_flusher; +use crate::service::ffe_metrics_flusher; use crate::service::remote_configs::{RemoteConfigNotifyTarget, RemoteConfigs}; use crate::service::stats_flusher::{ flush_all_stats_now, get_or_create_concentrator, stats_endpoint, ConcentratorKey, @@ -44,6 +46,7 @@ use crate::service::tracing::trace_flusher::TraceFlusherStats; use crate::tokio_util::run_or_spawn_shared; use datadog_live_debugger::sender::{agent_info_supports_debugger_v2_endpoint, DebuggerType}; use datadog_remote_config::fetch::{ConfigInvariants, ConfigOptions, MultiTargetStats}; +use libdd_capabilities_impl::{HttpClientCapability, NativeCapabilities}; use libdd_common::tag::Tag; use libdd_dogstatsd_client::{new, DogStatsDActionOwned}; use libdd_telemetry::config::Config; @@ -405,6 +408,51 @@ impl SidecarInterface for ConnectionSidecarHandler { trace_config.tracer_version.clone(), ); + // FFE actions are session-scoped, not application-scoped: dispatch them + // before the `applications.entry(queue_id)` check so they are not silently + // dropped when the PHP runtime hasn't yet registered the application via + // remote-config metadata. The PHP exposure/metric writers can fire as soon + // as evaluations begin, which is often earlier than the first RC config + // registration call. + let actions: Vec = actions + .into_iter() + .filter(|a| match a { + SidecarAction::FfeExposures(payload) => { + if let Some(base) = trace_config.endpoint.as_ref() { + if let Some(ep) = ffe_exposures_flusher::exposure_endpoint(base) { + let payload = payload.clone(); + tokio::spawn(async move { + let client = NativeCapabilities::new_client(); + ffe_exposures_flusher::send_payload(&client, &ep, payload).await; + }); + } else { + debug!( + "ffe_exposures_flusher: could not derive endpoint, dropping batch" + ); + } + } else { + debug!("ffe_exposures_flusher: no session endpoint, dropping batch"); + } + false + } + SidecarAction::FfeMetrics { endpoint, payload } => { + if let Some(ep) = ffe_metrics_flusher::otlp_metrics_endpoint(endpoint) { + let payload = payload.clone(); + tokio::spawn(async move { + let client = NativeCapabilities::new_client(); + ffe_metrics_flusher::send_payload(&client, &ep, payload).await; + }); + } else { + debug!( + "ffe_metrics_flusher: unparseable endpoint {endpoint:?}, dropping batch" + ); + } + false + } + _ => true, + }) + .collect(); + let rt_info = self.server.get_runtime(&instance_id); let mut applications = rt_info.lock_applications(); @@ -1075,4 +1123,81 @@ impl SidecarInterface for ConnectionSidecarHandler { } } +#[cfg(test)] +mod tests { + use super::*; + use httpmock::{Method::POST, MockServer}; + use tokio::time::{sleep, Duration as TokioDuration}; + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn ffe_actions_dispatch_without_registered_application() { + let http_server = MockServer::start_async().await; + let exposures_mock = http_server + .mock_async(|when, then| { + when.method(POST) + .path(ffe_exposures_flusher::EVP_EXPOSURES_PATH); + then.status(202); + }) + .await; + let metrics_mock = http_server + .mock_async(|when, then| { + when.method(POST).path("/v1/metrics"); + then.status(202); + }) + .await; + + let handler = ConnectionSidecarHandler::new(SidecarServer::default()); + let instance_id = InstanceId::new("session", "runtime"); + let queue_id = QueueId::from(42); + + handler + .server + .get_session(&instance_id.session_id) + .modify_trace_config(|cfg| { + let endpoint = Endpoint { + url: http_server.url("/").parse().unwrap(), + ..Endpoint::default() + }; + cfg.set_endpoint(endpoint).unwrap(); + }); + + assert!(!handler + .server + .get_runtime(&instance_id) + .lock_applications() + .contains_key(&queue_id)); + + handler + .enqueue_actions( + PeerCredentials::default(), + instance_id.clone(), + queue_id, + vec![ + SidecarAction::FfeExposures(r#"{"exposures":[]}"#.to_owned()), + SidecarAction::FfeMetrics { + endpoint: http_server.url("/v1/metrics"), + payload: vec![0x0a, 0x00], + }, + ], + ) + .await; + + for _ in 0..100 { + if exposures_mock.calls_async().await == 1 && metrics_mock.calls_async().await == 1 { + break; + } + sleep(TokioDuration::from_millis(10)).await; + } + + exposures_mock.assert_async().await; + metrics_mock.assert_async().await; + assert!(!handler + .server + .get_runtime(&instance_id) + .lock_applications() + .contains_key(&queue_id)); + } +} + // TODO: APMSP-1079 - Unit tests are sparse for the sidecar server. We should add more. diff --git a/datadog-sidecar/src/service/telemetry.rs b/datadog-sidecar/src/service/telemetry.rs index 201f72fb33..8a48c57612 100644 --- a/datadog-sidecar/src/service/telemetry.rs +++ b/datadog-sidecar/src/service/telemetry.rs @@ -454,6 +454,8 @@ impl TelemetryCachedClient { } } SidecarAction::PhpComposerTelemetryFile(_) => {} // handled separately + SidecarAction::FfeExposures(_) => {} // handled in sidecar_server + SidecarAction::FfeMetrics { .. } => {} // handled in sidecar_server } } actions