diff --git a/profiles/opentelemetry_dev/compose.yaml b/profiles/opentelemetry_dev/compose.yaml index d0f85c1..d4fcbf8 100644 --- a/profiles/opentelemetry_dev/compose.yaml +++ b/profiles/opentelemetry_dev/compose.yaml @@ -2,14 +2,12 @@ services: jaeger: container_name: jaeger - image: jaegertracing/all-in-one:latest + image: jaegertracing/jaeger:latest environment: # COLLECTOR_OTLP_ENABLED: "true" - JAEGER_DISABLED: "true" + # JAEGER_DISABLED: "true" ports: - "16686:16686" - - "14250:14250" - - "14268:14268" - "4317:4317" otel-collector: @@ -20,6 +18,7 @@ services: - "{OCI_ENV_DIR}/profiles/opentelemetry_dev/otel-collector:/etc/otel-collector" ports: - "1888" # pprof extension + - "8125:8125" - "8888:8888" # Prometheus metrics exposed by the collector - "8889:8889" # Prometheus exporter metrics - "13133:13133" # health_check extension diff --git a/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml b/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml index 952643a..79b4543 100644 --- a/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml +++ b/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml @@ -9,29 +9,46 @@ receivers: - https://* processors: - batch: + memory_limiter: - check_interval: 1s - limit_mib: 4000 - transform: - error_mode: ignore - metric_statements: - - context: metric - statements: - - set(description, "Duration of HTTP server requests.") where name == "http.server.duration" - - set(description, "Number of active HTTP server requests.") where name == "http.server.active_requests" + check_interval: 2s + limit_mib: 200 + + filter/filter_pulp_api_request_duration: + metrics: + metric: + - 'not (name == "api.request_duration")' + + filter/exclude_pulp_api: + metrics: + metric: + - 'name == "api.request_duration"' + + batch/api_aggregation: + timeout: 10s + + attributes/remove_worker_name: + actions: + - key: worker.name + action: delete + + groupbyattrs/api_aggregation: + keys: + - api.request_duration + + batch: exporters: prometheus: endpoint: 0.0.0.0:8889 # send_timestamps: true namespace: pulp - logging: - verbosity: detailed otlp/jaeger: endpoint: jaeger:4317 tls: insecure: true + debug: + verbosity: detailed extensions: health_check: @@ -46,11 +63,21 @@ service: receivers: [otlp] processors: [memory_limiter, batch] exporters: [otlp/jaeger] - metrics: + + metrics/aggregation: receivers: [otlp] - processors: [transform, batch, memory_limiter] - exporters: [logging, prometheus] - logs: + processors: + - memory_limiter + - filter/filter_pulp_api_request_duration + - attributes/remove_worker_name + - batch/api_aggregation + - groupbyattrs/api_aggregation + exporters: [prometheus] + + metrics/main: receivers: [otlp] - processors: [memory_limiter, batch] - exporters: [logging] + processors: + - memory_limiter + - filter/exclude_pulp_api + - batch + exporters: [prometheus] diff --git a/profiles/opentelemetry_dev/prometheus/prometheus.yml b/profiles/opentelemetry_dev/prometheus/prometheus.yml index 44ba431..57e637f 100644 --- a/profiles/opentelemetry_dev/prometheus/prometheus.yml +++ b/profiles/opentelemetry_dev/prometheus/prometheus.yml @@ -1,7 +1,7 @@ # my global config global: - # scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - # evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). # Alertmanager configuration