diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 5138d991b7..d3dc3f9ff9 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -24,7 +24,7 @@ jobs: - "sample-operators/tomcat-operator" - "sample-operators/webpage" - "sample-operators/leader-election" - - "sample-operators/metrics-processing" + - "sample-operators/operations" runs-on: ubuntu-latest steps: - name: Checkout diff --git a/docs/content/en/blog/releases/v5-3-release.md b/docs/content/en/blog/releases/v5-3-release.md index 12b9bfd30e..632d82e5b1 100644 --- a/docs/content/en/blog/releases/v5-3-release.md +++ b/docs/content/en/blog/releases/v5-3-release.md @@ -97,7 +97,7 @@ A ready-to-use **Grafana dashboard** is included at [`observability/josdk-operator-metrics-dashboard.json`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/observability/josdk-operator-metrics-dashboard.json). The -[`metrics-processing` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/metrics-processing) +[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations) provides a complete end-to-end setup with Prometheus, Grafana, and an OpenTelemetry Collector, installable via `observability/install-observability.sh`. This is a good starting point for verifying metrics in a real cluster. diff --git a/docs/content/en/docs/documentation/operations/_index.md b/docs/content/en/docs/documentation/operations/_index.md index 1056b33c24..82dcde49f1 100644 --- a/docs/content/en/docs/documentation/operations/_index.md +++ b/docs/content/en/docs/documentation/operations/_index.md @@ -4,3 +4,7 @@ weight: 80 --- This section covers operations-related features for running and managing operators in production. 
+ +See the +[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations) +for a complete working example that demonstrates health probes, metrics, and Helm-based deployment. diff --git a/docs/content/en/docs/documentation/operations/health-probes.md b/docs/content/en/docs/documentation/operations/health-probes.md new file mode 100644 index 0000000000..6766c59be0 --- /dev/null +++ b/docs/content/en/docs/documentation/operations/health-probes.md @@ -0,0 +1,111 @@ +--- +title: Health Probes +weight: 85 +--- + +Operators running in Kubernetes should expose health probe endpoints so that the kubelet can detect startup +failures and runtime degradation. JOSDK provides the building blocks through its +[`RuntimeInfo`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RuntimeInfo.java) +API. + +## RuntimeInfo + +`RuntimeInfo` is available via `operator.getRuntimeInfo()` and exposes: + +| Method | Purpose | +|---|---| +| `isStarted()` | `true` once the operator and all its controllers have fully started | +| `allEventSourcesAreHealthy()` | `true` when every registered event source (informers, polling sources, etc.) reports a healthy status | +| `unhealthyEventSources()` | returns a map of controller name → unhealthy event sources, useful for diagnostics | +| `unhealthyInformerWrappingEventSourceHealthIndicator()` | returns a map of controller name → unhealthy informer-wrapping event sources, each exposing per-informer details via `InformerHealthIndicator` (`hasSynced()`, `isWatching()`, `isRunning()`, `getTargetNamespace()`) | + +In most cases a single readiness probe backed by `allEventSourcesAreHealthy()` is sufficient: before the +operator has fully started the informers will not have synced yet, so the check naturally covers the startup +case as well. Once running, it detects runtime degradation such as a lost watch connection. 
+ +### Fine-Grained Informer Diagnostics + +For advanced use cases — such as exposing per-informer health in a diagnostic endpoint or logging which +specific namespace lost its watch — `unhealthyInformerWrappingEventSourceHealthIndicator()` gives access to +individual `InformerHealthIndicator` instances. Each indicator exposes `hasSynced()`, `isWatching()`, +`isRunning()`, and `getTargetNamespace()`. This is typically not needed for a standard health probe but can +be valuable for operational dashboards or troubleshooting. + +## Setting Up a Probe Endpoint + +The example below uses [Jetty](https://eclipse.dev/jetty/) to expose a `/healthz` endpoint. Any HTTP +server library works — the key is calling the `RuntimeInfo` methods to determine the response code. + +```java +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.handler.ContextHandler; + +Operator operator = new Operator(); +operator.register(new MyReconciler()); + +// start the health server before the operator so probes can be queried during startup +var health = new ContextHandler(new HealthHandler(operator), "/healthz"); +Server server = new Server(8080); +server.setHandler(health); +server.start(); + +operator.start(); +``` + +Here `HealthHandler` extends `org.eclipse.jetty.server.Handler.Abstract` and responds with HTTP 200 only when +`operator.getRuntimeInfo()` reports both `isStarted()` and `allEventSourcesAreHealthy()`, and with HTTP 503 otherwise. + +See the +[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations) +for a complete working example. + +## Kubernetes Deployment Configuration + +Once your operator exposes the probe endpoint, configure probes in your Deployment manifest. 
Both the +startup and readiness probes can point to the same `/healthz` endpoint — the startup probe simply uses a +higher `failureThreshold` to give the operator time to initialize: + +```yaml +containers: +- name: operator + ports: + - name: probes + containerPort: 8080 + startupProbe: + httpGet: + path: /healthz + port: probes + initialDelaySeconds: 1 + periodSeconds: 3 + failureThreshold: 20 + readinessProbe: + httpGet: + path: /healthz + port: probes + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 3 +``` + +The startup probe gives the operator time to start (up to ~60 s with the settings above). Once the startup +probe succeeds, the readiness probe takes over and will mark the pod as not-ready if any event source +becomes unhealthy. + +## Helm Chart Support + +The [generic Helm chart](/docs/documentation/operations/helm-chart) supports health probes out of the box. +Enable them in your `values.yaml`: + +```yaml +probes: + port: 8080 + startup: + enabled: true + path: /healthz + readiness: + enabled: true + path: /healthz +``` + +All probe timing parameters (`initialDelaySeconds`, `periodSeconds`, `failureThreshold`) have sensible +defaults and can be overridden. diff --git a/docs/content/en/docs/documentation/operations/helm-chart.md b/docs/content/en/docs/documentation/operations/helm-chart.md index 1758ac20af..a0901f31f7 100644 --- a/docs/content/en/docs/documentation/operations/helm-chart.md +++ b/docs/content/en/docs/documentation/operations/helm-chart.md @@ -11,7 +11,7 @@ patterns so you don't have to write a chart from scratch. The chart is maintaine Contributions are more than welcome. 
The chart is used in the -[`metrics-processing` sample operator E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java) +[`operations` sample operator E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java) to deploy the operator to a cluster via Helm. ## What the Chart Provides @@ -80,16 +80,16 @@ for all available options. ## Usage Example -A working example of how to use the chart can be found in the metrics-processing sample operator's -[`helm-values.yaml`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/resources/helm-values.yaml): +A working example of how to use the chart can be found in the operations sample operator's +[`helm-values.yaml`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/resources/helm-values.yaml): ```yaml image: - repository: metrics-processing-operator + repository: operations-operator pullPolicy: Never tag: "latest" -nameOverride: "metrics-processing-operator" +nameOverride: "operations-operator" resources: {} diff --git a/docs/content/en/docs/documentation/operations/metrics.md b/docs/content/en/docs/documentation/operations/metrics.md index fc40070e46..1bf8e38368 100644 --- a/docs/content/en/docs/documentation/operations/metrics.md +++ b/docs/content/en/docs/documentation/operations/metrics.md @@ -103,9 +103,9 @@ observability sample (see below). 
#### Exploring metrics end-to-end The -[`metrics-processing` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/metrics-processing) +[`operations` sample operator](https://github.com/java-operator-sdk/java-operator-sdk/tree/main/sample-operators/operations) includes a full end-to-end test, -[`MetricsHandlingE2E`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java), +[`OperationsE2E`](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java), that: 1. Installs a local observability stack (Prometheus, Grafana, OpenTelemetry Collector) via diff --git a/helm/generic-helm-chart/templates/deployment.yaml b/helm/generic-helm-chart/templates/deployment.yaml index dd06916155..e4ae86a930 100644 --- a/helm/generic-helm-chart/templates/deployment.yaml +++ b/helm/generic-helm-chart/templates/deployment.yaml @@ -54,6 +54,42 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ required "A valid .Values.image.repository is required" .Values.image.repository }}:{{ include "generic-operator.imageTag" . 
}}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if or .Values.probes.startup.enabled .Values.probes.readiness.enabled .Values.probes.liveness.enabled }} + ports: + - name: probes + containerPort: {{ .Values.probes.port }} + protocol: TCP + {{- end }} + {{- if .Values.probes.startup.enabled }} + startupProbe: + httpGet: + path: {{ .Values.probes.startup.path }} + port: probes + initialDelaySeconds: {{ .Values.probes.startup.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.startup.periodSeconds }} + timeoutSeconds: {{ .Values.probes.startup.timeoutSeconds }} + failureThreshold: {{ .Values.probes.startup.failureThreshold }} + {{- end }} + {{- if .Values.probes.readiness.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.path }} + port: probes + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + {{- end }} + {{- if .Values.probes.liveness.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.path }} + port: probes + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + {{- end }} env: - name: OPERATOR_NAMESPACE valueFrom: diff --git a/helm/generic-helm-chart/tests/deployment_test.yaml b/helm/generic-helm-chart/tests/deployment_test.yaml index bdc8845ae9..a57f58dd13 100644 --- a/helm/generic-helm-chart/tests/deployment_test.yaml +++ b/helm/generic-helm-chart/tests/deployment_test.yaml @@ -288,3 +288,57 @@ tests: - equal: path: spec.template.spec.serviceAccountName value: my-operator + + - it: should not include probes by default + asserts: + - isNull: + path: 
spec.template.spec.containers[0].startupProbe + - isNull: + path: spec.template.spec.containers[0].readinessProbe + + - it: should add startup probe when enabled + documentSelector: + path: kind + value: Deployment + set: + probes.startup.enabled: true + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.httpGet.path + value: /health/startup + - equal: + path: spec.template.spec.containers[0].startupProbe.httpGet.port + value: probes + - contains: + path: spec.template.spec.containers[0].ports + content: + name: probes + containerPort: 8080 + protocol: TCP + + - it: should add readiness probe when enabled + documentSelector: + path: kind + value: Deployment + set: + probes.readiness.enabled: true + asserts: + - equal: + path: spec.template.spec.containers[0].readinessProbe.httpGet.path + value: /health/ready + - equal: + path: spec.template.spec.containers[0].readinessProbe.httpGet.port + value: probes + + - it: should add both probes when both enabled + documentSelector: + path: kind + value: Deployment + set: + probes.startup.enabled: true + probes.readiness.enabled: true + asserts: + - isNotNull: + path: spec.template.spec.containers[0].startupProbe + - isNotNull: + path: spec.template.spec.containers[0].readinessProbe diff --git a/helm/generic-helm-chart/values.yaml b/helm/generic-helm-chart/values.yaml index 8ab452059c..b398a6357d 100644 --- a/helm/generic-helm-chart/values.yaml +++ b/helm/generic-helm-chart/values.yaml @@ -86,6 +86,9 @@ operatorConfig: + + + @@ -128,3 +131,32 @@ extraVolumeMounts: [] # RBAC configuration rbac: create: true + +# Health probes configuration +probes: + port: 8080 + startup: + enabled: false + path: /health/startup + initialDelaySeconds: 1 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 20 + readiness: + enabled: false + path: /health/ready + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 3 +# We provide an option to specify liveness probes. 
+# However, the framework itself does not define any runtime +# information what such probe should check. The only purpose here +# is to cover your domain specific use case. + liveness: + enabled: false + path: /health/live + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/observability/install-observability.sh b/observability/install-observability.sh index dd57e7b352..00df607ca3 100755 --- a/observability/install-observability.sh +++ b/observability/install-observability.sh @@ -237,7 +237,7 @@ kubectl wait --for=condition=ready pod --all -n cert-manager --timeout=300s 2>/d # Wait for observability pods echo -e "${YELLOW}Checking observability pods...${NC}" -kubectl wait --for=condition=ready pod --all -n observability --timeout=300s +kubectl wait --for=condition=ready pod --all -n observability --timeout=480s echo -e "${GREEN}✓ All pods are ready${NC}" diff --git a/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/AbstractOperatorExtension.java b/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/AbstractOperatorExtension.java index 0609850713..b11853c331 100644 --- a/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/AbstractOperatorExtension.java +++ b/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/AbstractOperatorExtension.java @@ -33,6 +33,7 @@ import org.slf4j.LoggerFactory; import io.fabric8.kubernetes.api.model.*; +import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientBuilder; import io.fabric8.kubernetes.client.dsl.NonNamespaceOperation; @@ -323,4 +324,122 @@ public AbstractBuilder withPerClassNamespaceNameSupplier( return this; } } + + public void logDiagnosticInfo(String namespace) { + logDiagnosticInfo(getInfrastructureKubernetesClient(), namespace); + } + + public void logDiagnosticInfo(KubernetesClient 
client, String namespace) { + try { + // Log deployment status + var deployments = client.apps().deployments().inNamespace(namespace).list().getItems(); + for (Deployment deployment : deployments) { + var status = deployment.getStatus(); + LOGGER.error( + "Deployment '{}': replicas={}, readyReplicas={}, availableReplicas={}," + + " unavailableReplicas={}, conditions={}", + deployment.getMetadata().getName(), + status != null ? status.getReplicas() : "null", + status != null ? status.getReadyReplicas() : "null", + status != null ? status.getAvailableReplicas() : "null", + status != null ? status.getUnavailableReplicas() : "null", + status != null ? status.getConditions() : "null"); + } + + // Log pod status and container details + var pods = client.pods().inNamespace(namespace).list().getItems(); + for (Pod pod : pods) { + var podStatus = pod.getStatus(); + LOGGER.error( + "Pod '{}': phase={}, reason={}, message={}", + pod.getMetadata().getName(), + podStatus != null ? podStatus.getPhase() : "null", + podStatus != null ? podStatus.getReason() : "null", + podStatus != null ? 
podStatus.getMessage() : "null"); + + if (podStatus != null && podStatus.getContainerStatuses() != null) { + for (ContainerStatus cs : podStatus.getContainerStatuses()) { + LOGGER.error( + " Container '{}': ready={}, restartCount={}, state={}", + cs.getName(), + cs.getReady(), + cs.getRestartCount(), + cs.getState()); + } + } + if (podStatus != null && podStatus.getInitContainerStatuses() != null) { + for (ContainerStatus cs : podStatus.getInitContainerStatuses()) { + LOGGER.error( + " InitContainer '{}': ready={}, restartCount={}, state={}", + cs.getName(), + cs.getReady(), + cs.getRestartCount(), + cs.getState()); + } + } + + // Log pod events + var events = + client + .v1() + .events() + .inNamespace(namespace) + .withField("involvedObject.name", pod.getMetadata().getName()) + .list() + .getItems(); + for (var event : events) { + LOGGER.error( + " Event: type={}, reason={}, message={}", + event.getType(), + event.getReason(), + event.getMessage()); + } + + // Try to get container logs + try { + String logs = + client + .pods() + .inNamespace(namespace) + .withName(pod.getMetadata().getName()) + .tailingLines(50) + .getLog(); + if (logs != null && !logs.isEmpty()) { + LOGGER.error(" Logs for pod '{}':\n{}", pod.getMetadata().getName(), logs); + } + } catch (Exception logEx) { + LOGGER.error( + " Could not retrieve logs for pod '{}'", pod.getMetadata().getName(), logEx); + } + } + + if (pods.isEmpty()) { + LOGGER.error( + "No pods found in namespace '{}'. The deployment may have failed to" + + " create pods. 
Check if the image exists and is pullable.", + namespace); + + // Log deployment events when no pods exist + for (Deployment deployment : deployments) { + var events = + client + .v1() + .events() + .inNamespace(namespace) + .withField("involvedObject.name", deployment.getMetadata().getName()) + .list() + .getItems(); + for (var event : events) { + LOGGER.error( + " Deployment event: type={}, reason={}, message={}", + event.getType(), + event.getReason(), + event.getMessage()); + } + } + } + } catch (Exception diagEx) { + LOGGER.error("Failed to collect diagnostic info: {}", diagEx.getMessage(), diagEx); + } + } } diff --git a/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/ClusterDeployedOperatorExtension.java b/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/ClusterDeployedOperatorExtension.java index 8e4e3d64d6..4bbfa3258d 100644 --- a/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/ClusterDeployedOperatorExtension.java +++ b/operator-framework-junit/src/main/java/io/javaoperatorsdk/operator/junit/ClusterDeployedOperatorExtension.java @@ -31,10 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import io.fabric8.kubernetes.api.model.ContainerStatus; import io.fabric8.kubernetes.api.model.HasMetadata; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.rbac.ClusterRoleBinding; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientBuilder; @@ -119,7 +116,8 @@ protected void applyCrds(ExtensionContext context) { Thread.sleep(CRD_READY_WAIT); // readiness is not applicable for CRD, just wait a little var crdList = crd.get(); LOGGER.debug( - "Applied CRD with name: {}", + "Applied CRD from path: {} name: {}", + crdFile.getPath(), (crdList != null && !crdList.isEmpty() && crdList.get(0) != null) ? 
crdList.get(0).getMetadata().getName() : crdFile.getName()); @@ -154,7 +152,11 @@ protected void before(ExtensionContext context) { .resourceList(operatorDeployment) .waitUntilReady(operatorDeploymentTimeout.toMillis(), TimeUnit.MILLISECONDS); } catch (KubernetesClientTimeoutException e) { - logDiagnosticInfo(kubernetesClient); + LOGGER.error( + "Operator deployment timed out after {} seconds in namespace: {}", + operatorDeploymentTimeout.getSeconds(), + namespace); + logDiagnosticInfo(getInfrastructureKubernetesClient(), namespace); throw e; } LOGGER.debug("Operator resources deployed."); @@ -168,125 +170,6 @@ protected void deleteOperator() { .delete(); } - private void logDiagnosticInfo(KubernetesClient kubernetesClient) { - LOGGER.error( - "Operator deployment timed out after {} seconds in namespace: {}", - operatorDeploymentTimeout.getSeconds(), - namespace); - try { - // Log deployment status - var deployments = - kubernetesClient.apps().deployments().inNamespace(namespace).list().getItems(); - for (Deployment deployment : deployments) { - var status = deployment.getStatus(); - LOGGER.error( - "Deployment '{}': replicas={}, readyReplicas={}, availableReplicas={}," - + " unavailableReplicas={}, conditions={}", - deployment.getMetadata().getName(), - status != null ? status.getReplicas() : "null", - status != null ? status.getReadyReplicas() : "null", - status != null ? status.getAvailableReplicas() : "null", - status != null ? status.getUnavailableReplicas() : "null", - status != null ? status.getConditions() : "null"); - } - - // Log pod status and container details - var pods = kubernetesClient.pods().inNamespace(namespace).list().getItems(); - for (Pod pod : pods) { - var podStatus = pod.getStatus(); - LOGGER.error( - "Pod '{}': phase={}, reason={}, message={}", - pod.getMetadata().getName(), - podStatus != null ? podStatus.getPhase() : "null", - podStatus != null ? podStatus.getReason() : "null", - podStatus != null ? 
podStatus.getMessage() : "null"); - - if (podStatus != null && podStatus.getContainerStatuses() != null) { - for (ContainerStatus cs : podStatus.getContainerStatuses()) { - LOGGER.error( - " Container '{}': ready={}, restartCount={}, state={}", - cs.getName(), - cs.getReady(), - cs.getRestartCount(), - cs.getState()); - } - } - if (podStatus != null && podStatus.getInitContainerStatuses() != null) { - for (ContainerStatus cs : podStatus.getInitContainerStatuses()) { - LOGGER.error( - " InitContainer '{}': ready={}, restartCount={}, state={}", - cs.getName(), - cs.getReady(), - cs.getRestartCount(), - cs.getState()); - } - } - - // Log pod events - var events = - kubernetesClient - .v1() - .events() - .inNamespace(namespace) - .withField("involvedObject.name", pod.getMetadata().getName()) - .list() - .getItems(); - for (var event : events) { - LOGGER.error( - " Event: type={}, reason={}, message={}", - event.getType(), - event.getReason(), - event.getMessage()); - } - - // Try to get container logs - try { - String logs = - kubernetesClient - .pods() - .inNamespace(namespace) - .withName(pod.getMetadata().getName()) - .tailingLines(50) - .getLog(); - if (logs != null && !logs.isEmpty()) { - LOGGER.error(" Logs for pod '{}':\n{}", pod.getMetadata().getName(), logs); - } - } catch (Exception logEx) { - LOGGER.error( - " Could not retrieve logs for pod '{}'", pod.getMetadata().getName(), logEx); - } - } - - if (pods.isEmpty()) { - LOGGER.error( - "No pods found in namespace '{}'. The deployment may have failed to" - + " create pods. 
Check if the image exists and is pullable.", - namespace); - - // Log deployment events when no pods exist - for (Deployment deployment : deployments) { - var events = - kubernetesClient - .v1() - .events() - .inNamespace(namespace) - .withField("involvedObject.name", deployment.getMetadata().getName()) - .list() - .getItems(); - for (var event : events) { - LOGGER.error( - " Deployment event: type={}, reason={}, message={}", - event.getType(), - event.getReason(), - event.getMessage()); - } - } - } - } catch (Exception diagEx) { - LOGGER.error("Failed to collect diagnostic info: {}", diagEx.getMessage(), diagEx); - } - } - public static class Builder extends AbstractBuilder { private final List operatorDeployment; private Duration deploymentTimeout; diff --git a/sample-operators/metrics-processing/pom.xml b/sample-operators/operations/pom.xml similarity index 90% rename from sample-operators/metrics-processing/pom.xml rename to sample-operators/operations/pom.xml index c67f623e33..8667604f96 100644 --- a/sample-operators/metrics-processing/pom.xml +++ b/sample-operators/operations/pom.xml @@ -25,10 +25,10 @@ 5.3.4-SNAPSHOT - sample-metrics-processing + sample-operations jar - Operator SDK - Samples - Metrics processing - Showcases to handle metrics setup and deploys related tooling and dashboards + Operator SDK - Samples - Operations + Showcases operations related features setup like metrics, and deploys related tooling and dashboards @@ -82,6 +82,11 @@ awaitility compile + + org.eclipse.jetty + jetty-server + 12.1.8 + io.javaoperatorsdk operator-framework-junit @@ -99,7 +104,7 @@ gcr.io/distroless/java17-debian11 - metrics-processing-operator + operations-operator diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/AbstractMetricsHandlingReconciler.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/AbstractOperationsReconciler.java similarity index 77% rename from 
sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/AbstractMetricsHandlingReconciler.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/AbstractOperationsReconciler.java index df83afdd6b..5f380da3fd 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/AbstractMetricsHandlingReconciler.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/AbstractOperationsReconciler.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics; +package io.javaoperatorsdk.operator.sample.operations; import java.util.concurrent.ThreadLocalRandom; @@ -24,19 +24,18 @@ import io.javaoperatorsdk.operator.api.reconciler.Context; import io.javaoperatorsdk.operator.api.reconciler.Reconciler; import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingSpec; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingStatus; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsSpec; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsStatus; -public abstract class AbstractMetricsHandlingReconciler< - R extends CustomResource> +public abstract class AbstractOperationsReconciler< + R extends CustomResource> implements Reconciler { - private static final Logger log = - LoggerFactory.getLogger(AbstractMetricsHandlingReconciler.class); + private static final Logger log = LoggerFactory.getLogger(AbstractOperationsReconciler.class); private final long sleepMillis; - protected AbstractMetricsHandlingReconciler(long sleepMillis) { + protected AbstractOperationsReconciler(long sleepMillis) { this.sleepMillis = sleepMillis; } @@ -59,7 +58,7 
@@ public UpdateControl reconcile(R resource, Context context) { var status = resource.getStatus(); if (status == null) { - status = new MetricsHandlingStatus(); + status = new OperationsStatus(); resource.setStatus(status); } diff --git a/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/HealthHandler.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/HealthHandler.java new file mode 100644 index 0000000000..156930b7f3 --- /dev/null +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/HealthHandler.java @@ -0,0 +1,57 @@ +/* + * Copyright Java Operator SDK Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.javaoperatorsdk.operator.sample.operations; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +import org.eclipse.jetty.server.Handler; +import org.eclipse.jetty.server.Request; +import org.eclipse.jetty.server.Response; +import org.eclipse.jetty.util.Callback; + +import io.javaoperatorsdk.operator.Operator; + +/** + * Combined health endpoint that checks whether all event sources (informers, polling sources, etc.) + * are healthy. Before the operator has fully started the informers will not have synced yet, so + * this endpoint naturally covers the startup case as well. 
+ */ +public class HealthHandler extends Handler.Abstract { + + private final Operator operator; + + public HealthHandler(Operator operator) { + this.operator = operator; + } + + @Override + public boolean handle(Request request, Response response, Callback callback) { + var runtimeInfo = operator.getRuntimeInfo(); + if (runtimeInfo.isStarted() && runtimeInfo.allEventSourcesAreHealthy()) { + sendMessage(response, 200, "healthy", callback); + } else { + sendMessage(response, 503, "not healthy", callback); + } + return true; + } + + static void sendMessage(Response response, int code, String message, Callback callback) { + response.setStatus(code); + response.getHeaders().put("Content-Type", "text/plain; charset=utf-8"); + response.write(true, ByteBuffer.wrap(message.getBytes(StandardCharsets.UTF_8)), callback); + } +} diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler1.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler1.java similarity index 64% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler1.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler1.java index 3234deedaf..1720169c38 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler1.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler1.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.javaoperatorsdk.operator.sample.metrics; +package io.javaoperatorsdk.operator.sample.operations; import java.util.List; @@ -24,35 +24,34 @@ import io.javaoperatorsdk.operator.processing.event.ResourceID; import io.javaoperatorsdk.operator.processing.event.source.EventSource; import io.javaoperatorsdk.operator.processing.event.source.timer.TimerEventSource; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingCustomResource1; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsCustomResource1; -import static io.javaoperatorsdk.operator.sample.metrics.MetricsHandlingReconciler1.NAME; +import static io.javaoperatorsdk.operator.sample.operations.OperationsReconciler1.NAME; @ControllerConfiguration(name = NAME) -public class MetricsHandlingReconciler1 - extends AbstractMetricsHandlingReconciler { +public class OperationsReconciler1 extends AbstractOperationsReconciler { - public static final String NAME = "MetricsHandlingReconciler1"; + public static final String NAME = "OperationsReconciler1"; private static final long TIMER_DELAY = 5000; - private final TimerEventSource timerEventSource; + private final TimerEventSource timerEventSource; - public MetricsHandlingReconciler1() { + public OperationsReconciler1() { super(100); timerEventSource = new TimerEventSource<>(); } @SuppressWarnings("unchecked") @Override - public List> prepareEventSources( - EventSourceContext context) { + public List> prepareEventSources( + EventSourceContext context) { return List.of((EventSource) timerEventSource); } @Override - public UpdateControl reconcile( - MetricsHandlingCustomResource1 resource, Context context) { + public UpdateControl reconcile( + OperationsCustomResource1 resource, Context context) { var result = super.reconcile(resource, context); timerEventSource.scheduleOnce(ResourceID.fromResource(resource), TIMER_DELAY); return result; diff --git 
a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler2.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler2.java similarity index 66% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler2.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler2.java index 0484d2848e..9565296bd0 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler2.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsReconciler2.java @@ -13,18 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics; +package io.javaoperatorsdk.operator.sample.operations; import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingCustomResource2; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsCustomResource2; @ControllerConfiguration -public class MetricsHandlingReconciler2 - extends AbstractMetricsHandlingReconciler { +public class OperationsReconciler2 extends AbstractOperationsReconciler { - public static final String NAME = "MetricsHandlingReconciler2"; + public static final String NAME = "OperationsReconciler2"; - public MetricsHandlingReconciler2() { + public OperationsReconciler2() { super(150); } } diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingSampleOperator.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsSampleOperator.java similarity 
index 87% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingSampleOperator.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsSampleOperator.java index 2c6b9c3e90..ba1cbec10a 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingSampleOperator.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/OperationsSampleOperator.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics; +package io.javaoperatorsdk.operator.sample.operations; import java.io.IOException; import java.io.InputStream; @@ -23,6 +23,8 @@ import java.util.HashMap; import java.util.Map; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.handler.ContextHandler; import org.jspecify.annotations.NonNull; import org.jspecify.annotations.Nullable; import org.slf4j.Logger; @@ -51,15 +53,15 @@ import io.micrometer.registry.otlp.OtlpConfig; import io.micrometer.registry.otlp.OtlpMeterRegistry; -public class MetricsHandlingSampleOperator { +public class OperationsSampleOperator { - private static final Logger log = LoggerFactory.getLogger(MetricsHandlingSampleOperator.class); + private static final Logger log = LoggerFactory.getLogger(OperationsSampleOperator.class); /** * Based on env variables a different flavor of Reconciler is used, showcasing how the same logic * can be implemented using the low level and higher level APIs. 
*/ - public static void main(String[] args) { + public static void main(String[] args) throws Exception { log.info("Metrics Handling Sample Operator starting!"); var configProviders = new ArrayList(); @@ -71,11 +73,17 @@ public static void main(String[] args) { Operator operator = new Operator(o -> configLoader.applyConfigs().andThen(k -> k.withMetrics(metrics))); operator.register( - new MetricsHandlingReconciler1(), - configLoader.applyControllerConfigs(MetricsHandlingReconciler1.NAME)); + new OperationsReconciler1(), + configLoader.applyControllerConfigs(OperationsReconciler1.NAME)); operator.register( - new MetricsHandlingReconciler2(), - configLoader.applyControllerConfigs(MetricsHandlingReconciler2.NAME)); + new OperationsReconciler2(), + configLoader.applyControllerConfigs(OperationsReconciler2.NAME)); + var health = new ContextHandler(new HealthHandler(operator), "/health"); + Server server = new Server(8080); + server.setHandler(health); + server.start(); + log.info("Health probe server started on port 8080"); + operator.start(); } @@ -96,7 +104,7 @@ public static void main(String[] args) { @Override public Map resourceAttributes() { - return Map.of("service.name", "josdk", "operator", "metrics-processing"); + return Map.of("service.name", "josdk", "operator", "operations"); } }; @@ -139,7 +147,7 @@ public Duration step() { private static Map loadConfigFromYaml() { Map configMap = new HashMap<>(); try (InputStream inputStream = - MetricsHandlingSampleOperator.class.getResourceAsStream("/otlp-config.yaml")) { + OperationsSampleOperator.class.getResourceAsStream("/otlp-config.yaml")) { if (inputStream == null) { log.warn("otlp-config.yaml not found in resources, using default OTLP configuration"); return configMap; diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource1.java 
b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource1.java similarity index 75% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource1.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource1.java index 892f663175..ba94f0c4cd 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource1.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource1.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics.customresource; +package io.javaoperatorsdk.operator.sample.operations.customresource; import io.fabric8.kubernetes.api.model.Namespaced; import io.fabric8.kubernetes.client.CustomResource; @@ -22,11 +22,11 @@ @Group("sample.javaoperatorsdk") @Version("v1") -public class MetricsHandlingCustomResource1 - extends CustomResource implements Namespaced { +public class OperationsCustomResource1 extends CustomResource + implements Namespaced { @Override public String toString() { - return "MetricsHandlingCustomResource1{" + "spec=" + spec + ", status=" + status + '}'; + return "OperationsCustomResource1{" + "spec=" + spec + ", status=" + status + '}'; } } diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource2.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource2.java similarity index 75% rename from 
sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource2.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource2.java index 38abf2a322..9a3f2815d6 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingCustomResource2.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsCustomResource2.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics.customresource; +package io.javaoperatorsdk.operator.sample.operations.customresource; import io.fabric8.kubernetes.api.model.Namespaced; import io.fabric8.kubernetes.client.CustomResource; @@ -22,11 +22,11 @@ @Group("sample.javaoperatorsdk") @Version("v1") -public class MetricsHandlingCustomResource2 - extends CustomResource implements Namespaced { +public class OperationsCustomResource2 extends CustomResource + implements Namespaced { @Override public String toString() { - return "MetricsHandlingCustomResource2{" + "spec=" + spec + ", status=" + status + '}'; + return "OperationsCustomResource2{" + "spec=" + spec + ", status=" + status + '}'; } } diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingSpec.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsSpec.java similarity index 88% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingSpec.java rename to 
sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsSpec.java index 50016f03e0..cc3cda1edd 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingSpec.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsSpec.java @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics.customresource; +package io.javaoperatorsdk.operator.sample.operations.customresource; -public class MetricsHandlingSpec { +public class OperationsSpec { private int number; diff --git a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingStatus.java b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsStatus.java similarity index 88% rename from sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingStatus.java rename to sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsStatus.java index 76c286cf80..dd7df45bdc 100644 --- a/sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingStatus.java +++ b/sample-operators/operations/src/main/java/io/javaoperatorsdk/operator/sample/operations/customresource/OperationsStatus.java @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.javaoperatorsdk.operator.sample.metrics.customresource; +package io.javaoperatorsdk.operator.sample.operations.customresource; -public class MetricsHandlingStatus { +public class OperationsStatus { private int observedNumber; diff --git a/sample-operators/metrics-processing/src/main/resources/io/javaoperatorsdk/operator/sample/deployment.yaml b/sample-operators/operations/src/main/resources/io/javaoperatorsdk/operator/sample/deployment.yaml similarity index 100% rename from sample-operators/metrics-processing/src/main/resources/io/javaoperatorsdk/operator/sample/deployment.yaml rename to sample-operators/operations/src/main/resources/io/javaoperatorsdk/operator/sample/deployment.yaml diff --git a/sample-operators/metrics-processing/src/main/resources/log4j2.xml b/sample-operators/operations/src/main/resources/log4j2.xml similarity index 89% rename from sample-operators/metrics-processing/src/main/resources/log4j2.xml rename to sample-operators/operations/src/main/resources/log4j2.xml index 593f120e0b..190336f7d9 100644 --- a/sample-operators/metrics-processing/src/main/resources/log4j2.xml +++ b/sample-operators/operations/src/main/resources/log4j2.xml @@ -26,6 +26,9 @@ + + + diff --git a/sample-operators/metrics-processing/src/main/resources/otlp-config.yaml b/sample-operators/operations/src/main/resources/otlp-config.yaml similarity index 100% rename from sample-operators/metrics-processing/src/main/resources/otlp-config.yaml rename to sample-operators/operations/src/main/resources/otlp-config.yaml diff --git a/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java b/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java similarity index 83% rename from sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java rename to 
sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java index 34e96a5870..e30032eb8d 100644 --- a/sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java +++ b/sample-operators/operations/src/test/java/io/javaoperatorsdk/operator/sample/operations/OperationsE2E.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.javaoperatorsdk.operator.sample.metrics; +package io.javaoperatorsdk.operator.sample.operations; import java.io.*; import java.net.HttpURLConnection; @@ -42,40 +42,40 @@ import io.javaoperatorsdk.operator.junit.AbstractOperatorExtension; import io.javaoperatorsdk.operator.junit.ClusterDeployedOperatorExtension; import io.javaoperatorsdk.operator.junit.LocallyRunOperatorExtension; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingCustomResource1; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingCustomResource2; -import io.javaoperatorsdk.operator.sample.metrics.customresource.MetricsHandlingSpec; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsCustomResource1; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsCustomResource2; +import io.javaoperatorsdk.operator.sample.operations.customresource.OperationsSpec; -import static io.javaoperatorsdk.operator.sample.metrics.MetricsHandlingSampleOperator.isLocal; +import static io.javaoperatorsdk.operator.sample.operations.OperationsSampleOperator.isLocal; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; @TestInstance(TestInstance.Lifecycle.PER_CLASS) -class MetricsHandlingE2E { +class OperationsE2E { - static final Logger log = LoggerFactory.getLogger(MetricsHandlingE2E.class); + static final Logger log = 
LoggerFactory.getLogger(OperationsE2E.class); static final String OBSERVABILITY_NAMESPACE = "observability"; static final int PROMETHEUS_PORT = 9090; static final int OTEL_COLLECTOR_PORT = 4318; public static final Duration TEST_DURATION = Duration.ofSeconds(60); public static final String NAME_LABEL_KEY = "app.kubernetes.io/name"; - static final String HELM_RELEASE_NAME = "metrics-processing"; + static final String HELM_RELEASE_NAME = "operations"; private LocalPortForward prometheusPortForward; private LocalPortForward otelCollectorPortForward; static final KubernetesClient client = new KubernetesClientBuilder().build(); - MetricsHandlingE2E() {} + OperationsE2E() {} @RegisterExtension - AbstractOperatorExtension operator = + AbstractOperatorExtension extension = isLocal() ? LocallyRunOperatorExtension.builder() - .withReconciler(new MetricsHandlingReconciler1()) - .withReconciler(new MetricsHandlingReconciler2()) + .withReconciler(new OperationsReconciler1()) + .withReconciler(new OperationsReconciler2()) .withConfigurationService( - c -> c.withMetrics(MetricsHandlingSampleOperator.initOTLPMetrics(true))) + c -> c.withMetrics(OperationsSampleOperator.initOTLPMetrics(true))) .build() : ClusterDeployedOperatorExtension.builder().build(); @@ -103,16 +103,15 @@ void cleanup() throws IOException { } private void helmInstall() { + var namespace = getNamespace(); try { var chartPath = findProjectRoot("helm").toPath().resolve("helm/generic-helm-chart").toString(); - var valuesUrl = MetricsHandlingE2E.class.getClassLoader().getResource("helm-values.yaml"); + var valuesUrl = OperationsE2E.class.getClassLoader().getResource("helm-values.yaml"); if (valuesUrl == null) { throw new IllegalStateException("helm-values.yaml not found on classpath"); } var valuesPath = new File(valuesUrl.toURI()).getAbsolutePath(); - var namespace = getNamespace(); - log.info("Installing helm release '{}' into namespace '{}'", HELM_RELEASE_NAME, namespace); runCommand( "helm", @@ -125,15 +124,16 
@@ private void helmInstall() { namespace, "--wait", "--timeout", - "2m"); + "5m"); log.info("Helm release '{}' installed successfully", HELM_RELEASE_NAME); } catch (Exception e) { + extension.logDiagnosticInfo(namespace); throw new RuntimeException("Failed to install helm chart", e); } } private String getNamespace() { - var ns = operator.getNamespace(); + var ns = extension.getNamespace(); return ns == null ? "default" : ns; } @@ -210,10 +210,10 @@ void testPropagatedMetrics() throws Exception { "Starting longevity metrics test (running for {} seconds)", TEST_DURATION.getSeconds()); // Create initial resources including ones that trigger failures - operator.create(createResource(MetricsHandlingCustomResource1.class, "test-success-1", 1)); - operator.create(createResource(MetricsHandlingCustomResource2.class, "test-success-2", 1)); - operator.create(createResource(MetricsHandlingCustomResource1.class, "test-fail-1", 1)); - operator.create(createResource(MetricsHandlingCustomResource2.class, "test-fail-2", 1)); + extension.create(createResource(OperationsCustomResource1.class, "test-success-1", 1)); + extension.create(createResource(OperationsCustomResource2.class, "test-success-2", 1)); + extension.create(createResource(OperationsCustomResource1.class, "test-fail-1", 1)); + extension.create(createResource(OperationsCustomResource2.class, "test-fail-2", 1)); // Continuously trigger reconciliations for ~50 seconds by alternating between // creating new resources, updating specs of existing ones, and deleting older dynamic ones @@ -226,19 +226,19 @@ void testPropagatedMetrics() throws Exception { switch (counter % 4) { case 0 -> { String name = "test-dynamic-1-" + counter; - operator.create(createResource(MetricsHandlingCustomResource1.class, name, counter * 3)); + extension.create(createResource(OperationsCustomResource1.class, name, counter * 3)); createdResource1Names.addLast(name); log.info("Iteration {}: created {}", counter, name); } case 1 -> { - var r1 = 
operator.get(MetricsHandlingCustomResource1.class, "test-success-1"); + var r1 = extension.get(OperationsCustomResource1.class, "test-success-1"); r1.getSpec().setNumber(counter * 7); - operator.replace(r1); + extension.replace(r1); log.info("Iteration {}: updated test-success-1 number to {}", counter, counter * 7); } case 2 -> { String name = "test-dynamic-2-" + counter; - operator.create(createResource(MetricsHandlingCustomResource2.class, name, counter * 5)); + extension.create(createResource(OperationsCustomResource2.class, name, counter * 5)); createdResource2Names.addLast(name); log.info("Iteration {}: created {}", counter, name); } @@ -248,16 +248,16 @@ void testPropagatedMetrics() throws Exception { && (createdResource2Names.isEmpty() || createdResource1Names.size() >= createdResource2Names.size())) { String name = createdResource1Names.pollFirst(); - var r = operator.get(MetricsHandlingCustomResource1.class, name); + var r = extension.get(OperationsCustomResource1.class, name); if (r != null) { - operator.delete(r); + extension.delete(r); log.info("Iteration {}: deleted {} ", counter, name); } } else if (!createdResource2Names.isEmpty()) { String name = createdResource2Names.pollFirst(); - var r = operator.get(MetricsHandlingCustomResource2.class, name); + var r = extension.get(OperationsCustomResource2.class, name); if (r != null) { - operator.delete(r); + extension.delete(r); log.info("Iteration {}: deleted {}", counter, name); } } @@ -346,12 +346,12 @@ private String queryPrometheus(String prometheusUrl, String query) throws IOExce } } - private > R createResource( + private > R createResource( Class type, String name, int number) { try { R resource = type.getDeclaredConstructor().newInstance(); resource.getMetadata().setName(name); - MetricsHandlingSpec spec = new MetricsHandlingSpec(); + OperationsSpec spec = new OperationsSpec(); spec.setNumber(number); resource.setSpec(spec); return resource; @@ -368,7 +368,7 @@ private void 
installObservabilityServices() { .resolve("observability/install-observability.sh") .toFile(); log.info("Running observability setup script: {}", scriptFile.getAbsolutePath()); - runCommand("/bin/sh", scriptFile.getAbsolutePath()); + runCommand("/bin/bash", scriptFile.getAbsolutePath()); log.info("Observability stack is ready"); } catch (Exception e) { log.error("Failed to setup observability stack", e); @@ -388,12 +388,16 @@ private static File findProjectRoot(String marker) throws IOException { } private static void runCommand(String... command) throws IOException, InterruptedException { - var process = new ProcessBuilder(command).redirectErrorStream(true).start(); - try (var reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + var process = new ProcessBuilder(command).start(); + try (var stdoutReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + var stderrReader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { String line; - while ((line = reader.readLine()) != null) { + while ((line = stdoutReader.readLine()) != null) { log.info("{}: {}", command[0], line); } + while ((line = stderrReader.readLine()) != null) { + log.error("{}: {}", command[0], line); + } } int exitCode = process.waitFor(); if (exitCode != 0) { diff --git a/sample-operators/metrics-processing/src/test/resources/helm-values.yaml b/sample-operators/operations/src/test/resources/helm-values.yaml similarity index 74% rename from sample-operators/metrics-processing/src/test/resources/helm-values.yaml rename to sample-operators/operations/src/test/resources/helm-values.yaml index bb8e251139..ee9ac59823 100644 --- a/sample-operators/metrics-processing/src/test/resources/helm-values.yaml +++ b/sample-operators/operations/src/test/resources/helm-values.yaml @@ -14,15 +14,15 @@ # limitations under the License. 
# -# Helm values for metrics-processing operator E2E test deployment +# Helm values for operations operator E2E test deployment # Used with the generic-operator-chart from helm/generic-helm-chart/ image: - repository: metrics-processing-operator + repository: operations-operator pullPolicy: Never tag: "latest" -nameOverride: "metrics-processing-operator" +nameOverride: "operations-operator" resources: {} @@ -30,6 +30,14 @@ resources: {} primaryResources: - apiGroup: "sample.javaoperatorsdk" resources: - - metricshandlingcustomresource1s - - metricshandlingcustomresource2s + - operationscustomresource1s + - operationscustomresource2s + +probes: + startup: + enabled: true + path: /health + readiness: + enabled: true + path: /health diff --git a/sample-operators/pom.xml b/sample-operators/pom.xml index 25a745012c..504aba5e78 100644 --- a/sample-operators/pom.xml +++ b/sample-operators/pom.xml @@ -35,6 +35,6 @@ mysql-schema leader-election controller-namespace-deletion - metrics-processing + operations