From 514075a49699463e460e9bc7cc539c79a87c8c6b Mon Sep 17 00:00:00 2001 From: Hugo Santos Date: Sun, 12 Oct 2025 23:16:15 +0200 Subject: [PATCH 1/2] Rewrite service readiness check and remove orchestration service MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace port-forwarding based service readiness checks with a simpler, more reliable one-shot pod approach. This eliminates the orchestration service RPC entirely and makes readiness checks work consistently for both in-cluster and remote Kubernetes clusters. Key changes: - Rewrite AreServicesReady() to deploy one-shot checker pod with shell script - Use Chainguard busybox image (cgr.dev/chainguard/busybox:latest) - Use nc (netcat) for TCP connectivity tests instead of bash /dev/tcp - POSIX-compliant shell script for busybox compatibility - Delete orchestration/service and orchestration/proto packages (~580 lines) - Set UseOrchestrator = false (orchestrator no longer needed for readiness) - Make orchestrator deployment conditional on UseOrchestrator flag Benefits: - Actually tests TCP connect() from inside the cluster - Works for remote clusters via K8s API - Simple pass/fail via pod exit code - Built-in retry: 60 attempts × 500ms = 30s per port - More secure: Chainguard minimal, CVE-free image - Simpler: No RPC layer, no async port-forward errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- internal/prepare/orchestrator.go | 12 + .../runtime/kubernetes/clusternamespace.go | 30 +-- internal/runtime/kubernetes/readiness.go | 202 ++++++++++++-- orchestration/client/client.go | 18 +- orchestration/proto/service.pb.go | 252 ------------------ orchestration/proto/service.proto | 26 -- orchestration/proto/service_grpc.pb.go | 103 ------- orchestration/server/deps.fn.go | 8 - orchestration/server/server.cue | 1 - orchestration/service/deps.fn.go | 36 --- orchestration/service/service.cue | 31 --- orchestration/service/wire.go | 64 ----- 12 files changed, 198 insertions(+), 585 deletions(-) delete mode 100644 orchestration/proto/service.pb.go delete mode 100644 orchestration/proto/service.proto delete mode 100644 orchestration/proto/service_grpc.pb.go delete mode 100644 orchestration/service/deps.fn.go delete mode 100644 orchestration/service/service.cue delete mode 100644 orchestration/service/wire.go diff --git a/internal/prepare/orchestrator.go b/internal/prepare/orchestrator.go index 56b0431df..901e80f05 100644 --- a/internal/prepare/orchestrator.go +++ b/internal/prepare/orchestrator.go @@ -9,6 +9,7 @@ import ( "namespacelabs.dev/foundation/internal/runtime/kubernetes" "namespacelabs.dev/foundation/orchestration" + orchclient "namespacelabs.dev/foundation/orchestration/client" "namespacelabs.dev/foundation/schema" orchpb "namespacelabs.dev/foundation/schema/orchestration" "namespacelabs.dev/foundation/std/cfg" @@ -18,6 +19,10 @@ import ( func Orchestrator() ClusterStage { return ClusterStage{ Pre: func(ch chan *orchpb.Event) { + if !orchclient.UseOrchestrator { + // Skip orchestrator preparation if disabled + return + } ch <- &orchpb.Event{ ResourceId: "orchestrator", ResourceLabel: "Deploy Namespace Orchestrator", @@ -27,6 +32,9 @@ func Orchestrator() ClusterStage { } }, Post: func(ch chan *orchpb.Event) { + if !orchclient.UseOrchestrator { + return + } ch <- &orchpb.Event{ ResourceId: "orchestrator", Ready: orchpb.Event_READY, @@ -34,6 +42,10 @@ func Orchestrator() ClusterStage { } }, Run: func(ctx context.Context, env cfg.Context, devhost *schema.DevHost_ConfigureEnvironment, kube *kubernetes.Cluster, ch chan *orchpb.Event) error { + if !orchclient.UseOrchestrator { + // Skip orchestrator preparation if disabled + return nil + } return PrepareOrchestratorInKube(ctx, env, devhost, kube) }, } diff --git a/internal/runtime/kubernetes/clusternamespace.go b/internal/runtime/kubernetes/clusternamespace.go index 4c70d04b5..1cf57d662 100644 --- a/internal/runtime/kubernetes/clusternamespace.go +++ b/internal/runtime/kubernetes/clusternamespace.go @@ -12,8 +12,6 @@ import ( "net" "time" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/anypb" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -27,7 +25,6 @@ import ( "namespacelabs.dev/foundation/internal/runtime" "namespacelabs.dev/foundation/internal/runtime/kubernetes/client" "namespacelabs.dev/foundation/internal/runtime/kubernetes/kubeobserver" - orchclient "namespacelabs.dev/foundation/orchestration/client" "namespacelabs.dev/foundation/schema" runtimepb "namespacelabs.dev/foundation/schema/runtime" "namespacelabs.dev/foundation/schema/storage" @@ -214,31 +211,8 @@ func (r *ClusterNamespace) isDeployableReady(ctx context.Context, srv runtime.De } func (r *ClusterNamespace) areServicesReady(ctx context.Context, srv runtime.Deployable) (ServiceReadiness, error) { - if client.IsInclusterClient(r.underlying.cli) { - return AreServicesReady(ctx, r.underlying.cli, r.target.namespace, srv) - } - - if !orchclient.UseOrchestrator { - fmt.Fprintf(console.Debug(ctx), "will not wait for services of server %s...\n", srv.GetName()) - return ServiceReadiness{Ready: true}, nil - } - - conn, err := orchclient.ConnectToOrchestrator(ctx, r.parent) - if err != nil { - return ServiceReadiness{}, err - } - - res, err := orchclient.CallAreServicesReady(ctx, conn, srv, r.target.namespace) - if err != nil { - if status.Code(err) == codes.Unimplemented { - fmt.Fprintf(console.Debug(ctx), "old orchestrator version, will not wait for services of server %s...\n", srv.GetName()) - return ServiceReadiness{Ready: true}, nil - } - - return ServiceReadiness{}, err - } - - return ServiceReadiness{Ready: res.Ready, Message: res.Message}, nil + // Use the port-forward based implementation which works for both in-cluster and remote clusters + return AreServicesReady(ctx, r.underlying, r.target.namespace, srv) } func (r *ClusterNamespace) isPodReady(ctx context.Context, srv runtime.Deployable) (bool, error) { diff --git a/internal/runtime/kubernetes/readiness.go b/internal/runtime/kubernetes/readiness.go index 7c011c9bf..d1990366b 100644 --- a/internal/runtime/kubernetes/readiness.go +++ b/internal/runtime/kubernetes/readiness.go @@ -7,17 +7,17 @@ package kubernetes import ( "context" "fmt" - "net" - "time" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" + applycorev1 "k8s.io/client-go/applyconfigurations/core/v1" "namespacelabs.dev/foundation/framework/kubernetes/kubedef" "namespacelabs.dev/foundation/framework/kubernetes/kubeobj" "namespacelabs.dev/foundation/internal/fnerrors" "namespacelabs.dev/foundation/internal/runtime" - "namespacelabs.dev/foundation/internal/runtime/kubernetes/client" + "namespacelabs.dev/foundation/internal/runtime/kubernetes/kubeobserver" + "namespacelabs.dev/foundation/std/tasks" + "namespacelabs.dev/go-ids" ) type ServiceReadiness struct { @@ -25,37 +25,195 @@ type ServiceReadiness struct { Message string } -func AreServicesReady(ctx context.Context, cli *kubernetes.Clientset, namespace string, srv runtime.Deployable) (ServiceReadiness, error) { - if !client.IsInclusterClient(cli) { - return ServiceReadiness{}, fnerrors.InternalError("cannot check service readiness for remote kubernetes cluster") - } - +// AreServicesReady checks if all TCP ports of services for a deployable are accepting connections. +// It deploys a one-shot pod in the cluster that attempts TCP connections using in-cluster DNS. +// This works for both in-cluster and remote clusters since we use kubectl to run the pod. +func AreServicesReady(ctx context.Context, cluster *Cluster, namespace string, srv runtime.Deployable) (ServiceReadiness, error) { // TODO only check services that are required - services, err := cli.CoreV1().Services(namespace).List(ctx, metav1.ListOptions{ + services, err := cluster.cli.CoreV1().Services(namespace).List(ctx, metav1.ListOptions{ LabelSelector: kubeobj.SerializeSelector(kubedef.SelectById(srv)), }) if err != nil { return ServiceReadiness{}, err } + if len(services.Items) == 0 { + // No services to check + return ServiceReadiness{Ready: true}, nil + } + + // Build a shell script that checks all TCP ports with retry logic + // This runs inside the cluster so it can use service DNS names + // Uses POSIX-compliant shell syntax for busybox compatibility + script := "#!/bin/sh\nset -e\n" + for _, s := range services.Items { for _, port := range s.Spec.Ports { - if port.Protocol != v1.ProtocolTCP { + if port.Protocol != corev1.ProtocolTCP { continue } - addr := fmt.Sprintf("%s.%s.svc.cluster.local:%d", s.Name, s.Namespace, port.Port) + // Try to connect every 500ms for up to 30 seconds (60 attempts) + // Use nc (netcat) for TCP connectivity test - available in busybox + serviceDNS := fmt.Sprintf("%s.%s.svc.cluster.local", s.Name, s.Namespace) + script += fmt.Sprintf(` +echo "Checking %s:%d..." +i=0 +while [ $i -lt 60 ]; do + if timeout 0.1 nc -z %s %d 2>/dev/null; then + echo " ✓ %s:%d is ready" + break + fi + i=$((i + 1)) + if [ $i -eq 60 ]; then + echo " ✗ %s:%d failed after 30s" + exit 1 + fi + sleep 0.5 +done +`, serviceDNS, port.Port, serviceDNS, port.Port, serviceDNS, port.Port, serviceDNS, port.Port) + } + } - conn, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) - if err != nil { - return ServiceReadiness{ - Ready: false, - Message: fmt.Sprintf("%q not ready: failed to dial %s:%d: %v", srv.GetName(), s.Name, port.Port, err), - }, nil - } - conn.Close() + script += "echo \"All services ready\"\nexit 0\n" + + // Create a unique pod name for this check + podName := fmt.Sprintf("ns-svc-check-%s", ids.NewRandomBase32ID(8)) + + // Use Chainguard's busybox image for the connection checker + // This is a minimal, secure base image with shell and basic networking tools + container := applycorev1.Container(). + WithName("checker"). + WithImage("cgr.dev/chainguard/busybox:latest"). + WithCommand("/bin/sh", "-c", script). + WithSecurityContext( + applycorev1.SecurityContext(). + WithReadOnlyRootFilesystem(true). + WithRunAsNonRoot(true). + WithRunAsUser(65532)) // nonroot user in Chainguard images + + podSpec := applycorev1.PodSpec(). + WithContainers(container). + WithRestartPolicy(corev1.RestartPolicyNever). + WithSecurityContext(applycorev1.PodSecurityContext()) + + pod := applycorev1.Pod(podName, namespace). + WithSpec(podSpec). + WithLabels(kubedef.SelectNamespaceDriver()). + WithLabels(kubedef.ManagedByUs()) + + // Create the pod + if _, err := cluster.cli.CoreV1().Pods(namespace).Apply(ctx, pod, kubedef.Ego()); err != nil { + return ServiceReadiness{}, fmt.Errorf("failed to create service readiness checker pod: %w", err) + } + + // Schedule cleanup + defer func() { + cluster.cli.CoreV1().Pods(namespace).Delete(context.Background(), podName, metav1.DeleteOptions{}) + }() + + // Wait for the pod to complete + var finalStatus corev1.PodStatus + if err := kubeobserver.WaitForCondition(ctx, cluster.cli, + tasks.Action("kubernetes.service-readiness-check"). + Arg("namespace", namespace). + Arg("deployable", srv.GetName()), + kubeobserver.WaitForPodConditition(namespace, kubeobserver.PickPod(podName), + func(status corev1.PodStatus) (bool, error) { + finalStatus = status + return status.Phase == corev1.PodSucceeded || status.Phase == corev1.PodFailed, nil + })); err != nil { + return ServiceReadiness{}, fmt.Errorf("service readiness check pod failed to complete: %w", err) + } + + // Check the exit code + if finalStatus.Phase == corev1.PodSucceeded { + return ServiceReadiness{Ready: true}, nil + } + + // Pod failed - extract error message from container status + var exitCode int32 + var reason string + for _, containerStatus := range finalStatus.ContainerStatuses { + if containerStatus.Name == "checker" && containerStatus.State.Terminated != nil { + exitCode = containerStatus.State.Terminated.ExitCode + reason = containerStatus.State.Terminated.Reason + break } } - return ServiceReadiness{Ready: true}, nil + if exitCode == 0 { + // Should not happen if phase is Failed, but handle it + return ServiceReadiness{Ready: true}, nil + } + + return ServiceReadiness{ + Ready: false, + Message: fmt.Sprintf("%q not ready: service connectivity check failed (exit code %d, reason: %s)", srv.GetName(), exitCode, reason), + }, nil +} + +// CheckServiceConnectivity is a helper that checks if a specific service port is accepting connections. +// It's used primarily for testing and debugging. +func CheckServiceConnectivity(ctx context.Context, cluster *Cluster, namespace, serviceName string, port int32) error { + podName := fmt.Sprintf("ns-svc-check-%s", ids.NewRandomBase32ID(8)) + serviceDNS := fmt.Sprintf("%s.%s.svc.cluster.local", serviceName, namespace) + + script := fmt.Sprintf(`#!/bin/sh +i=0 +while [ $i -lt 60 ]; do + if timeout 0.1 nc -z %s %d 2>/dev/null; then + exit 0 + fi + i=$((i + 1)) + sleep 0.5 +done +exit 1 +`, serviceDNS, port) + + container := applycorev1.Container(). + WithName("checker"). + WithImage("cgr.dev/chainguard/busybox:latest"). + WithCommand("/bin/sh", "-c", script). + WithSecurityContext( + applycorev1.SecurityContext(). + WithReadOnlyRootFilesystem(true). + WithRunAsNonRoot(true). + WithRunAsUser(65532)) + + podSpec := applycorev1.PodSpec(). + WithContainers(container). + WithRestartPolicy(corev1.RestartPolicyNever) + + pod := applycorev1.Pod(podName, namespace). + WithSpec(podSpec). + WithLabels(kubedef.ManagedByUs()) + + if _, err := cluster.cli.CoreV1().Pods(namespace).Apply(ctx, pod, kubedef.Ego()); err != nil { + return err + } + + defer func() { + cluster.cli.CoreV1().Pods(namespace).Delete(context.Background(), podName, metav1.DeleteOptions{}) + }() + + var finalStatus corev1.PodStatus + if err := kubeobserver.WaitForCondition(ctx, cluster.cli, + tasks.Action("kubernetes.check-service-connectivity"). + Arg("namespace", namespace). + Arg("service", serviceName). + Arg("port", fmt.Sprintf("%d", port)), + kubeobserver.WaitForPodConditition(namespace, kubeobserver.PickPod(podName), + func(status corev1.PodStatus) (bool, error) { + finalStatus = status + return status.Phase == corev1.PodSucceeded || status.Phase == corev1.PodFailed, nil + })); err != nil { + return err + } + + if finalStatus.Phase == corev1.PodSucceeded { + return nil + } + + return fnerrors.Newf("service %s:%d is not accepting connections", serviceName, port) } diff --git a/orchestration/client/client.go b/orchestration/client/client.go index 81647b921..88da54718 100644 --- a/orchestration/client/client.go +++ b/orchestration/client/client.go @@ -15,7 +15,6 @@ import ( "namespacelabs.dev/foundation/internal/compute" "namespacelabs.dev/foundation/internal/console" "namespacelabs.dev/foundation/internal/runtime" - "namespacelabs.dev/foundation/orchestration/proto" "namespacelabs.dev/foundation/orchestration/server/constants" "namespacelabs.dev/foundation/schema" "namespacelabs.dev/foundation/std/cfg" @@ -27,7 +26,10 @@ const ( ConnTimeout = time.Minute // TODO reduce - we've seen slow connections in CI ) -var UseOrchestrator = true +// UseOrchestrator controls whether to deploy the orchestrator. +// Historically this was used for service readiness checks, but that functionality has been removed. +// The orchestrator still provides Kubernetes controllers for runtime config management. +var UseOrchestrator = false type remoteOrchestrator struct { cluster runtime.ClusterNamespace @@ -74,15 +76,3 @@ func ConnectToOrchestrator(ctx context.Context, cluster runtime.Cluster) (*grpc. return raw.(*remoteOrchestrator).Connect(ctx) } - -func CallAreServicesReady(ctx context.Context, conn *grpc.ClientConn, srv runtime.Deployable, ns string) (*proto.AreServicesReadyResponse, error) { - req := &proto.AreServicesReadyRequest{ - Deployable: runtime.DeployableToProto(srv), - Namespace: ns, - } - - ctx, cancel := context.WithTimeout(ctx, ConnTimeout) - defer cancel() - - return proto.NewOrchestrationServiceClient(conn).AreServicesReady(ctx, req) -} diff --git a/orchestration/proto/service.pb.go b/orchestration/proto/service.pb.go deleted file mode 100644 index 78064cc04..000000000 --- a/orchestration/proto/service.pb.go +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2022 Namespace Labs Inc; All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. - -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.27.1 -// protoc (unknown) -// source: orchestration/proto/service.proto - -package proto - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - runtime "namespacelabs.dev/foundation/schema/runtime" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type AreServicesReadyRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Deployable *runtime.Deployable `protobuf:"bytes,1,opt,name=deployable,proto3" json:"deployable,omitempty"` - Namespace string `protobuf:"bytes,2,opt,name=namespace,proto3" json:"namespace,omitempty"` -} - -func (x *AreServicesReadyRequest) Reset() { - *x = AreServicesReadyRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_orchestration_proto_service_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *AreServicesReadyRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*AreServicesReadyRequest) ProtoMessage() {} - -func (x *AreServicesReadyRequest) ProtoReflect() protoreflect.Message { - mi := &file_orchestration_proto_service_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use AreServicesReadyRequest.ProtoReflect.Descriptor instead. -func (*AreServicesReadyRequest) Descriptor() ([]byte, []int) { - return file_orchestration_proto_service_proto_rawDescGZIP(), []int{0} -} - -func (x *AreServicesReadyRequest) GetDeployable() *runtime.Deployable { - if x != nil { - return x.Deployable - } - return nil -} - -func (x *AreServicesReadyRequest) GetNamespace() string { - if x != nil { - return x.Namespace - } - return "" -} - -type AreServicesReadyResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Ready bool `protobuf:"varint,1,opt,name=ready,proto3" json:"ready,omitempty"` - Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"` -} - -func (x *AreServicesReadyResponse) Reset() { - *x = AreServicesReadyResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_orchestration_proto_service_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *AreServicesReadyResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*AreServicesReadyResponse) ProtoMessage() {} - -func (x *AreServicesReadyResponse) ProtoReflect() protoreflect.Message { - mi := &file_orchestration_proto_service_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use AreServicesReadyResponse.ProtoReflect.Descriptor instead. -func (*AreServicesReadyResponse) Descriptor() ([]byte, []int) { - return file_orchestration_proto_service_proto_rawDescGZIP(), []int{1} -} - -func (x *AreServicesReadyResponse) GetReady() bool { - if x != nil { - return x.Ready - } - return false -} - -func (x *AreServicesReadyResponse) GetMessage() string { - if x != nil { - return x.Message - } - return "" -} - -var File_orchestration_proto_service_proto protoreflect.FileDescriptor - -var file_orchestration_proto_service_proto_rawDesc = []byte{ - 0x0a, 0x21, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x12, 0x11, 0x6e, 0x73, 0x6c, 0x2e, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0x1f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x72, - 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x2f, 0x64, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x61, 0x62, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x7e, 0x0a, 0x17, 0x41, 0x72, 0x65, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x52, 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x12, 0x45, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x61, 0x62, 0x6c, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x72, 0x75, 0x6e, 0x74, 0x69, - 0x6d, 0x65, 0x2e, 0x44, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x0a, 0x64, - 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, - 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, - 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x4a, 0x0a, 0x18, 0x41, 0x72, 0x65, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x52, 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x65, 0x61, 0x64, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x05, 0x72, 0x65, 0x61, 0x64, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x32, 0x83, 0x01, 0x0a, 0x14, 0x4f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x6b, 0x0a, 0x10, - 0x41, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x52, 0x65, 0x61, 0x64, 0x79, - 0x12, 0x2a, 0x2e, 0x6e, 0x73, 0x6c, 0x2e, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, 0x72, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x41, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, - 0x52, 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x6e, - 0x73, 0x6c, 0x2e, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x2e, 0x41, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x52, 0x65, 0x61, 0x64, - 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x32, 0x5a, 0x30, 0x6e, 0x61, 0x6d, - 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x6c, 0x61, 0x62, 0x73, 0x2e, 0x64, 0x65, 0x76, 0x2f, 0x66, - 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, - 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, -} - -var ( - file_orchestration_proto_service_proto_rawDescOnce sync.Once - file_orchestration_proto_service_proto_rawDescData = file_orchestration_proto_service_proto_rawDesc -) - -func file_orchestration_proto_service_proto_rawDescGZIP() []byte { - file_orchestration_proto_service_proto_rawDescOnce.Do(func() { - file_orchestration_proto_service_proto_rawDescData = protoimpl.X.CompressGZIP(file_orchestration_proto_service_proto_rawDescData) - }) - return file_orchestration_proto_service_proto_rawDescData -} - -var file_orchestration_proto_service_proto_msgTypes = make([]protoimpl.MessageInfo, 2) -var file_orchestration_proto_service_proto_goTypes = []interface{}{ - (*AreServicesReadyRequest)(nil), // 0: nsl.orchestration.AreServicesReadyRequest - (*AreServicesReadyResponse)(nil), // 1: nsl.orchestration.AreServicesReadyResponse - (*runtime.Deployable)(nil), // 2: foundation.schema.runtime.Deployable -} -var file_orchestration_proto_service_proto_depIdxs = []int32{ - 2, // 0: nsl.orchestration.AreServicesReadyRequest.deployable:type_name -> foundation.schema.runtime.Deployable - 0, // 1: nsl.orchestration.OrchestrationService.AreServicesReady:input_type -> nsl.orchestration.AreServicesReadyRequest - 1, // 2: nsl.orchestration.OrchestrationService.AreServicesReady:output_type -> nsl.orchestration.AreServicesReadyResponse - 2, // [2:3] is the sub-list for method output_type - 1, // [1:2] is the sub-list for method input_type - 1, // [1:1] is the sub-list for extension type_name - 1, // [1:1] is the sub-list for extension extendee - 0, // [0:1] is the sub-list for field type_name -} - -func init() { file_orchestration_proto_service_proto_init() } -func file_orchestration_proto_service_proto_init() { - if File_orchestration_proto_service_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_orchestration_proto_service_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*AreServicesReadyRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_orchestration_proto_service_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*AreServicesReadyResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_orchestration_proto_service_proto_rawDesc, - NumEnums: 0, - NumMessages: 2, - NumExtensions: 0, - NumServices: 1, - }, - GoTypes: file_orchestration_proto_service_proto_goTypes, - DependencyIndexes: file_orchestration_proto_service_proto_depIdxs, - MessageInfos: file_orchestration_proto_service_proto_msgTypes, - }.Build() - File_orchestration_proto_service_proto = out.File - file_orchestration_proto_service_proto_rawDesc = nil - file_orchestration_proto_service_proto_goTypes = nil - file_orchestration_proto_service_proto_depIdxs = nil -} diff --git a/orchestration/proto/service.proto b/orchestration/proto/service.proto deleted file mode 100644 index a9042cbdc..000000000 --- a/orchestration/proto/service.proto +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2022 Namespace Labs Inc; All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. - -syntax = "proto3"; - -package nsl.orchestration; - -option go_package = "namespacelabs.dev/foundation/orchestration/proto"; - -import "schema/runtime/deployable.proto"; - - -message AreServicesReadyRequest { - foundation.schema.runtime.Deployable deployable = 1; - string namespace = 2; -} - -message AreServicesReadyResponse { - bool ready = 1; - string message = 2; -} - -service OrchestrationService { - rpc AreServicesReady(AreServicesReadyRequest) returns (AreServicesReadyResponse); -} \ No newline at end of file diff --git a/orchestration/proto/service_grpc.pb.go b/orchestration/proto/service_grpc.pb.go deleted file mode 100644 index 5c60b48fc..000000000 --- a/orchestration/proto/service_grpc.pb.go +++ /dev/null @@ -1,103 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.2.0 -// - protoc (unknown) -// source: orchestration/proto/service.proto - -package proto - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -// OrchestrationServiceClient is the client API for OrchestrationService service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. -type OrchestrationServiceClient interface { - AreServicesReady(ctx context.Context, in *AreServicesReadyRequest, opts ...grpc.CallOption) (*AreServicesReadyResponse, error) -} - -type orchestrationServiceClient struct { - cc grpc.ClientConnInterface -} - -func NewOrchestrationServiceClient(cc grpc.ClientConnInterface) OrchestrationServiceClient { - return &orchestrationServiceClient{cc} -} - -func (c *orchestrationServiceClient) AreServicesReady(ctx context.Context, in *AreServicesReadyRequest, opts ...grpc.CallOption) (*AreServicesReadyResponse, error) { - out := new(AreServicesReadyResponse) - err := c.cc.Invoke(ctx, "/nsl.orchestration.OrchestrationService/AreServicesReady", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// OrchestrationServiceServer is the server API for OrchestrationService service. -// All implementations should embed UnimplementedOrchestrationServiceServer -// for forward compatibility -type OrchestrationServiceServer interface { - AreServicesReady(context.Context, *AreServicesReadyRequest) (*AreServicesReadyResponse, error) -} - -// UnimplementedOrchestrationServiceServer should be embedded to have forward compatible implementations. -type UnimplementedOrchestrationServiceServer struct { -} - -func (UnimplementedOrchestrationServiceServer) AreServicesReady(context.Context, *AreServicesReadyRequest) (*AreServicesReadyResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method AreServicesReady not implemented") -} - -// UnsafeOrchestrationServiceServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to OrchestrationServiceServer will -// result in compilation errors. -type UnsafeOrchestrationServiceServer interface { - mustEmbedUnimplementedOrchestrationServiceServer() -} - -func RegisterOrchestrationServiceServer(s grpc.ServiceRegistrar, srv OrchestrationServiceServer) { - s.RegisterService(&OrchestrationService_ServiceDesc, srv) -} - -func _OrchestrationService_AreServicesReady_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(AreServicesReadyRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(OrchestrationServiceServer).AreServicesReady(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/nsl.orchestration.OrchestrationService/AreServicesReady", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(OrchestrationServiceServer).AreServicesReady(ctx, req.(*AreServicesReadyRequest)) - } - return interceptor(ctx, in, info, handler) -} - -// OrchestrationService_ServiceDesc is the grpc.ServiceDesc for OrchestrationService service. -// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var OrchestrationService_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "nsl.orchestration.OrchestrationService", - HandlerType: (*OrchestrationServiceServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "AreServicesReady", - Handler: _OrchestrationService_AreServicesReady_Handler, - }, - }, - Streams: []grpc.StreamDesc{}, - Metadata: "orchestration/proto/service.proto", -} diff --git a/orchestration/server/deps.fn.go b/orchestration/server/deps.fn.go index 415b297f0..d7fafa1f8 100644 --- a/orchestration/server/deps.fn.go +++ b/orchestration/server/deps.fn.go @@ -7,7 +7,6 @@ import ( "context" "namespacelabs.dev/foundation/orchestration/controllers" "namespacelabs.dev/foundation/orchestration/legacycontroller" - "namespacelabs.dev/foundation/orchestration/service" "namespacelabs.dev/foundation/std/go/core" "namespacelabs.dev/foundation/std/go/grpc/metrics" "namespacelabs.dev/foundation/std/go/server" @@ -30,12 +29,5 @@ func WireServices(ctx context.Context, srv server.Server, depgraph core.Dependen errs = append(errs, err) } - if err := depgraph.Instantiate(ctx, service.Provider__v9aee7, func(ctx context.Context, v interface{}) error { - service.WireService(ctx, srv.Scope(service.Package__v9aee7), v.(service.ServiceDeps)) - return nil - }); err != nil { - errs = append(errs, err) - } - return errs } diff --git a/orchestration/server/server.cue b/orchestration/server/server.cue index 4aa6c3af4..2c673d0e9 100644 --- a/orchestration/server/server.cue +++ b/orchestration/server/server.cue @@ -9,7 +9,6 @@ server: fn.#Server & { import: [ "namespacelabs.dev/foundation/orchestration/controllers", - "namespacelabs.dev/foundation/orchestration/service", "namespacelabs.dev/foundation/orchestration/legacycontroller", // TODO remove "namespacelabs.dev/foundation/std/grpc/logging", ] diff --git a/orchestration/service/deps.fn.go b/orchestration/service/deps.fn.go deleted file mode 100644 index abe210338..000000000 --- a/orchestration/service/deps.fn.go +++ /dev/null @@ -1,36 +0,0 @@ -// This file was automatically generated by Namespace. -// DO NOT EDIT. To update, re-run `ns generate`. - -package service - -import ( - "context" - "namespacelabs.dev/foundation/std/go/core" - "namespacelabs.dev/foundation/std/go/server" -) - -// Dependencies that are instantiated once for the lifetime of the service. -type ServiceDeps struct { -} - -// Verify that WireService is present and has the appropriate type. -type checkWireService func(context.Context, server.Registrar, ServiceDeps) - -var _ checkWireService = WireService - -var ( - Package__v9aee7 = &core.Package{ - PackageName: "namespacelabs.dev/foundation/orchestration/service", - } - - Provider__v9aee7 = core.Provider{ - Package: Package__v9aee7, - Instantiate: makeDeps__v9aee7, - } -) - -func makeDeps__v9aee7(ctx context.Context, di core.Dependencies) (_ interface{}, err error) { - var deps ServiceDeps - - return deps, nil -} diff --git a/orchestration/service/service.cue b/orchestration/service/service.cue deleted file mode 100644 index 046087d76..000000000 --- a/orchestration/service/service.cue +++ /dev/null @@ -1,31 +0,0 @@ -import ( - "namespacelabs.dev/foundation/std/fn" - "namespacelabs.dev/foundation/std/fn:inputs" -) - -$proto: inputs.#Proto & { - source: "../proto/service.proto" -} - -service: fn.#Service & { - framework: "GO" - exportService: $proto.services.OrchestrationService - - mounts: { - "/namespace/orchestration/data": ephemeral: { - size: "1GiB" // TODO consider raising this if we use orchestrator for deploying - } - "/namespace/orchestration/home": ephemeral: { - size: "1GiB" - } - } -} - -configure: fn.#Configure & { - startup: { - env: { - "NSDATA": "/namespace/orchestration/data" - "HOME": "/namespace/orchestration/home" - } - } -} diff --git a/orchestration/service/wire.go b/orchestration/service/wire.go deleted file mode 100644 index eb0d4762a..000000000 --- a/orchestration/service/wire.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2022 Namespace Labs Inc; All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. - -package service - -import ( - "context" - "fmt" - - k8s "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "namespacelabs.dev/foundation/internal/console" - "namespacelabs.dev/foundation/internal/networking/ingress" - "namespacelabs.dev/foundation/internal/networking/ingress/nginx" - "namespacelabs.dev/foundation/internal/planning/deploy" - "namespacelabs.dev/foundation/internal/providers/gcp/gke" - "namespacelabs.dev/foundation/internal/runtime/kubernetes" - "namespacelabs.dev/foundation/internal/runtime/kubernetes/kubeops" - "namespacelabs.dev/foundation/orchestration/proto" - "namespacelabs.dev/foundation/std/go/server" - "namespacelabs.dev/foundation/std/tasks" - "namespacelabs.dev/foundation/universe/aws/iam" -) - -type Service struct { -} - -func (svc *Service) AreServicesReady(ctx context.Context, req *proto.AreServicesReadyRequest) (*proto.AreServicesReadyResponse, error) { - config, err := rest.InClusterConfig() - if err != nil { - return nil, fmt.Errorf("failed to create incluster config: %w", err) - } - clientset, err := k8s.NewForConfig(config) - if err != nil { - return nil, fmt.Errorf("failed to create incluster clientset: %w", err) - } - - res, err := kubernetes.AreServicesReady(ctx, clientset, req.Namespace, req.Deployable) - if err != nil { - return nil, err - } - - return &proto.AreServicesReadyResponse{ - Ready: res.Ready, - Message: res.Message, - }, nil -} - -func WireService(ctx context.Context, srv server.Registrar, deps ServiceDeps) { - proto.RegisterOrchestrationServiceServer(srv, &Service{}) - - kubernetes.Register() - kubeops.Register() - iam.RegisterGraphHandlers() - deploy.RegisterDeployOps() - ingress.RegisterIngressClass(nginx.IngressClass()) - gke.RegisterIngressClass() - - // Always log actions, we filter if we show them on the client. - tasks.LogActions = true - // Always log debug to console, we redirect the log on the client. - console.DebugToConsole = true -} From 641f58419e0cdde6172bf73c0ad195958ca14558 Mon Sep 17 00:00:00 2001 From: Hugo Santos Date: Sun, 12 Oct 2025 23:24:48 +0200 Subject: [PATCH 2/2] Fix service readiness timeout and use nc native timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase retry timeout from 30s to 2 minutes (240 attempts) - Replace 'timeout 0.1 nc' with 'nc -z -w 1' (nc native timeout) - The timeout command may not be available in busybox - Longer timeout needed for CI environments where services take longer to start 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- internal/runtime/kubernetes/readiness.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/runtime/kubernetes/readiness.go b/internal/runtime/kubernetes/readiness.go index d1990366b..c1361d370 100644 --- a/internal/runtime/kubernetes/readiness.go +++ b/internal/runtime/kubernetes/readiness.go @@ -53,20 +53,20 @@ func AreServicesReady(ctx context.Context, cluster *Cluster, namespace string, s continue } - // Try to connect every 500ms for up to 30 seconds (60 attempts) + // Try to connect every 500ms for up to 2 minutes (240 attempts) // Use nc (netcat) for TCP connectivity test - available in busybox serviceDNS := fmt.Sprintf("%s.%s.svc.cluster.local", s.Name, s.Namespace) script += fmt.Sprintf(` echo "Checking %s:%d..." i=0 -while [ $i -lt 60 ]; do - if timeout 0.1 nc -z %s %d 2>/dev/null; then +while [ $i -lt 240 ]; do + if nc -z -w 1 %s %d 2>/dev/null; then echo " ✓ %s:%d is ready" break fi i=$((i + 1)) - if [ $i -eq 60 ]; then - echo " ✗ %s:%d failed after 30s" + if [ $i -eq 240 ]; then + echo " ✗ %s:%d failed after 2 minutes" exit 1 fi sleep 0.5 @@ -161,8 +161,8 @@ func CheckServiceConnectivity(ctx context.Context, cluster *Cluster, namespace, script := fmt.Sprintf(`#!/bin/sh i=0 -while [ $i -lt 60 ]; do - if timeout 0.1 nc -z %s %d 2>/dev/null; then +while [ $i -lt 240 ]; do + if nc -z -w 1 %s %d 2>/dev/null; then exit 0 fi i=$((i + 1))