diff --git a/PROJECT b/PROJECT index 600b78a4d..5db972e16 100644 --- a/PROJECT +++ b/PROJECT @@ -15,15 +15,6 @@ resources: domain: emqx.io group: apps kind: EMQX - path: github.com/emqx/emqx-operator/api/v2 - version: v2 -- api: - crdVersion: v1 - namespaced: true - controller: true - domain: emqx.io - group: apps - kind: Rebalance - path: github.com/emqx/emqx-operator/api/v2beta1 - version: v2beta1 + path: github.com/emqx/emqx-operator/api/v3alpha1 + version: v3alpha1 version: "3" diff --git a/README.md b/README.md index 192006dae..640bfafa5 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ This operator is compatible with the following EMQX releases: - EMQX 5.10 - EMQX 6.x +Requires Kubernetes >= 1.27. PVC auto-cleanup for core nodes (on scale-down and CR deletion) relies on the `StatefulSetAutoDeletePVC` feature gate, which has been beta and enabled by default since Kubernetes 1.27, and stable (GA) since Kubernetes 1.32. + ## Installation Here's the simplest way to install the operator. @@ -74,8 +76,8 @@ kubectl logs -l "control-plane=controller-manager" --tail=-1 --namespace emqx-op ### Prerequisites - go version v1.22.0+ - docker version 17.03+. -- kubectl version v1.24+. -- Access to a Kubernetes v1.24+ cluster. +- kubectl version v1.27+. +- Access to a Kubernetes v1.27+ cluster. ### To Deploy on the cluster **Build and push your image to the location specified by `OPERATOR_IMAGE`:** diff --git a/api/v2beta1/groupversion_info.go b/api/v2beta1/groupversion_info.go index 1d891045b..bbaf61c7d 100644 --- a/api/v2beta1/groupversion_info.go +++ b/api/v2beta1/groupversion_info.go @@ -14,7 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. */ -// package v2beta1 contains API Schema definitions for the apps v2beta1 API group. +// Package v2beta1 contains Rebalance API Schema definitions for the apps.emqx.io v2beta1 API group. +// +// NOTE: The Rebalance CRD is currently NOT FUNCTIONAL. 
+// Type definitions are retained for future use but the CRD is not registered +// with the scheme, not installed in the cluster, and no controller reconciles it. +// // +kubebuilder:object:generate=true // +groupName=apps.emqx.io package v2beta1 diff --git a/api/v2beta1/rebalance_types.go b/api/v2beta1/rebalance_types.go index 3efb164cf..e9fe9d5d6 100644 --- a/api/v2beta1/rebalance_types.go +++ b/api/v2beta1/rebalance_types.go @@ -16,6 +16,14 @@ limitations under the License. package v2beta1 +// NOTE: The Rebalance CRD is currently NOT FUNCTIONAL. +// The type definitions are retained for future use but the CRD is not registered +// with the scheme and no controller reconciles it. Do not create Rebalance custom +// resources — they will be ignored by the operator. +// +// To re-enable, uncomment the init() function at the bottom of this file, restore +// this group's AddToScheme registration in cmd/main.go, and wire up a corresponding +// controller there. + import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -44,6 +52,7 @@ type RebalanceList struct { Items []Rebalance `json:"items"` } -func init() { - SchemeBuilder.Register(&Rebalance{}, &RebalanceList{}) -} +// NOT FUNCTIONAL — Rebalance CRD is disabled. Uncomment to re-enable. +// func init() { +// SchemeBuilder.Register(&Rebalance{}, &RebalanceList{}) +// } diff --git a/api/v2/const.go b/api/v3alpha1/const.go similarity index 87% rename from api/v2/const.go rename to api/v3alpha1/const.go index b3d479b25..547c6098d 100644 --- a/api/v2/const.go +++ b/api/v3alpha1/const.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package v2 +package v3alpha1 import corev1 "k8s.io/api/core/v1" @@ -36,6 +36,4 @@ const ( const ( // Whether the pod is responsible for DS replication DSReplicationSite corev1.PodConditionType = "apps.emqx.io/ds-replication-site" - // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-readiness-gate - PodOnServing corev1.PodConditionType = "apps.emqx.io/on-serving" ) diff --git a/api/v2/emqx_types.go b/api/v3alpha1/emqx_types.go similarity index 98% rename from api/v2/emqx_types.go rename to api/v3alpha1/emqx_types.go index 60dd725c9..b3ec451c3 100644 --- a/api/v2/emqx_types.go +++ b/api/v3alpha1/emqx_types.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v2 +package v3alpha1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/api/v2/emqx_types_spec.go b/api/v3alpha1/emqx_types_spec.go similarity index 90% rename from api/v2/emqx_types_spec.go rename to api/v3alpha1/emqx_types_spec.go index 664d05284..743d56474 100644 --- a/api/v2/emqx_types_spec.go +++ b/api/v3alpha1/emqx_types_spec.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v2 +package v3alpha1 import ( corev1 "k8s.io/api/core/v1" @@ -23,6 +23,7 @@ import ( ) // EMQXSpec defines the desired state of EMQX. +// +kubebuilder:validation:XValidation:rule="!has(self.replicantTemplate) || !has(self.replicantTemplate.spec.replicas) || self.replicantTemplate.spec.replicas == 0 || self.coreTemplate.spec.replicas >= 2",message="Core-replicant clusters require at least 2 core replicas for rolling updates." type EMQXSpec struct { // EMQX container image. // More info: https://kubernetes.io/docs/concepts/containers/images @@ -58,11 +59,11 @@ type EMQXSpec struct { RevisionHistoryLimit int32 `json:"revisionHistoryLimit,omitempty"` // Cluster upgrade strategy settings. 
- // +kubebuilder:default={type:Recreate} + // +kubebuilder:default={type:RollingUpdate} UpdateStrategy UpdateStrategy `json:"updateStrategy,omitempty"` // Template for Pods running EMQX core nodes. - // +kubebuilder:default={spec:{replicas:2}} + // +kubebuilder:default={spec:{replicas:2,persistentVolumeClaimSpec:{accessModes:{"ReadWriteOnce"},resources:{requests:{storage:"500Mi"}}}}} CoreTemplate EMQXCoreTemplate `json:"coreTemplate,omitempty"` // Template for Pods running EMQX replicant nodes. @@ -118,14 +119,10 @@ type Config struct { type UpdateStrategy struct { // Determines how cluster upgrade is performed. - // * `Recreate`: Perform blue-green upgrade. - // +kubebuilder:validation:Enum=Recreate - // +kubebuilder:default=Recreate + // * `RollingUpdate`: Perform a rolling upgrade, updating pods one at a time. + // +kubebuilder:validation:Enum=RollingUpdate + // +kubebuilder:default=RollingUpdate Type string `json:"type,omitempty"` - // Number of seconds before connection evacuation starts. - // +kubebuilder:validation:Minimum=0 - // +kubebuilder:default=10 - InitialDelaySeconds int32 `json:"initialDelaySeconds,omitempty"` // Evacuation strategy settings. EvacuationStrategy EvacuationStrategy `json:"evacuationStrategy,omitempty"` } @@ -177,8 +174,7 @@ type EMQXCoreTemplateSpec struct { EMQXReplicantTemplateSpec `json:",inline"` // PVC specification for a core node data storage. - // Note: this field named inconsistently, it is actually just a `PersistentVolumeClaimSpec`. - VolumeClaimTemplates corev1.PersistentVolumeClaimSpec `json:"volumeClaimTemplates,omitempty"` + PersistentVolumeClaimSpec corev1.PersistentVolumeClaimSpec `json:"persistentVolumeClaimSpec,omitempty"` } type EMQXReplicantTemplateSpec struct { @@ -215,6 +211,12 @@ type EMQXReplicantTemplateSpec struct { // by specifying 0. This is a mutually exclusive setting with "minAvailable". 
// +kubebuilder:validation:XIntOrString MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + // MinReadySeconds is the minimum time (seconds) a pod must be Ready before it counts as available. + // For core nodes this is applied to the StatefulSet (mirrors apps/v1 StatefulSetSpec.minReadySeconds); + // for replicants, to the ReplicaSet (mirrors apps/v1 ReplicaSetSpec.minReadySeconds). + // Omitted or zero matches the apps/v1 default (0). + // +kubebuilder:validation:Minimum=0 + MinReadySeconds int32 `json:"minReadySeconds,omitempty"` // Entrypoint array. Not executed within a shell. // The container image's ENTRYPOINT is used if this is not provided. @@ -285,8 +287,10 @@ type EMQXReplicantTemplateSpec struct { LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"` // Periodic probe of container service readiness. // Container will be removed from service endpoints if the probe fails. + // Strongly advised to keep the current default: it takes into account ongoing node evacuations managed + // by the Operator as part of scaling operations and rolling updates. // More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - // +kubebuilder:default={initialDelaySeconds:10,periodSeconds:5,failureThreshold:12,httpGet: {path:/status, port:"dashboard"}} + // +kubebuilder:default={initialDelaySeconds:10,periodSeconds:5,timeoutSeconds:3,failureThreshold:1,httpGet:{path:"/api/v5/load_rebalance/availability_check", port:"dashboard"}} ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"` // StartupProbe indicates that the Pod has successfully initialized. // If specified, no other probes are executed until this completes successfully. 
@@ -318,3 +322,17 @@ func (spec *EMQXSpec) HasReplicants() bool { func (s *ServiceTemplate) IsEnabled() bool { return s.Enabled != nil && *s.Enabled } + +func (spec *EMQXSpec) NumCoreReplicas() int32 { + if spec.CoreTemplate.Spec.Replicas != nil { + return *spec.CoreTemplate.Spec.Replicas + } + return 1 +} + +func (spec *EMQXSpec) NumReplicantReplicas() int32 { + if spec.ReplicantTemplate != nil && spec.ReplicantTemplate.Spec.Replicas != nil { + return *spec.ReplicantTemplate.Spec.Replicas + } + return 0 +} diff --git a/api/v2/emqx_types_status.go b/api/v3alpha1/emqx_types_status.go similarity index 72% rename from api/v2/emqx_types_status.go rename to api/v3alpha1/emqx_types_status.go index 318e381d6..5f8419ecb 100644 --- a/api/v2/emqx_types_status.go +++ b/api/v3alpha1/emqx_types_status.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v2 +package v3alpha1 import ( "slices" @@ -32,15 +32,15 @@ type EMQXStatus struct { // Status of each core node in the cluster. CoreNodes []EMQXNode `json:"coreNodes,omitempty"` // Summary status of the set of core nodes. - CoreNodesStatus EMQXNodesStatus `json:"coreNodesStatus,omitempty"` + CoreNodesStatus CoreNodesStatus `json:"coreNodesStatus,omitempty"` // Status of each replicant node in the cluster. ReplicantNodes []EMQXNode `json:"replicantNodes,omitempty"` // Summary status of the set of replicant nodes. - ReplicantNodesStatus EMQXNodesStatus `json:"replicantNodesStatus,omitempty"` + ReplicantNodesStatus ReplicantNodesStatus `json:"replicantNodesStatus,omitempty"` // Status of active node evacuations in the cluster. - NodeEvacuationsStatus []NodeEvacuationStatus `json:"nodeEvacuationsStatus,omitempty"` + NodeEvacuations []NodeEvacuationStatus `json:"nodeEvacuations,omitempty"` // Status of EMQX Durable Storage replication. 
DSReplication DSReplicationStatus `json:"dsReplication,omitempty"` } @@ -65,30 +65,40 @@ type NodeEvacuationStatus struct { InitialConnections int32 `json:"initialConnections,omitempty"` } -type EMQXNodesStatus struct { - // Total number of replicas. - Replicas int32 `json:"replicas,omitempty"` +// CoreNodesStatus is the summary status of core nodes managed by a single StatefulSet. +type CoreNodesStatus struct { // Number of ready replicas. - ReadyReplicas int32 `json:"readyReplicas,omitempty"` - // Current revision of the respective core or replicant set. + ReadyReplicas int32 `json:"readyReplicas"` + // Number of replicas already updated to the desired pod template. + UpdatedReplicas int32 `json:"updatedReplicas"` + // Number of replicas still running the previous pod template. + CurrentReplicas int32 `json:"currentReplicas"` +} + +// ReplicantNodesStatus is the summary status of the set of replicant nodes. +// The multi-ReplicaSet pattern requires revision tracking at the CR level. +type ReplicantNodesStatus struct { + // Number of ready replicas. + ReadyReplicas int32 `json:"readyReplicas"` + // Current revision of the replicant set. CurrentRevision string `json:"currentRevision,omitempty"` // Number of replicas running current revision. - CurrentReplicas int32 `json:"currentReplicas,omitempty"` - // Update revision of the respective core or replicant set. + CurrentReplicas int32 `json:"currentReplicas"` + // Update revision of the replicant set. // When different from the current revision, the set is being updated. UpdateRevision string `json:"updateRevision,omitempty"` // Number of replicas running update revision. 
- UpdateReplicas int32 `json:"updateReplicas,omitempty"` + UpdateReplicas int32 `json:"updateReplicas"` CollisionCount *int32 `json:"collisionCount,omitempty"` } type EMQXNode struct { // Node name - // +kubebuilder:example="emqx@emqx-core-557c8b7684-0.emqx-headless.default.svc.cluster.local" + // +kubebuilder:example="emqx@emqx-core-0.emqx-headless.default.svc.cluster.local" Name string `json:"name,omitempty"` // Corresponding pod name - // +kubebuilder:example="emqx-core-557c8b7684-0" + // +kubebuilder:example="emqx-core-0" PodName string `json:"podName,omitempty"` // Node status // +kubebuilder:example=running @@ -103,9 +113,9 @@ type EMQXNode struct { // +kubebuilder:example=core Role string `json:"role,omitempty"` // Number of MQTT sessions - Sessions int64 `json:"sessions,omitempty"` + Sessions int64 `json:"sessions"` // Number of connected MQTT clients - Connections int64 `json:"connections,omitempty"` + Connections int64 `json:"connections"` } func (s EMQXStatus) FindNode(node string) *EMQXNode { @@ -162,57 +172,29 @@ type DSDBReplicationStatus struct { } const ( - Initialized string = "Initialized" CoreNodesProgressing string = "CoreNodesProgressing" - CoreNodesReady string = "CoreNodesReady" ReplicantNodesProgressing string = "ReplicantNodesProgressing" - ReplicantNodesReady string = "ReplicantNodesReady" Available string = "Available" Ready string = "Ready" ) -func (s *EMQXStatus) ResetConditions(reason string) { - conditionTypes := []string{} - for _, c := range s.Conditions { - if c.Type != Initialized && c.Status == metav1.ConditionTrue { - conditionTypes = append(conditionTypes, c.Type) - } +func (s *EMQXStatus) SetCondition(ty string, status metav1.ConditionStatus, reason, message string) { + _, existing := s.GetCondition(ty) + if existing != nil && + existing.Status == status && + existing.Reason == reason && + existing.Message == message { + return } - for _, conditionType := range conditionTypes { - s.SetFalseCondition(conditionType, reason) + 
s.RemoveCondition(ty) + c := metav1.Condition{ + Type: ty, + Status: status, + Reason: reason, + Message: message, + LastTransitionTime: metav1.Now(), } -} - -func (s *EMQXStatus) SetCondition(c metav1.Condition) { - s.RemoveCondition(c.Type) - c.LastTransitionTime = metav1.Now() - s.Conditions = slices.Insert(s.Conditions, 0, c) -} - -func (s *EMQXStatus) SetTrueCondition(conditionType string) { - s.SetCondition(metav1.Condition{ - Type: conditionType, - Status: metav1.ConditionTrue, - Reason: conditionType, - }) -} - -func (s *EMQXStatus) SetFalseCondition(conditionType string, reason string) { - s.SetCondition(metav1.Condition{ - Type: conditionType, - Status: metav1.ConditionFalse, - Reason: reason, - }) -} - -func (s *EMQXStatus) GetLastTrueCondition() *metav1.Condition { - for i := range s.Conditions { - c := s.Conditions[i] - if c.Status == metav1.ConditionTrue { - return &c - } - } - return nil + s.Conditions = append(s.Conditions, c) } func (s *EMQXStatus) GetCondition(conditionType string) (int, *metav1.Condition) { diff --git a/api/v2/groupversion_info.go b/api/v3alpha1/groupversion_info.go similarity index 89% rename from api/v2/groupversion_info.go rename to api/v3alpha1/groupversion_info.go index 65bb49bb6..f8c666b08 100644 --- a/api/v2/groupversion_info.go +++ b/api/v3alpha1/groupversion_info.go @@ -14,10 +14,10 @@ See the License for the specific language governing permissions and limitations under the License. */ -// package v2 contains API Schema definitions for the apps v2 API group. +// Package v3alpha1 contains API Schema definitions for the apps.emqx.io v3alpha1 API group. // +kubebuilder:object:generate=true // +groupName=apps.emqx.io -package v2 +package v3alpha1 import ( "k8s.io/apimachinery/pkg/runtime/schema" @@ -26,7 +26,7 @@ import ( var ( // GroupVersion is group version used to register these objects. 
- GroupVersion = schema.GroupVersion{Group: "apps.emqx.io", Version: "v2"} + GroupVersion = schema.GroupVersion{Group: "apps.emqx.io", Version: "v3alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/api/v2/labels.go b/api/v3alpha1/labels.go similarity index 98% rename from api/v2/labels.go rename to api/v3alpha1/labels.go index 50fc79730..c52ddffe8 100644 --- a/api/v2/labels.go +++ b/api/v3alpha1/labels.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v2 +package v3alpha1 import ( "maps" diff --git a/api/v2/names.go b/api/v3alpha1/names.go similarity index 99% rename from api/v2/names.go rename to api/v3alpha1/names.go index cdea728cd..bc641297a 100644 --- a/api/v2/names.go +++ b/api/v3alpha1/names.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v2 +package v3alpha1 import ( "fmt" diff --git a/api/v2/zz_generated.deepcopy.go b/api/v3alpha1/zz_generated.deepcopy.go similarity index 94% rename from api/v2/zz_generated.deepcopy.go rename to api/v3alpha1/zz_generated.deepcopy.go index bb475aec5..b9113708b 100644 --- a/api/v2/zz_generated.deepcopy.go +++ b/api/v3alpha1/zz_generated.deepcopy.go @@ -18,7 +18,7 @@ limitations under the License. // Code generated by controller-gen. DO NOT EDIT. -package v2 +package v3alpha1 import ( "k8s.io/api/core/v1" @@ -62,6 +62,21 @@ func (in *Config) DeepCopy() *Config { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CoreNodesStatus) DeepCopyInto(out *CoreNodesStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CoreNodesStatus. 
+func (in *CoreNodesStatus) DeepCopy() *CoreNodesStatus { + if in == nil { + return nil + } + out := new(CoreNodesStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DSDBReplicationStatus) DeepCopyInto(out *DSDBReplicationStatus) { *out = *in @@ -145,7 +160,7 @@ func (in *EMQXCoreTemplate) DeepCopy() *EMQXCoreTemplate { func (in *EMQXCoreTemplateSpec) DeepCopyInto(out *EMQXCoreTemplateSpec) { *out = *in in.EMQXReplicantTemplateSpec.DeepCopyInto(&out.EMQXReplicantTemplateSpec) - in.VolumeClaimTemplates.DeepCopyInto(&out.VolumeClaimTemplates) + in.PersistentVolumeClaimSpec.DeepCopyInto(&out.PersistentVolumeClaimSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EMQXCoreTemplateSpec. @@ -205,26 +220,6 @@ func (in *EMQXNode) DeepCopy() *EMQXNode { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EMQXNodesStatus) DeepCopyInto(out *EMQXNodesStatus) { - *out = *in - if in.CollisionCount != nil { - in, out := &in.CollisionCount, &out.CollisionCount - *out = new(int32) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EMQXNodesStatus. -func (in *EMQXNodesStatus) DeepCopy() *EMQXNodesStatus { - if in == nil { - return nil - } - out := new(EMQXNodesStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EMQXReplicantTemplate) DeepCopyInto(out *EMQXReplicantTemplate) { *out = *in @@ -446,15 +441,15 @@ func (in *EMQXStatus) DeepCopyInto(out *EMQXStatus) { *out = make([]EMQXNode, len(*in)) copy(*out, *in) } - in.CoreNodesStatus.DeepCopyInto(&out.CoreNodesStatus) + out.CoreNodesStatus = in.CoreNodesStatus if in.ReplicantNodes != nil { in, out := &in.ReplicantNodes, &out.ReplicantNodes *out = make([]EMQXNode, len(*in)) copy(*out, *in) } in.ReplicantNodesStatus.DeepCopyInto(&out.ReplicantNodesStatus) - if in.NodeEvacuationsStatus != nil { - in, out := &in.NodeEvacuationsStatus, &out.NodeEvacuationsStatus + if in.NodeEvacuations != nil { + in, out := &in.NodeEvacuations, &out.NodeEvacuations *out = make([]NodeEvacuationStatus, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) @@ -523,6 +518,26 @@ func (in *NodeEvacuationStatus) DeepCopy() *NodeEvacuationStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ReplicantNodesStatus) DeepCopyInto(out *ReplicantNodesStatus) { + *out = *in + if in.CollisionCount != nil { + in, out := &in.CollisionCount, &out.CollisionCount + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicantNodesStatus. +func (in *ReplicantNodesStatus) DeepCopy() *ReplicantNodesStatus { + if in == nil { + return nil + } + out := new(ReplicantNodesStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SecretRef) DeepCopyInto(out *SecretRef) { *out = *in } diff --git a/cmd/main.go b/cmd/main.go index 2c70184d0..ee0f919fd 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -37,8 +37,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" - crdv2 "github.com/emqx/emqx-operator/api/v2" - crdv2beta1 "github.com/emqx/emqx-operator/api/v2beta1" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/emqx/emqx-operator/internal/controller" // +kubebuilder:scaffold:imports ) @@ -51,8 +50,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(crdv2.AddToScheme(scheme)) - utilruntime.Must(crdv2beta1.AddToScheme(scheme)) + utilruntime.Must(crd.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -60,7 +58,7 @@ func init() { // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;delete -// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;update +// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;update;delete // +kubebuilder:rbac:groups="",resources=pods/status,verbs=patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update @@ -168,10 +166,7 @@ func main() { os.Exit(1) } - if err = controller.NewRebalanceReconciler(mgr).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "Rebalance") - os.Exit(1) - } + // NOTE: Rebalance controller is disabled in this release. See api/v2beta1/rebalance_types.go. 
// +kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/config/crd/bases/apps.emqx.io_emqxes.yaml b/config/crd/bases/apps.emqx.io_emqxes.yaml index 27a1a2837..993b3f492 100644 --- a/config/crd/bases/apps.emqx.io_emqxes.yaml +++ b/config/crd/bases/apps.emqx.io_emqxes.yaml @@ -23,7 +23,7 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date - name: v2 + name: v3alpha1 schema: openAPIV3Schema: description: Custom Resource representing an EMQX cluster. @@ -128,6 +128,12 @@ spec: coreTemplate: default: spec: + persistentVolumeClaimSpec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Mi replicas: 2 description: Template for Pods running EMQX core nodes. properties: @@ -6642,6 +6648,15 @@ spec: absence of the evicted pod. So for example you can prevent all voluntary evictions by specifying "100%". x-kubernetes-int-or-string: true + minReadySeconds: + description: |- + MinReadySeconds is the minimum time (seconds) a pod must be Ready before it counts as available. + For core nodes this is applied to the StatefulSet (mirrors apps/v1 StatefulSetSpec.minReadySeconds); + for replicants, to the ReplicaSet (mirrors apps/v1 ReplicaSetSpec.minReadySeconds). + Omitted or zero matches the apps/v1 default (0). + format: int32 + minimum: 0 + type: integer nodeName: description: |- Request to schedule this pod onto a specific node. @@ -6655,6 +6670,203 @@ spec: Must match a node's labels for the pod to be scheduled on that node. More info: https://kubernetes.io/docs/concepts/config/assign-pod-node/ type: object + persistentVolumeClaimSpec: + description: PVC specification for a core node data storage. + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. 
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. 
For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. 
+ type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over volumes to + consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. 
+ properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
+ If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + type: object podSecurityContext: default: fsGroup: 1000 @@ -6917,15 +7129,18 @@ spec: type: array readinessProbe: default: - failureThreshold: 12 + failureThreshold: 1 httpGet: - path: /status + path: /api/v5/load_rebalance/availability_check port: dashboard initialDelaySeconds: 10 periodSeconds: 5 + timeoutSeconds: 3 description: |- Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. + Strongly advised to keep the current default: it takes into account ongoing node evacuations managed + by the Operator as part of scaling operations and rolling updates. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: @@ -7524,205 +7739,6 @@ spec: - whenUnsatisfiable type: object type: array - volumeClaimTemplates: - description: |- - PVC specification for a core node data storage. - Note: this field named inconsistently, it is actually just a `PersistentVolumeClaimSpec`. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, - and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. - If the namespace is specified, then dataSourceRef will not be copied to dataSource. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type of resource being referenced - type: string - name: - description: Name is the name of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. - When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - This field will replace the functionality of the dataSource field and as such - if both fields are non-empty, they must have the same value. 
For backwards - compatibility, when namespace isn't specified in dataSourceRef, - both fields (dataSource and dataSourceRef) will be set to the same - value automatically if one of them is empty and the other is non-empty. - When namespace is specified in dataSourceRef, - dataSource isn't set to the same value and must be empty. - There are three important differences between dataSource and dataSourceRef: - * While dataSource only allows two specific types of objects, dataSourceRef - allows any non-core object, as well as PersistentVolumeClaim objects. - * While dataSource ignores disallowed values (dropping them), dataSourceRef - preserves all values, and generates an error if a disallowed value is - specified. - * While dataSource only allows local objects, dataSourceRef allows objects - in any namespaces. - (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. - (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type of resource being referenced - type: string - name: - description: Name is the name of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. 
- type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query over volumes to - consider for binding. - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. 
- properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. - If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be - set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource - exists. - More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding reference to the - PersistentVolume backing this claim. - type: string - type: object type: object x-kubernetes-validations: - message: minAvailable cannot be set when maxUnavailable is specified. @@ -15033,6 +15049,15 @@ spec: absence of the evicted pod. So for example you can prevent all voluntary evictions by specifying "100%". x-kubernetes-int-or-string: true + minReadySeconds: + description: |- + MinReadySeconds is the minimum time (seconds) a pod must be Ready before it counts as available. + For core nodes this is applied to the StatefulSet (mirrors apps/v1 StatefulSetSpec.minReadySeconds); + for replicants, to the ReplicaSet (mirrors apps/v1 ReplicaSetSpec.minReadySeconds). + Omitted or zero matches the apps/v1 default (0). + format: int32 + minimum: 0 + type: integer nodeName: description: |- Request to schedule this pod onto a specific node. 
@@ -15308,15 +15333,18 @@ spec: type: array readinessProbe: default: - failureThreshold: 12 + failureThreshold: 1 httpGet: - path: /status + path: /api/v5/load_rebalance/availability_check port: dashboard initialDelaySeconds: 10 periodSeconds: 5 + timeoutSeconds: 3 description: |- Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. + Strongly advised to keep the current default: it takes into account ongoing node evacuations managed + by the Operator as part of scaling operations and rolling updates. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes properties: exec: @@ -15936,7 +15964,7 @@ spec: type: string updateStrategy: default: - type: Recreate + type: RollingUpdate description: Cluster upgrade strategy settings. properties: evacuationStrategy: @@ -15975,24 +16003,24 @@ spec: minimum: 0 type: integer type: object - initialDelaySeconds: - default: 10 - description: Number of seconds before connection evacuation starts. - format: int32 - minimum: 0 - type: integer type: - default: Recreate + default: RollingUpdate description: |- Determines how cluster upgrade is performed. - * `Recreate`: Perform blue-green upgrade. + * `RollingUpdate`: Perform a rolling upgrade, updating pods one at a time. enum: - - Recreate + - RollingUpdate type: string type: object required: - image type: object + x-kubernetes-validations: + - message: Core-replicant clusters require at least 2 core replicas for + rolling updates. + rule: '!has(self.replicantTemplate) || !has(self.replicantTemplate.spec.replicas) + || self.replicantTemplate.spec.replicas == 0 || self.coreTemplate.spec.replicas + >= 2' status: description: Current status of the EMQX cluster. 
properties: @@ -16067,7 +16095,7 @@ spec: type: integer name: description: Node name - example: emqx@emqx-core-557c8b7684-0.emqx-headless.default.svc.cluster.local + example: emqx@emqx-core-0.emqx-headless.default.svc.cluster.local type: string otpRelease: description: Erlang/OTP version node is running on @@ -16075,7 +16103,7 @@ spec: type: string podName: description: Corresponding pod name - example: emqx-core-557c8b7684-0 + example: emqx-core-0 type: string role: description: Node role, either "core" or "replicant" @@ -16093,39 +16121,32 @@ spec: description: EMQX version example: 5.10.1 type: string + required: + - connections + - sessions type: object type: array coreNodesStatus: description: Summary status of the set of core nodes. properties: - collisionCount: - format: int32 - type: integer currentReplicas: - description: Number of replicas running current revision. + description: Number of replicas still running the previous pod + template. format: int32 type: integer - currentRevision: - description: Current revision of the respective core or replicant - set. - type: string readyReplicas: description: Number of ready replicas. format: int32 type: integer - replicas: - description: Total number of replicas. + updatedReplicas: + description: Number of replicas already updated to the desired + pod template. format: int32 type: integer - updateReplicas: - description: Number of replicas running update revision. - format: int32 - type: integer - updateRevision: - description: |- - Update revision of the respective core or replicant set. - When different from the current revision, the set is being updated. - type: string + required: + - currentReplicas + - readyReplicas + - updatedReplicas type: object dsReplication: description: Status of EMQX Durable Storage replication. @@ -16175,7 +16196,7 @@ spec: type: object type: array type: object - nodeEvacuationsStatus: + nodeEvacuations: description: Status of active node evacuations in the cluster. 
items: properties: @@ -16223,7 +16244,7 @@ spec: type: integer name: description: Node name - example: emqx@emqx-core-557c8b7684-0.emqx-headless.default.svc.cluster.local + example: emqx@emqx-core-0.emqx-headless.default.svc.cluster.local type: string otpRelease: description: Erlang/OTP version node is running on @@ -16231,7 +16252,7 @@ spec: type: string podName: description: Corresponding pod name - example: emqx-core-557c8b7684-0 + example: emqx-core-0 type: string role: description: Node role, either "core" or "replicant" @@ -16249,6 +16270,9 @@ spec: description: EMQX version example: 5.10.1 type: string + required: + - connections + - sessions type: object type: array replicantNodesStatus: @@ -16262,26 +16286,25 @@ spec: format: int32 type: integer currentRevision: - description: Current revision of the respective core or replicant - set. + description: Current revision of the replicant set. type: string readyReplicas: description: Number of ready replicas. format: int32 type: integer - replicas: - description: Total number of replicas. - format: int32 - type: integer updateReplicas: description: Number of replicas running update revision. format: int32 type: integer updateRevision: description: |- - Update revision of the respective core or replicant set. + Update revision of the replicant set. When different from the current revision, the set is being updated. 
type: string + required: + - currentReplicas + - readyReplicas + - updateReplicas type: object type: object type: object diff --git a/config/crd/bases/apps.emqx.io_rebalances.yaml b/config/crd/bases/apps.emqx.io_rebalances.yaml deleted file mode 100644 index 9d09cc541..000000000 --- a/config/crd/bases/apps.emqx.io_rebalances.yaml +++ /dev/null @@ -1,224 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.18.0 - name: rebalances.apps.emqx.io -spec: - group: apps.emqx.io - names: - kind: Rebalance - listKind: RebalanceList - plural: rebalances - singular: rebalance - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.phase - name: Status - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v2beta1 - schema: - openAPIV3Schema: - description: Rebalance is the Schema for the rebalances API. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - properties: - instanceName: - description: InstanceName represents the name of EMQX CR. 
- type: string - rebalanceStrategy: - description: |- - RebalanceStrategy represents the strategy of EMQX rebalancing - More info: https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing - properties: - absConnThreshold: - default: 1000 - description: |- - Represents the absolute threshold for checking connection balance. - Same as `abs-conn-threshold` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - format: int32 - minimum: 1 - type: integer - absSessThreshold: - default: 1000 - description: |- - Represents the absolute threshold for checking session connection balance. - Same as `abs-sess-threshold` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing) - format: int32 - minimum: 1 - type: integer - connEvictRate: - description: |- - Represents the source node client disconnect rate per second. - Same as `conn-evict-rate` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - format: int32 - minimum: 1 - type: integer - relConnThreshold: - default: "1.1" - description: |- - Represents the relative threshold for checkin connection balance. - Same as `rel-conn-threshold` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - Using floats is highly [discouraged](https://github.com/kubernetes-sigs/controller-tools/issues/245), defined as a _string_ instead. - Must be greater than 1.0. - pattern: ^[1-9]{1,8}\.[0-9]{1,8}$ - type: string - relSessThreshold: - default: "1.1" - description: |- - Represents the relative threshold for checking session connection balance. - Same as `rel-sess-threshold` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - Using floats is highly [discouraged](https://github.com/kubernetes-sigs/controller-tools/issues/245), defined as a _string_ instead. 
- Must be greater than 1.0. - pattern: ^[1-9]{1,8}\.[0-9]{1,8}$ - type: string - sessEvictRate: - default: 500 - description: |- - Represents the source node session evacuation rate per second. - Same as `sess-evict-rate` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - format: int32 - minimum: 1 - type: integer - waitHealthCheck: - default: 60 - description: |- - Represents the time (in seconds) to wait for the LB to remove the source node from the list of active backend nodes. - After the specified waiting time is exceeded, the rebalancing task will start. - Same as `wait-health-check` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - format: int32 - minimum: 0 - type: integer - waitTakeover: - default: 60 - description: |- - Represents the time in seconds to wait for a client to - reconnect to take over the session after all connections are disconnected. - Same as `wait-takeover` in [EMQX Rebalancing](https://docs.emqx.com/en/emqx/v5.10/deploy/cluster/rebalancing.html#rebalancing). - format: int32 - minimum: 0 - type: integer - required: - - connEvictRate - type: object - required: - - instanceName - - rebalanceStrategy - type: object - status: - properties: - completedTime: - description: CompletedTime Represents the time when the rebalance - job was completed. - format: date-time - type: string - conditions: - description: |- - The latest available observations of an object's current state. - When Rebalance fails, the condition will have type "Failed" and status false. - When Rebalance is in processing, the condition will have a type "Processing" and status true. - When Rebalance is completed, the condition will have a type "Complete" and status true. - items: - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. 
- format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Status of rebalance condition type. one of Processing, - Complete, Failed. - type: string - required: - - status - - type - type: object - type: array - phase: - description: Phase represents the phase of Rebalance. - type: string - rebalanceStates: - items: - description: Rebalance defines the observed Rebalancing state of - EMQX - properties: - connection_eviction_rate: - description: ConnectionEvictionRate represents the node session - evacuation rate per second. - format: int32 - type: integer - coordinator_node: - description: CoordinatorNode represents the node currently undergoing - rebalancing. - type: string - donors: - description: Donors represent the source nodes for rebalancing. - items: - type: string - type: array - node: - description: |- - Which node this rebalancing state belongs to. - See `v2beta1.EMQXNode.Node`. - type: string - recipients: - description: Recipients represent the target node for rebalancing. - items: - type: string - type: array - session_eviction_rate: - description: SessionEvictionRate represents the node session - evacuation rate per second. - format: int32 - type: integer - state: - description: State of the rebalancing. - type: string - type: object - type: array - startedTime: - description: StartedTime Represents the time when rebalance job start. 
- format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/config/crd/compat/v2beta1_patch.yaml b/config/crd/compat/v2beta1_patch.yaml deleted file mode 100644 index f7c9c37a9..000000000 --- a/config/crd/compat/v2beta1_patch.yaml +++ /dev/null @@ -1,7119 +0,0 @@ ---- -- op: add - path: /spec/versions/- - value: - name: v2beta1 - served: true - storage: false - deprecated: true - subresources: - scale: - specReplicasPath: .spec.replicantTemplate.spec.replicas - statusReplicasPath: .status.replicantNodeReplicas - status: {} - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.status=="True")].type - name: Status - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - bootstrapAPIKeys: - items: - properties: - key: - pattern: ^[a-zA-Z\d-_]+$ - type: string - secret: - maxLength: 128 - minLength: 3 - type: string - secretRef: - properties: - key: - properties: - secretKey: - pattern: ^[a-zA-Z\d-_]+$ - type: string - secretName: - type: string - required: - - secretKey - - secretName - type: object - secret: - properties: - secretKey: - pattern: ^[a-zA-Z\d-_]+$ - type: string - secretName: - type: string - required: - - secretKey - - secretName - type: object - required: - - key - - secret - type: object - type: object - type: array - clusterDomain: - default: cluster.local - type: string - config: - properties: - data: - type: string - mode: - default: Merge - enum: - - Merge - - Replace - type: string - type: object - coreTemplate: - default: - spec: - replicas: 1 - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - 
namespace: - type: string - type: object - spec: - properties: - affinity: - properties: - nodeAffinity: - properties: - preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - preference: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchFields: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - type: object - x-kubernetes-map-type: atomic - weight: - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - properties: - nodeSelectorTerms: - items: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchFields: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - type: object - x-kubernetes-map-type: atomic - type: array - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - properties: - preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - podAffinityTerm: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - 
x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - weight: - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - type: array - type: object - podAntiAffinity: - properties: - preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - podAffinityTerm: - properties: - labelSelector: - properties: - 
matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - weight: - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - 
type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - type: array - type: object - type: object - args: - items: - type: string - type: array - command: - items: - type: string - type: array - containerSecurityContext: - default: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - env: - items: - properties: - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - 
x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - extraContainers: - items: - properties: - args: - items: - type: string - type: array - command: - items: - type: string - type: array - env: - items: - properties: - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: 
boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - image: - type: string - imagePullPolicy: - type: string - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - 
failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - name: - type: string - ports: - items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - 
x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - resizePolicy: - items: - properties: - resourceName: - type: string - restartPolicy: - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - restartPolicy: - type: string - securityContext: - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - 
type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - stdin: - type: boolean - stdinOnce: - type: boolean - terminationMessagePath: - type: string - terminationMessagePolicy: - type: string - tty: - type: boolean - volumeDevices: - items: - properties: - devicePath: - type: string - name: - type: string - required: - - devicePath - - name - type: object - type: array - volumeMounts: - items: - properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - 
readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - extraVolumeMounts: - items: - properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - extraVolumes: - items: - properties: - awsElasticBlockStore: - properties: - fsType: - type: string - partition: - format: int32 - type: integer - readOnly: - type: boolean - volumeID: - type: string - required: - - volumeID - type: object - azureDisk: - properties: - cachingMode: - type: string - diskName: - type: string - diskURI: - type: string - fsType: - type: string - kind: - type: string - readOnly: - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - properties: - readOnly: - type: boolean - secretName: - type: string - shareName: - type: string - required: - - secretName - - shareName - type: object - cephfs: - properties: - monitors: - items: - type: string - type: array - path: - type: string - readOnly: - type: boolean - secretFile: - type: string - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - user: - type: string - required: - - monitors - type: object - cinder: - properties: - fsType: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - type: string - required: - - volumeID - type: object - configMap: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: 
boolean - type: object - x-kubernetes-map-type: atomic - csi: - properties: - driver: - type: string - fsType: - type: string - nodePublishSecretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - type: boolean - volumeAttributes: - additionalProperties: - type: string - type: object - required: - - driver - type: object - downwardAPI: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - format: int32 - type: integer - path: - type: string - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - type: object - emptyDir: - properties: - medium: - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - properties: - volumeClaimTemplate: - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - properties: - accessModes: - items: - type: string - type: array - dataSource: - properties: - apiGroup: - type: string - kind: - type: string - name: - type: string - required: - - kind - - name - type: object - 
x-kubernetes-map-type: atomic - dataSourceRef: - properties: - apiGroup: - type: string - kind: - type: string - name: - type: string - namespace: - type: string - required: - - kind - - name - type: object - resources: - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - selector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - type: string - volumeAttributesClassName: - type: string - volumeMode: - type: string - volumeName: - type: string - type: object - required: - - spec - type: object - type: object - fc: - properties: - fsType: - type: string - lun: - format: int32 - type: integer - readOnly: - type: boolean - targetWWNs: - items: - type: string - type: array - wwids: - items: - type: string - type: array - type: object - flexVolume: - properties: - driver: - type: string - fsType: - type: string - options: - additionalProperties: - type: string - type: object - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - properties: - datasetName: - type: string - datasetUUID: - type: string - type: object - gcePersistentDisk: - properties: - fsType: - type: string - 
partition: - format: int32 - type: integer - pdName: - type: string - readOnly: - type: boolean - required: - - pdName - type: object - gitRepo: - properties: - directory: - type: string - repository: - type: string - revision: - type: string - required: - - repository - type: object - glusterfs: - properties: - endpoints: - type: string - path: - type: string - readOnly: - type: boolean - required: - - endpoints - - path - type: object - hostPath: - properties: - path: - type: string - type: - type: string - required: - - path - type: object - iscsi: - properties: - chapAuthDiscovery: - type: boolean - chapAuthSession: - type: boolean - fsType: - type: string - initiatorName: - type: string - iqn: - type: string - iscsiInterface: - type: string - lun: - format: int32 - type: integer - portals: - items: - type: string - type: array - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - type: string - nfs: - properties: - path: - type: string - readOnly: - type: boolean - server: - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - properties: - claimName: - type: string - readOnly: - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - properties: - fsType: - type: string - pdID: - type: string - required: - - pdID - type: object - portworxVolume: - properties: - fsType: - type: string - readOnly: - type: boolean - volumeID: - type: string - required: - - volumeID - type: object - projected: - properties: - defaultMode: - format: int32 - type: integer - sources: - items: - properties: - clusterTrustBundle: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object 
- type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - name: - type: string - optional: - type: boolean - path: - type: string - signerName: - type: string - required: - - path - type: object - configMap: - properties: - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - properties: - items: - items: - properties: - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - format: int32 - type: integer - path: - type: string - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - type: object - secret: - properties: - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - properties: - audience: - type: string - expirationSeconds: - format: int64 - type: integer - path: - type: string - required: - - path - type: object - type: object - type: array - type: object - quobyte: - properties: - group: - type: string - readOnly: - type: boolean - registry: - type: string - tenant: - type: string - user: - type: string - volume: - type: string - required: 
- - registry - - volume - type: object - rbd: - properties: - fsType: - type: string - image: - type: string - keyring: - type: string - monitors: - items: - type: string - type: array - pool: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - user: - type: string - required: - - image - - monitors - type: object - scaleIO: - properties: - fsType: - type: string - gateway: - type: string - protectionDomain: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - type: boolean - storageMode: - type: string - storagePool: - type: string - system: - type: string - volumeName: - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - optional: - type: boolean - secretName: - type: string - type: object - storageos: - properties: - fsType: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - type: string - volumeNamespace: - type: string - type: object - vsphereVolume: - properties: - fsType: - type: string - storagePolicyID: - type: string - storagePolicyName: - type: string - volumePath: - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - initContainers: - items: - properties: - args: - items: - type: string - type: array - command: - items: - type: string - type: array - env: - items: - properties: - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: 
- type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - image: - type: string - imagePullPolicy: - type: string - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true 
- required: - - port - type: object - type: object - preStop: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - name: - type: string - ports: - 
items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - resizePolicy: - items: - properties: - resourceName: - type: string - restartPolicy: - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - restartPolicy: - type: string - securityContext: - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: 
string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - stdin: - type: boolean - stdinOnce: - type: boolean - terminationMessagePath: - type: string - terminationMessagePolicy: - type: string - tty: - type: boolean - volumeDevices: - items: - properties: - devicePath: - type: string - name: - type: string - required: - - devicePath - - name - type: object - type: array - volumeMounts: - items: - properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - properties: - 
exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - default: - failureThreshold: 3 - httpGet: - path: /status - port: dashboard - initialDelaySeconds: 60 - periodSeconds: 30 - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - 
type: object - maxUnavailable: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - minAvailable: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - nodeName: - type: string - nodeSelector: - additionalProperties: - type: string - type: object - podSecurityContext: - default: - fsGroup: 1000 - fsGroupChangePolicy: Always - runAsGroup: 1000 - runAsUser: 1000 - supplementalGroups: - - 1000 - properties: - fsGroup: - format: int64 - type: integer - fsGroupChangePolicy: - type: string - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - supplementalGroups: - items: - format: int64 - type: integer - type: array - sysctls: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - ports: - items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - readinessProbe: - default: - failureThreshold: 12 - httpGet: - path: /status - port: dashboard - initialDelaySeconds: 10 - periodSeconds: 5 - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - 
default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - replicas: - default: 2 - format: int32 - type: integer - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: 
- host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - toleRations: - items: - properties: - effect: - type: string - key: - type: string - operator: - type: string - tolerationSeconds: - format: int64 - type: integer - value: - type: string - type: object - type: array - tolerations: - items: - properties: - effect: - type: string - key: - type: string - operator: - type: string - tolerationSeconds: - format: int64 - type: integer - value: - type: string - type: object - type: array - topologySpreadConstraints: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - format: int32 - type: integer - minDomains: - format: int32 - type: integer - nodeAffinityPolicy: - type: string - nodeTaintsPolicy: - type: string - topologyKey: - type: string - whenUnsatisfiable: - type: string - required: - - maxSkew - - topologyKey - - 
whenUnsatisfiable - type: object - type: array - volumeClaimTemplates: - properties: - accessModes: - items: - type: string - type: array - dataSource: - properties: - apiGroup: - type: string - kind: - type: string - name: - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - properties: - apiGroup: - type: string - kind: - type: string - name: - type: string - namespace: - type: string - required: - - kind - - name - type: object - resources: - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - selector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - type: string - volumeAttributesClassName: - type: string - volumeMode: - type: string - volumeName: - type: string - type: object - type: object - x-kubernetes-validations: - - message: minAvailable cannot be set when maxUnavailable is specified. These fields are mutually exclusive in PodDisruptionBudget. - rule: 'has(self.minAvailable) && has(self.maxUnavailable) ? 
false : true' - type: object - dashboardServiceTemplate: - properties: - enabled: - default: true - type: boolean - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - properties: - allocateLoadBalancerNodePorts: - type: boolean - clusterIP: - type: string - clusterIPs: - items: - type: string - type: array - x-kubernetes-list-type: atomic - externalIPs: - items: - type: string - type: array - externalName: - type: string - externalTrafficPolicy: - type: string - healthCheckNodePort: - format: int32 - type: integer - internalTrafficPolicy: - type: string - ipFamilies: - items: - type: string - type: array - x-kubernetes-list-type: atomic - ipFamilyPolicy: - type: string - loadBalancerClass: - type: string - loadBalancerIP: - type: string - loadBalancerSourceRanges: - items: - type: string - type: array - ports: - items: - properties: - appProtocol: - type: string - name: - type: string - nodePort: - format: int32 - type: integer - port: - format: int32 - type: integer - protocol: - default: TCP - type: string - targetPort: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: array - x-kubernetes-list-map-keys: - - port - - protocol - x-kubernetes-list-type: map - publishNotReadyAddresses: - type: boolean - selector: - additionalProperties: - type: string - type: object - x-kubernetes-map-type: atomic - sessionAffinity: - type: string - sessionAffinityConfig: - properties: - clientIP: - properties: - timeoutSeconds: - format: int32 - type: integer - type: object - type: object - type: - type: string - type: object - type: object - image: - type: string - imagePullPolicy: - type: string - imagePullSecrets: - items: - properties: - name: - type: string - type: object - 
x-kubernetes-map-type: atomic - type: array - listenersServiceTemplate: - properties: - enabled: - default: true - type: boolean - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - properties: - allocateLoadBalancerNodePorts: - type: boolean - clusterIP: - type: string - clusterIPs: - items: - type: string - type: array - x-kubernetes-list-type: atomic - externalIPs: - items: - type: string - type: array - externalName: - type: string - externalTrafficPolicy: - type: string - healthCheckNodePort: - format: int32 - type: integer - internalTrafficPolicy: - type: string - ipFamilies: - items: - type: string - type: array - x-kubernetes-list-type: atomic - ipFamilyPolicy: - type: string - loadBalancerClass: - type: string - loadBalancerIP: - type: string - loadBalancerSourceRanges: - items: - type: string - type: array - ports: - items: - properties: - appProtocol: - type: string - name: - type: string - nodePort: - format: int32 - type: integer - port: - format: int32 - type: integer - protocol: - default: TCP - type: string - targetPort: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: array - x-kubernetes-list-map-keys: - - port - - protocol - x-kubernetes-list-type: map - publishNotReadyAddresses: - type: boolean - selector: - additionalProperties: - type: string - type: object - x-kubernetes-map-type: atomic - sessionAffinity: - type: string - sessionAffinityConfig: - properties: - clientIP: - properties: - timeoutSeconds: - format: int32 - type: integer - type: object - type: object - type: - type: string - type: object - type: object - replicantTemplate: - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: 
object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - properties: - affinity: - properties: - nodeAffinity: - properties: - preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - preference: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchFields: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - type: object - x-kubernetes-map-type: atomic - weight: - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - properties: - nodeSelectorTerms: - items: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchFields: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - type: object - x-kubernetes-map-type: atomic - type: array - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - properties: - preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - podAffinityTerm: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - 
type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - weight: - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - type: array - type: object - podAntiAffinity: - properties: - 
preferredDuringSchedulingIgnoredDuringExecution: - items: - properties: - podAffinityTerm: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - weight: - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - requiredDuringSchedulingIgnoredDuringExecution: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: 
array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - items: - type: string - type: array - topologyKey: - type: string - required: - - topologyKey - type: object - type: array - type: object - type: object - args: - items: - type: string - type: array - command: - items: - type: string - type: array - containerSecurityContext: - default: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - env: - items: - properties: - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - extraContainers: - items: - properties: - args: - items: - type: string - type: array - command: - items: - type: string - type: array - env: - items: - properties: - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - 
type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - image: - type: string - imagePullPolicy: - type: string - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - 
livenessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - name: - type: string - ports: - items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: 
object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - resizePolicy: - items: - properties: - resourceName: - type: string - restartPolicy: - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - restartPolicy: - type: string - securityContext: - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - 
type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - stdin: - type: boolean - stdinOnce: - type: boolean - terminationMessagePath: - type: string - terminationMessagePolicy: - type: string - tty: - type: boolean - volumeDevices: - items: - properties: - devicePath: - type: string - name: - type: string - required: - - devicePath - - name - type: object - type: array - volumeMounts: - items: 
- properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - extraVolumeMounts: - items: - properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - extraVolumes: - items: - properties: - awsElasticBlockStore: - properties: - fsType: - type: string - partition: - format: int32 - type: integer - readOnly: - type: boolean - volumeID: - type: string - required: - - volumeID - type: object - azureDisk: - properties: - cachingMode: - type: string - diskName: - type: string - diskURI: - type: string - fsType: - type: string - kind: - type: string - readOnly: - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - properties: - readOnly: - type: boolean - secretName: - type: string - shareName: - type: string - required: - - secretName - - shareName - type: object - cephfs: - properties: - monitors: - items: - type: string - type: array - path: - type: string - readOnly: - type: boolean - secretFile: - type: string - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - user: - type: string - required: - - monitors - type: object - cinder: - properties: - fsType: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - type: string - required: - - volumeID - type: object - configMap: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string 
- required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - properties: - driver: - type: string - fsType: - type: string - nodePublishSecretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - type: boolean - volumeAttributes: - additionalProperties: - type: string - type: object - required: - - driver - type: object - downwardAPI: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - format: int32 - type: integer - path: - type: string - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - type: object - emptyDir: - properties: - medium: - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - properties: - volumeClaimTemplate: - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - properties: - accessModes: - items: - type: string - type: array - dataSource: - properties: - apiGroup: - type: 
string - kind: - type: string - name: - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - properties: - apiGroup: - type: string - kind: - type: string - name: - type: string - namespace: - type: string - required: - - kind - - name - type: object - resources: - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - selector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - type: string - volumeAttributesClassName: - type: string - volumeMode: - type: string - volumeName: - type: string - type: object - required: - - spec - type: object - type: object - fc: - properties: - fsType: - type: string - lun: - format: int32 - type: integer - readOnly: - type: boolean - targetWWNs: - items: - type: string - type: array - wwids: - items: - type: string - type: array - type: object - flexVolume: - properties: - driver: - type: string - fsType: - type: string - options: - additionalProperties: - type: string - type: object - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - properties: - datasetName: - type: string - 
datasetUUID: - type: string - type: object - gcePersistentDisk: - properties: - fsType: - type: string - partition: - format: int32 - type: integer - pdName: - type: string - readOnly: - type: boolean - required: - - pdName - type: object - gitRepo: - properties: - directory: - type: string - repository: - type: string - revision: - type: string - required: - - repository - type: object - glusterfs: - properties: - endpoints: - type: string - path: - type: string - readOnly: - type: boolean - required: - - endpoints - - path - type: object - hostPath: - properties: - path: - type: string - type: - type: string - required: - - path - type: object - iscsi: - properties: - chapAuthDiscovery: - type: boolean - chapAuthSession: - type: boolean - fsType: - type: string - initiatorName: - type: string - iqn: - type: string - iscsiInterface: - type: string - lun: - format: int32 - type: integer - portals: - items: - type: string - type: array - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - type: string - nfs: - properties: - path: - type: string - readOnly: - type: boolean - server: - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - properties: - claimName: - type: string - readOnly: - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - properties: - fsType: - type: string - pdID: - type: string - required: - - pdID - type: object - portworxVolume: - properties: - fsType: - type: string - readOnly: - type: boolean - volumeID: - type: string - required: - - volumeID - type: object - projected: - properties: - defaultMode: - format: int32 - type: integer - sources: - items: - properties: - clusterTrustBundle: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - 
type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - name: - type: string - optional: - type: boolean - path: - type: string - signerName: - type: string - required: - - path - type: object - configMap: - properties: - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - properties: - items: - items: - properties: - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - format: int32 - type: integer - path: - type: string - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - type: object - secret: - properties: - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - properties: - audience: - type: string - expirationSeconds: - format: int64 - type: integer - path: - type: string - required: - - path - type: object - type: object - type: array - type: object - quobyte: - properties: - group: - type: string - readOnly: - type: boolean - 
registry: - type: string - tenant: - type: string - user: - type: string - volume: - type: string - required: - - registry - - volume - type: object - rbd: - properties: - fsType: - type: string - image: - type: string - keyring: - type: string - monitors: - items: - type: string - type: array - pool: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - user: - type: string - required: - - image - - monitors - type: object - scaleIO: - properties: - fsType: - type: string - gateway: - type: string - protectionDomain: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - type: boolean - storageMode: - type: string - storagePool: - type: string - system: - type: string - volumeName: - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - properties: - defaultMode: - format: int32 - type: integer - items: - items: - properties: - key: - type: string - mode: - format: int32 - type: integer - path: - type: string - required: - - key - - path - type: object - type: array - optional: - type: boolean - secretName: - type: string - type: object - storageos: - properties: - fsType: - type: string - readOnly: - type: boolean - secretRef: - properties: - name: - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - type: string - volumeNamespace: - type: string - type: object - vsphereVolume: - properties: - fsType: - type: string - storagePolicyID: - type: string - storagePolicyName: - type: string - volumePath: - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - initContainers: - items: - properties: - args: - items: - type: string - type: array - command: - items: - type: string - type: array - env: - items: - properties: - name: - type: string - value: - type: string - 
valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - properties: - apiVersion: - type: string - fieldPath: - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - properties: - containerName: - type: string - divisor: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - properties: - key: - type: string - name: - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - envFrom: - items: - properties: - configMapRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - type: string - secretRef: - properties: - name: - type: string - optional: - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - image: - type: string - imagePullPolicy: - type: string - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - 
properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: 
int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - name: - type: string - ports: - items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - resizePolicy: - items: - properties: - resourceName: - type: string - restartPolicy: - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - 
x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - restartPolicy: - type: string - securityContext: - properties: - allowPrivilegeEscalation: - type: boolean - capabilities: - properties: - add: - items: - type: string - type: array - drop: - items: - type: string - type: array - type: object - privileged: - type: boolean - procMount: - type: string - readOnlyRootFilesystem: - type: boolean - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - 
type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - stdin: - type: boolean - stdinOnce: - type: boolean - terminationMessagePath: - type: string - terminationMessagePolicy: - type: string - tty: - type: boolean - volumeDevices: - items: - properties: - devicePath: - type: string - name: - type: string - required: - - devicePath - - name - type: object - type: array - volumeMounts: - items: - properties: - mountPath: - type: string - mountPropagation: - type: string - name: - type: string - readOnly: - type: boolean - subPath: - type: string - subPathExpr: - type: string - required: - - mountPath - - name - type: object - type: array - workingDir: - type: string - required: - - name - type: object - type: array - lifecycle: - properties: - postStart: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: 
string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - sleep: - properties: - seconds: - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - default: - failureThreshold: 3 - httpGet: - path: /status - port: dashboard - initialDelaySeconds: 60 - periodSeconds: 30 - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object 
- terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - maxUnavailable: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - minAvailable: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - nodeName: - type: string - nodeSelector: - additionalProperties: - type: string - type: object - podSecurityContext: - default: - fsGroup: 1000 - fsGroupChangePolicy: Always - runAsGroup: 1000 - runAsUser: 1000 - supplementalGroups: - - 1000 - properties: - fsGroup: - format: int64 - type: integer - fsGroupChangePolicy: - type: string - runAsGroup: - format: int64 - type: integer - runAsNonRoot: - type: boolean - runAsUser: - format: int64 - type: integer - seLinuxOptions: - properties: - level: - type: string - role: - type: string - type: - type: string - user: - type: string - type: object - seccompProfile: - properties: - localhostProfile: - type: string - type: - type: string - required: - - type - type: object - supplementalGroups: - items: - format: int64 - type: integer - type: array - sysctls: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - windowsOptions: - properties: - gmsaCredentialSpec: - type: string - gmsaCredentialSpecName: - type: string - hostProcess: - type: boolean - runAsUserName: - type: string - type: object - type: object - ports: - items: - properties: - containerPort: - format: int32 - type: integer - hostIP: - type: string - hostPort: - format: int32 - type: integer - name: - type: string - protocol: - default: TCP - type: string - required: - - containerPort - type: object - type: array - readinessProbe: - default: - failureThreshold: 12 - httpGet: - path: /status - port: dashboard - initialDelaySeconds: 10 - periodSeconds: 5 - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - 
failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - replicas: - default: 2 - format: int32 - type: integer - resources: - properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - type: object - startupProbe: - properties: - exec: - properties: - command: - items: - type: string - type: array - type: object - failureThreshold: - format: int32 - type: integer - grpc: - properties: - port: - 
format: int32 - type: integer - service: - default: "" - type: string - required: - - port - type: object - httpGet: - properties: - host: - type: string - httpHeaders: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - path: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - scheme: - type: string - required: - - port - type: object - initialDelaySeconds: - format: int32 - type: integer - periodSeconds: - format: int32 - type: integer - successThreshold: - format: int32 - type: integer - tcpSocket: - properties: - host: - type: string - port: - anyOf: - - type: integer - - type: string - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - format: int64 - type: integer - timeoutSeconds: - format: int32 - type: integer - type: object - toleRations: - items: - properties: - effect: - type: string - key: - type: string - operator: - type: string - tolerationSeconds: - format: int64 - type: integer - value: - type: string - type: object - type: array - tolerations: - items: - properties: - effect: - type: string - key: - type: string - operator: - type: string - tolerationSeconds: - format: int64 - type: integer - value: - type: string - type: object - type: array - topologySpreadConstraints: - items: - properties: - labelSelector: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - format: int32 - type: integer - minDomains: - format: int32 - type: integer - nodeAffinityPolicy: - type: string - 
nodeTaintsPolicy: - type: string - topologyKey: - type: string - whenUnsatisfiable: - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - type: object - x-kubernetes-validations: - - message: minAvailable cannot be set when maxUnavailable is specified. These fields are mutually exclusive in PodDisruptionBudget. - rule: 'has(self.minAvailable) && has(self.maxUnavailable) ? false : true' - type: object - revisionHistoryLimit: - default: 3 - format: int32 - type: integer - serviceAccountName: - type: string - updateStrategy: - default: - evacuationStrategy: - connEvictRate: 1000 - sessEvictRate: 1000 - waitTakeover: 10 - initialDelaySeconds: 10 - type: Recreate - properties: - evacuationStrategy: - properties: - connEvictRate: - default: 1000 - format: int32 - minimum: 1 - type: integer - sessEvictRate: - default: 1000 - format: int32 - minimum: 1 - type: integer - waitTakeover: - format: int32 - minimum: 0 - type: integer - type: object - initialDelaySeconds: - format: int32 - type: integer - type: - default: Recreate - enum: - - Recreate - type: string - type: object - required: - - image - type: object - status: - properties: - conditions: - items: - properties: - lastTransitionTime: - format: date-time - type: string - message: - maxLength: 32768 - type: string - observedGeneration: - format: int64 - minimum: 0 - type: integer - reason: - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - enum: - - "True" - - "False" - - Unknown - type: string - type: - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - coreNodes: - items: - properties: - connections: - format: int64 - type: integer - controllerUID: - type: string - edition: - type: string - 
live_connections: - format: int64 - type: integer - node: - type: string - node_status: - type: string - otp_release: - type: string - podUID: - type: string - role: - type: string - version: - type: string - type: object - type: array - coreNodesStatus: - properties: - collisionCount: - format: int32 - type: integer - currentReplicas: - format: int32 - type: integer - currentRevision: - type: string - readyReplicas: - format: int32 - type: integer - replicas: - format: int32 - type: integer - updateReplicas: - format: int32 - type: integer - updateRevision: - type: string - type: object - nodeEvacuationsStatus: - items: - properties: - connection_eviction_rate: - format: int32 - type: integer - connection_goal: - format: int32 - type: integer - node: - type: string - session_eviction_rate: - format: int32 - type: integer - session_goal: - format: int32 - type: integer - session_recipients: - items: - type: string - type: array - state: - type: string - stats: - properties: - current_connected: - format: int32 - type: integer - current_sessions: - format: int32 - type: integer - initial_connected: - format: int32 - type: integer - initial_sessions: - format: int32 - type: integer - type: object - type: object - type: array - replicantNodes: - items: - properties: - connections: - format: int64 - type: integer - controllerUID: - type: string - edition: - type: string - live_connections: - format: int64 - type: integer - node: - type: string - node_status: - type: string - otp_release: - type: string - podUID: - type: string - role: - type: string - version: - type: string - type: object - type: array - replicantNodesStatus: - properties: - collisionCount: - format: int32 - type: integer - currentReplicas: - format: int32 - type: integer - currentRevision: - type: string - readyReplicas: - format: int32 - type: integer - replicas: - format: int32 - type: integer - updateReplicas: - format: int32 - type: integer - updateRevision: - type: string - type: object - type: 
object - type: object diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 566db44d6..cb9eb877c 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -1,10 +1,6 @@ resources: - bases/apps.emqx.io_emqxes.yaml -- bases/apps.emqx.io_rebalances.yaml +# - bases/apps.emqx.io_rebalances.yaml # +kubebuilder:scaffold:crdkustomizeresource -patches: -- path: ./compat/v2beta1_patch.yaml - target: - kind: CustomResourceDefinition - name: emqxes.apps.emqx.io +patches: [] diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 2e82e8494..b8159ee5a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -44,6 +44,7 @@ rules: resources: - pods verbs: + - delete - get - list - update diff --git a/deploy/charts/emqx-operator/Chart.yaml b/deploy/charts/emqx-operator/Chart.yaml index 06cc83530..9329fd99f 100644 --- a/deploy/charts/emqx-operator/Chart.yaml +++ b/deploy/charts/emqx-operator/Chart.yaml @@ -6,11 +6,11 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 2.3.0-rc.1 +version: 3.0.0-rc.1 # This is the version number of the application being deployed. -appVersion: 2.3.0 +appVersion: 3.0.0 sources: - - https://github.com/emqx/emqx-operator/tree/main-2.3/deploy/charts/emqx-operator + - https://github.com/emqx/emqx-operator/tree/main-3.x/deploy/charts/emqx-operator - https://github.com/emqx/emqx diff --git a/deploy/charts/emqx-operator/templates/NOTES.txt b/deploy/charts/emqx-operator/templates/NOTES.txt index 73721a5e3..b54e80955 100644 --- a/deploy/charts/emqx-operator/templates/NOTES.txt +++ b/deploy/charts/emqx-operator/templates/NOTES.txt @@ -3,18 +3,3 @@ EMQX Operator Controller {{ .Chart.AppVersion }} has been deployed successfully! The operator is running in namespace: {{ .Release.Namespace }} Now you can create EMQX Custom Resources to deploy EMQX clusters. 
- -{{- if .Values.upgrade.preUpgradeCheck }} - -A pre-upgrade compatibility check Job ran automatically to ensure a smooth -upgrade from any prior installation. - -The check: - - Verified no legacy custom resources exist (emqxbrokers, emqxenterprises, - emqxplugins) that would be destroyed when Helm removes their CRDs - - Patched CRDs to remove conversion webhooks, preventing API server - errors during the operator replacement window - -To skip the pre-upgrade Job on future installs (e.g. in air-gapped environments): - --set upgrade.preUpgradeCheck=false -{{- end }} diff --git a/deploy/charts/emqx-operator/templates/controller-manager-rbac.yaml b/deploy/charts/emqx-operator/templates/controller-manager-rbac.yaml index f62964f3c..f0e4befd2 100644 --- a/deploy/charts/emqx-operator/templates/controller-manager-rbac.yaml +++ b/deploy/charts/emqx-operator/templates/controller-manager-rbac.yaml @@ -65,6 +65,7 @@ rules: resources: - pods verbs: + - delete - get - list - update @@ -104,14 +105,12 @@ rules: - apps.emqx.io resources: - emqxes/finalizers - - rebalances/finalizers verbs: - update - apiGroups: - apps.emqx.io resources: - emqxes/status - - rebalances/status verbs: - get - patch diff --git a/deploy/charts/emqx-operator/templates/pre-upgrade-job.yaml b/deploy/charts/emqx-operator/templates/pre-upgrade-job.yaml deleted file mode 100644 index 8e041a1a7..000000000 --- a/deploy/charts/emqx-operator/templates/pre-upgrade-job.yaml +++ /dev/null @@ -1,165 +0,0 @@ -{{- if .Values.upgrade.preUpgradeCheck }} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade - namespace: {{ .Release.Namespace }} - labels: - {{- include "emqx-operator.labels" . 
| nindent 4 }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-10" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade - labels: - {{- include "emqx-operator.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-10" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -rules: -# Patch CRDs to remove conversion webhooks -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - get - - patch -# List legacy CRs to check if any exist before upgrade -- apiGroups: - - apps.emqx.io - resources: - - emqxbrokers - - emqxenterprises - - emqxplugins - verbs: - - list ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade - labels: - {{- include "emqx-operator.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-10" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade -subjects: -- kind: ServiceAccount - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade - namespace: {{ .Release.Namespace }} ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ include "emqx-operator.fullname" . }}-pre-upgrade - namespace: {{ .Release.Namespace }} - labels: - {{- include "emqx-operator.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -spec: - backoffLimit: 3 - ttlSecondsAfterFinished: 300 - template: - metadata: - labels: - {{- include "emqx-operator.selectorLabels" . 
| nindent 8 }} - spec: - restartPolicy: OnFailure - serviceAccountName: {{ include "emqx-operator.fullname" . }}-pre-upgrade - securityContext: - runAsNonRoot: true - runAsUser: 65534 - containers: - - name: cleanup - image: "{{ .Values.upgrade.image.repository }}:{{ .Values.upgrade.image.tag }}" - imagePullPolicy: {{ .Values.upgrade.image.pullPolicy }} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "ALL" - command: - - /bin/sh - - -ec - - | - echo "=== EMQX Operator pre-upgrade cleanup (2.2.x -> 2.3.0) ===" - - # Step 1: Check for existing legacy custom resources. - # - # Legacy CRDs (emqxbrokers, emqxenterprises, emqxplugins) are present in - # 2.2.x chart templates but absent from 2.3.0, so Helm will delete them - # during upgrade. If any CRs exist under those CRDs they would be silently - # destroyed. Abort early and ask the user to migrate first. - BLOCKED="" - - if kubectl get crd emqxbrokers.apps.emqx.io >/dev/null 2>&1; then - N=$(kubectl get emqxbrokers --all-namespaces --no-headers 2>/dev/null | wc -l) - if [ "$N" -gt 0 ]; then - echo "ERROR: Found $N EmqxBroker CR(s)." - BLOCKED="emqxbrokers $BLOCKED" - fi - fi - - if kubectl get crd emqxenterprises.apps.emqx.io >/dev/null 2>&1; then - N=$(kubectl get emqxenterprises --all-namespaces --no-headers 2>/dev/null | wc -l) - if [ "$N" -gt 0 ]; then - echo "ERROR: Found $N EmqxEnterprise CR(s)." - BLOCKED="emqxenterprises $BLOCKED" - fi - fi - - if kubectl get crd emqxplugins.apps.emqx.io >/dev/null 2>&1; then - N=$(kubectl get emqxplugins --all-namespaces --no-headers 2>/dev/null | wc -l) - if [ "$N" -gt 0 ]; then - echo "ERROR: Found $N EmqxPlugin CR(s)." - BLOCKED="emqxplugins $BLOCKED" - fi - fi - - if [ -n "$BLOCKED" ]; then - echo "" - echo "Upgrade BLOCKED: legacy custom resources still exist ($BLOCKED)." - echo "These CRDs will be removed by the 2.3.0 chart, which would destroy the CRs." - echo "Please migrate or delete these resources before upgrading." 
- echo "See: https://github.com/emqx/emqx-operator/blob/main-2.3/README.md#from-22x" - exit 1 - fi - - # Step 2: Patch CRDs to remove conversion webhooks. - # - # The 2.2.x chart configures Webhook conversion on emqxes and rebalances - # CRDs, pointing at the operator's webhook endpoint. During upgrade Helm - # will tear down the old Deployment (and its Service) before applying the - # new CRD manifests. In the window between those two events the API server - # would fail to serve any request that triggers CRD conversion, because the - # webhook endpoint no longer exists. Patching the strategy to None *before* - # Helm starts the upgrade avoids this. - echo "--- Patching CRD emqxes.apps.emqx.io ..." - kubectl patch crd emqxes.apps.emqx.io \ - --type=json \ - -p='[{"op":"replace","path":"/spec/conversion","value":{"strategy":"None"}}]' \ - || echo "*** not found or already patched, skipping." - - echo "--- Patching CRD rebalances.apps.emqx.io ..." - kubectl patch crd rebalances.apps.emqx.io \ - --type=json \ - -p='[{"op":"replace","path":"/spec/conversion","value":{"strategy":"None"}}]' \ - || echo "*** not found or already patched, skipping." - - echo "=== Pre-upgrade cleanup complete ===" -{{- end }} diff --git a/deploy/charts/emqx-operator/values.yaml b/deploy/charts/emqx-operator/values.yaml index a27eddad6..e568d1506 100644 --- a/deploy/charts/emqx-operator/values.yaml +++ b/deploy/charts/emqx-operator/values.yaml @@ -12,9 +12,9 @@ singleNamespace: false # Development configures the logger to use a Zap development config # (stacktraces on warnings, no sampling), otherwise a Zap production # config will be used (stacktraces on errors, sampling). -# NOTE: On by default for 2.3.0 as it changes a lot, a higher risk of +# NOTE: On by default for 3.0.0 as it changes a lot, a higher risk of # bug reports. Impact is tolerable, usually not much debug output. -# TODO: Switch off by default in 2.3.1. +# TODO: Switch off by default in 3.1.0. 
development: true replicaCount: 1 @@ -65,27 +65,3 @@ nodeSelector: {} tolerations: [] affinity: {} - -## Upgrade settings. -## A pre-install/pre-upgrade Job runs automatically to ensure a smooth -## upgrade from earlier chart versions: -## 1. Checks for legacy custom resources (emqxbrokers, emqxenterprises, -## emqxplugins) and blocks the upgrade if any exist, because Helm -## will delete those CRDs (and their CRs) during the upgrade. -## 2. Patches CRD conversion webhooks to strategy "None" so the API -## server does not fail while the old operator is being replaced. -## -## The Job is fully idempotent and becomes a no-op on fresh installs. -upgrade: - ## Run the pre-upgrade compatibility check Job. - ## Enabled by default so that upgrades from 2.2.x work out of the box. - ## Set to false to skip the Job entirely (e.g. in air-gapped - ## environments where pulling the kubectl image is not possible). - preUpgradeCheck: true - - ## Image used for the pre-upgrade cleanup Job. - ## Must include kubectl and sh. 
- image: - repository: alpine/k8s - tag: "1.31.4" - pullPolicy: IfNotPresent diff --git a/internal/controller/add_bootstrap_resource.go b/internal/controller/add_bootstrap_resource.go index b686bda9e..599a987f6 100644 --- a/internal/controller/add_bootstrap_resource.go +++ b/internal/controller/add_bootstrap_resource.go @@ -9,7 +9,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" resources "github.com/emqx/emqx-operator/internal/controller/resources" "github.com/sethvargo/go-password/password" @@ -19,7 +19,7 @@ type addBootstrap struct { *EMQXReconciler } -func (a *addBootstrap) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (a *addBootstrap) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { bootstrapAPIKeys, err := a.getAPIKeyString(r.ctx, instance) if err != nil { return subResult{err: emperror.Wrap(err, "failed to get bootstrap api keys")} @@ -42,7 +42,7 @@ func (a *addBootstrap) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes return subResult{} } -func (a *addBootstrap) getAPIKeyString(ctx context.Context, instance *crdv2.EMQX) (string, error) { +func (a *addBootstrap) getAPIKeyString(ctx context.Context, instance *crd.EMQX) (string, error) { var bootstrapAPIKeys string for _, apiKey := range instance.Spec.BootstrapAPIKeys { @@ -64,7 +64,7 @@ func (a *addBootstrap) getAPIKeyString(ctx context.Context, instance *crdv2.EMQX return bootstrapAPIKeys, nil } -func (a *addBootstrap) readSecret(ctx context.Context, instance *crdv2.EMQX, name string, key string) (string, error) { +func (a *addBootstrap) readSecret(ctx context.Context, instance *crd.EMQX, name string, key string) (string, error) { secret := &corev1.Secret{} if err := a.Client.Get(ctx, instance.NamespacedName(name), secret); err != nil { return "", 
emperror.Wrap(err, "failed to get secret") @@ -77,13 +77,13 @@ func (a *addBootstrap) readSecret(ctx context.Context, instance *crdv2.EMQX, nam return string(secret.Data[key]), nil } -func generateBootstrapAPIKeySecret(instance *crdv2.EMQX, bootstrapAPIKeys string) *corev1.Secret { +func generateBootstrapAPIKeySecret(instance *crd.EMQX, bootstrapAPIKeys string) *corev1.Secret { defPassword, _ := password.Generate(64, 10, 0, true, true) bootstrapAPIKeys += resources.DefaultBootstrapAPIKey + ":" + defPassword return resources.BootstrapAPIKey(instance).Secret(bootstrapAPIKeys) } -func generateNodeCookieSecret(instance *crdv2.EMQX, conf *config.EMQX) *corev1.Secret { +func generateNodeCookieSecret(instance *crd.EMQX, conf *config.EMQX) *corev1.Secret { cookie := conf.GetNodeCookie() if cookie == "" { cookie, _ = password.Generate(64, 10, 0, true, true) diff --git a/internal/controller/add_bootstrap_resource_suite_test.go b/internal/controller/add_bootstrap_resource_suite_test.go index af525dfd6..6f8fb961f 100644 --- a/internal/controller/add_bootstrap_resource_suite_test.go +++ b/internal/controller/add_bootstrap_resource_suite_test.go @@ -4,7 +4,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -12,7 +12,7 @@ import ( ) var _ = Describe("Reconciler addBootstrap", Ordered, func() { - var instance *crdv2.EMQX = &crdv2.EMQX{} + var instance *crd.EMQX = &crd.EMQX{} var ns *corev1.Namespace = &corev1.Namespace{} var a *addBootstrap @@ -77,7 +77,7 @@ var _ = Describe("Reconciler addBootstrap", Ordered, func() { It("should contain key and secret in bootstrap secret given initial values", func() { // Given - instance.Spec.BootstrapAPIKeys = []crdv2.BootstrapAPIKey{ + instance.Spec.BootstrapAPIKeys = []crd.BootstrapAPIKey{ { Key: "test_key", Secret: "test_secret", @@ -104,15 +104,15 @@ var _ = Describe("Reconciler addBootstrap", Ordered, func() { It("should contain key and secret in bootstrap secret given SecretRef values", func() { // Given - instance.Spec.BootstrapAPIKeys = []crdv2.BootstrapAPIKey{ + instance.Spec.BootstrapAPIKeys = []crd.BootstrapAPIKey{ { - SecretRef: &crdv2.SecretRef{ - Key: crdv2.KeyRef{ + SecretRef: &crd.SecretRef{ + Key: crd.KeyRef{ // Note: a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters SecretName: "test-key-secret", SecretKey: "key", }, - Secret: crdv2.KeyRef{ + Secret: crd.KeyRef{ SecretName: "test-value-secret", SecretKey: "secret", }, diff --git a/internal/controller/add_bootstrap_resource_test.go b/internal/controller/add_bootstrap_resource_test.go index def29400e..39b68e67d 100644 --- a/internal/controller/add_bootstrap_resource_test.go +++ b/internal/controller/add_bootstrap_resource_test.go @@ -4,7 +4,7 @@ import ( "strings" "testing" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" resources 
"github.com/emqx/emqx-operator/internal/controller/resources" "github.com/emqx/emqx-operator/internal/handler" @@ -16,7 +16,7 @@ import ( ) func TestGenerateNodeCookieSecret(t *testing.T) { - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", @@ -61,13 +61,13 @@ func TestGenerateBootstrapAPIKeySecret(t *testing.T) { // Create a context ctx := ctx - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", }, - Spec: crdv2.EMQXSpec{ - BootstrapAPIKeys: []crdv2.BootstrapAPIKey{ + Spec: crd.EMQXSpec{ + BootstrapAPIKeys: []crd.BootstrapAPIKey{ { Key: "test_key", Secret: "test_secret", @@ -145,20 +145,20 @@ func TestGenerateBootstrapAPIKeySecretWithSecretRef(t *testing.T) { t.Fatal(err) } - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", }, - Spec: crdv2.EMQXSpec{ - BootstrapAPIKeys: []crdv2.BootstrapAPIKey{ + Spec: crd.EMQXSpec{ + BootstrapAPIKeys: []crd.BootstrapAPIKey{ { - SecretRef: &crdv2.SecretRef{ - Key: crdv2.KeyRef{ + SecretRef: &crd.SecretRef{ + Key: crd.KeyRef{ SecretName: "test-key-secret", SecretKey: "key", }, - Secret: crdv2.KeyRef{ + Secret: crd.KeyRef{ SecretName: "test-value-secret", SecretKey: "secret", }, @@ -220,7 +220,7 @@ func TestReadSecret(t *testing.T) { } // Create a context - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "default", diff --git a/internal/controller/add_core_set.go b/internal/controller/add_core_set.go index 91db2ed90..1463bd379 100644 --- a/internal/controller/add_core_set.go +++ b/internal/controller/add_core_set.go @@ -2,128 +2,85 @@ package controller import ( "fmt" - "reflect" "slices" + "time" emperror "emperror.dev/errors" "github.com/cisco-open/k8s-objectmatcher/patch" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config 
"github.com/emqx/emqx-operator/internal/controller/config" resources "github.com/emqx/emqx-operator/internal/controller/resources" util "github.com/emqx/emqx-operator/internal/controller/util" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" - "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" ) type addCoreSet struct { *EMQXReconciler } -func (a *addCoreSet) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - sts := newStatefulSet(instance, r.conf) - stsHash := sts.Labels[crdv2.LabelPodTemplateHash] +func (a *addCoreSet) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { + existing := r.state.coreSet() + coreSet := newStatefulSet(instance, r.conf) + _ = ctrl.SetControllerReference(instance, coreSet, a.Scheme) - needCreate := false - updateCoreSet := r.state.updateCoreSet(instance) - if updateCoreSet == nil { - r.log.Info("creating new statefulSet", - "statefulSet", klog.KObj(sts), + if existing == nil { + // No StatefulSet exists yet. 
+ r.log.Info("creating statefulSet", + "statefulSet", klog.KObj(coreSet), "reason", "no existing statefulSet", ) - needCreate = true - } else { - patchResult, _ := a.Patcher.Calculate(updateCoreSet, sts, justCheckPodTemplate()) - if !patchResult.IsEmpty() { - r.log.Info("creating new statefulSet", - "statefulSet", klog.KObj(sts), - "reason", "pod template has changed", - "patch", string(patchResult.Patch), - ) - needCreate = true - } - } - - if needCreate { - _ = ctrl.SetControllerReference(instance, sts, a.Scheme) - if err := a.Handler.Create(r.ctx, sts); err != nil { + if err := a.Handler.Create(r.ctx, coreSet); err != nil { if k8sErrors.IsAlreadyExists(emperror.Cause(err)) { - cond := instance.Status.GetLastTrueCondition() - if cond != nil && cond.Type != crdv2.Available && cond.Type != crdv2.Ready { - // Sometimes the updated statefulSet will not be ready, because the EMQX node can not be started. - // And then we will rollback EMQX CR spec, the EMQX operator controller will create a new statefulSet. - // But the new statefulSet will be the same as the previous one, so we didn't need to create it, just change the EMQX status. - if stsHash == instance.Status.CoreNodesStatus.CurrentRevision { - _ = a.updateEMQXStatus(r, instance, "RevertStatefulSet", stsHash) - return subResult{} - } - } - if instance.Status.CoreNodesStatus.CollisionCount == nil { - instance.Status.CoreNodesStatus.CollisionCount = ptr.To(int32(0)) - } - *instance.Status.CoreNodesStatus.CollisionCount++ - _ = a.Client.Status().Update(r.ctx, instance) - return subResult{result: ctrl.Result{Requeue: true}} + return reconcileRequeue() } - return subResult{err: emperror.Wrap(err, "failed to create statefulSet")} + return reconcileError(emperror.Wrap(err, "failed to create statefulSet")) } - updateResult := a.updateEMQXStatus(r, instance, "CreateNewStatefulSet", stsHash) - return subResult{err: updateResult} + // Force Requeue to give StatefulSet controller time to reflect status. 
+ return reconcileRequeueAfter(time.Second) } - sts.ObjectMeta = updateCoreSet.ObjectMeta - sts.Spec.Template.ObjectMeta = updateCoreSet.Spec.Template.ObjectMeta - sts.Spec.Selector = updateCoreSet.Spec.Selector + // StatefulSet exists. + // Update it in place if the spec has changed. + // With OnDelete strategy, updating the spec does not restart pods. patchResult, _ := a.Patcher.Calculate( - updateCoreSet, - sts, - // Ignore Status fields and VolumeClaimTemplate stuff. + existing, + coreSet, patch.IgnoreStatusFields(), patch.IgnoreVolumeClaimTemplateTypeMetaAndStatus(), // Ignore if number of replicas has changed. - // Reconciler `syncCoreSets` will handle scaling up and down of the existing statefulSet. + // Reconciler `syncCoreSets` will handle scaling of the statefulSet. ignoreStatefulSetReplicas(), ) if !patchResult.IsEmpty() { - // Update statefulSet r.log.Info("updating statefulSet", - "statefulSet", klog.KObj(sts), - "reason", "statefulSet has changed", + "statefulSet", klog.KObj(coreSet), + "reason", "spec has changed", "patch", string(patchResult.Patch), ) - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - storage := &appsv1.StatefulSet{} - _ = a.Client.Get(r.ctx, client.ObjectKeyFromObject(sts), storage) - sts.ResourceVersion = storage.ResourceVersion - return a.Handler.Update(r.ctx, sts) - }); err != nil { - return subResult{err: emperror.Wrap(err, "failed to update statefulSet")} + // NOTE + // Conflicts are expected as StatefulSet controller may act concurrently on the resource. + // Conflicts are handled on `EMQXReconciler` level. 
+ err := a.Handler.Update(r.ctx, coreSet) + if err != nil { + return reconcileError(emperror.Wrap(err, "failed to update statefulSet")) } - updateResult := a.updateEMQXStatus(r, instance, "UpdateStatefulSet", stsHash) - return subResult{err: updateResult} + forceCoreNodesProgressing(instance) + updateResult := a.Client.Status().Update(r.ctx, instance) + // Force Requeue to give StatefulSet controller time to reflect status. + return subResult{err: updateResult, immediateResult: &ctrl.Result{RequeueAfter: time.Second}} } - return subResult{} -} -func (a *addCoreSet) updateEMQXStatus(r *reconcileRound, instance *crdv2.EMQX, reason, podTemplateHash string) error { - instance.Status.ResetConditions(reason) - instance.Status.CoreNodesStatus.UpdateRevision = podTemplateHash - return a.Client.Status().Update(r.ctx, instance) + return subResult{} } -func newStatefulSet(instance *crdv2.EMQX, conf *config.EMQX) *appsv1.StatefulSet { +func newStatefulSet(instance *crd.EMQX, conf *config.EMQX) *appsv1.StatefulSet { sts := generateStatefulSet(instance) - podTemplateHash := computeHash(sts.Spec.Template.DeepCopy(), instance.Status.CoreNodesStatus.CollisionCount) - sts.Name = sts.Name + "-" + podTemplateHash - sts.Labels[crdv2.LabelPodTemplateHash] = podTemplateHash - sts.Spec.Template.Labels[crdv2.LabelPodTemplateHash] = podTemplateHash - sts.Spec.Selector = util.CloneSelectorAndAddLabel(sts.Spec.Selector, crdv2.LabelPodTemplateHash, podTemplateHash) sts.Spec.Template.Spec.Containers[0].Ports = util.MergeContainerPorts( sts.Spec.Template.Spec.Containers[0].Ports, util.MapServicePortsToContainerPorts(conf.GetDashboardServicePorts()), @@ -131,26 +88,34 @@ func newStatefulSet(instance *crdv2.EMQX, conf *config.EMQX) *appsv1.StatefulSet return sts } -func generateStatefulSet(instance *crdv2.EMQX) *appsv1.StatefulSet { - // Add a PreStop hook to leave the cluster when the pod is asked to stop. 
- // This is especially important when DS Raft is enabled, otherwise there will be a - // lot of leftover records in the DS cluster metadata. - lifecycle := instance.Spec.CoreTemplate.Spec.Lifecycle - if lifecycle == nil { - lifecycle = &corev1.Lifecycle{} - } else { - lifecycle = lifecycle.DeepCopy() - } - lifecycle.PreStop = &corev1.LifecycleHandler{ - Exec: &corev1.ExecAction{ - Command: []string{"/bin/sh", "-c", "emqx ctl cluster leave"}, - }, - } +func generateStatefulSet(instance *crd.EMQX) *appsv1.StatefulSet { + template := &instance.Spec.CoreTemplate cookie := resources.Cookie(instance) bootstrapAPIKeys := resources.BootstrapAPIKey(instance) config := resources.EMQXConfig(instance) + // Use OnDelete update strategy so the operator controls pod replacement. + updateStrategy := appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.OnDeleteStatefulSetStrategyType, + } + + // Requires K8s >= 1.27 and the StatefulSetAutoDeletePVC feature gate (stable since K8s 1.32). + pvcRetentionPolicy := appsv1.StatefulSetPersistentVolumeClaimRetentionPolicy{ + WhenScaled: appsv1.DeletePersistentVolumeClaimRetentionPolicyType, + WhenDeleted: appsv1.DeletePersistentVolumeClaimRetentionPolicyType, + } + + readinessProbe := resources.EvacuationReadinessProbe() + + // Prefer evacuation-aware probe over older-version defaults. 
+ if template.Spec.ReadinessProbe != nil { + if template.Spec.ReadinessProbe.HTTPGet != nil && + template.Spec.ReadinessProbe.HTTPGet.Path != "/status" { + readinessProbe = template.Spec.ReadinessProbe.DeepCopy() + } + } + sts := &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{ APIVersion: "apps/v1", @@ -159,44 +124,42 @@ func generateStatefulSet(instance *crdv2.EMQX) *appsv1.StatefulSet { ObjectMeta: metav1.ObjectMeta{ Namespace: instance.Namespace, Name: instance.CoreNamespacedName().Name, - Annotations: instance.Spec.CoreTemplate.DeepCopy().Annotations, + Annotations: util.CloneAnnotations(template.Annotations), Labels: statefulSetLabels(instance), }, Spec: appsv1.StatefulSetSpec{ - ServiceName: instance.HeadlessServiceNamespacedName().Name, - Replicas: instance.Spec.CoreTemplate.Spec.Replicas, + ServiceName: instance.HeadlessServiceNamespacedName().Name, + Replicas: template.Spec.Replicas, + MinReadySeconds: instance.Spec.CoreTemplate.Spec.MinReadySeconds, + UpdateStrategy: updateStrategy, + PodManagementPolicy: appsv1.ParallelPodManagement, + PersistentVolumeClaimRetentionPolicy: &pvcRetentionPolicy, Selector: &metav1.LabelSelector{ - MatchLabels: statefulSetLabels(instance), + MatchLabels: statefulSetSelectorLabels(instance), }, - PodManagementPolicy: appsv1.ParallelPodManagement, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Annotations: instance.Spec.CoreTemplate.DeepCopy().Annotations, + Annotations: util.CloneAnnotations(template.Annotations), Labels: statefulSetLabels(instance), }, Spec: corev1.PodSpec{ - ReadinessGates: []corev1.PodReadinessGate{ - { - ConditionType: crdv2.PodOnServing, - }, - }, ImagePullSecrets: instance.Spec.ImagePullSecrets, ServiceAccountName: instance.Spec.ServiceAccountName, - SecurityContext: instance.Spec.CoreTemplate.Spec.PodSecurityContext, - Affinity: instance.Spec.CoreTemplate.Spec.Affinity, - Tolerations: instance.Spec.CoreTemplate.Spec.Tolerations, - TopologySpreadConstraints: 
instance.Spec.CoreTemplate.Spec.TopologySpreadConstraints, - NodeName: instance.Spec.CoreTemplate.Spec.NodeName, - NodeSelector: instance.Spec.CoreTemplate.Spec.NodeSelector, - InitContainers: instance.Spec.CoreTemplate.Spec.InitContainers, + SecurityContext: template.Spec.PodSecurityContext, + Affinity: template.Spec.Affinity, + Tolerations: template.Spec.Tolerations, + TopologySpreadConstraints: template.Spec.TopologySpreadConstraints, + NodeName: template.Spec.NodeName, + NodeSelector: template.Spec.NodeSelector, + InitContainers: template.Spec.InitContainers, Containers: append([]corev1.Container{ { - Name: crdv2.DefaultContainerName, + Name: crd.DefaultContainerName, Image: instance.Spec.Image, ImagePullPolicy: instance.Spec.ImagePullPolicy, - Command: instance.Spec.CoreTemplate.Spec.Command, - Args: instance.Spec.CoreTemplate.Spec.Args, - Ports: instance.Spec.CoreTemplate.Spec.Ports, + Command: template.Spec.Command, + Args: template.Spec.Args, + Ports: template.Spec.Ports, Env: append([]corev1.EnvVar{ { Name: "POD_NAME", @@ -232,14 +195,14 @@ func generateStatefulSet(instance *crdv2.EMQX) *appsv1.StatefulSet { }, cookie.EnvVar(), bootstrapAPIKeys.EnvVar(), - }, instance.Spec.CoreTemplate.Spec.Env...), - EnvFrom: instance.Spec.CoreTemplate.Spec.EnvFrom, - Resources: instance.Spec.CoreTemplate.Spec.Resources, - SecurityContext: instance.Spec.CoreTemplate.Spec.ContainerSecurityContext, - LivenessProbe: instance.Spec.CoreTemplate.Spec.LivenessProbe, - ReadinessProbe: instance.Spec.CoreTemplate.Spec.ReadinessProbe, - StartupProbe: instance.Spec.CoreTemplate.Spec.StartupProbe, - Lifecycle: lifecycle, + }, template.Spec.Env...), + EnvFrom: template.Spec.EnvFrom, + Resources: template.Spec.Resources, + SecurityContext: template.Spec.ContainerSecurityContext, + LivenessProbe: template.Spec.LivenessProbe, + ReadinessProbe: readinessProbe, + StartupProbe: template.Spec.StartupProbe, + Lifecycle: template.Spec.Lifecycle, VolumeMounts: slices.Concat( 
[]corev1.VolumeMount{ { @@ -253,10 +216,10 @@ func generateStatefulSet(instance *crdv2.EMQX) *appsv1.StatefulSet { bootstrapAPIKeys.VolumeMount(), }, config.VolumeMounts(), - instance.Spec.CoreTemplate.Spec.ExtraVolumeMounts, + template.Spec.ExtraVolumeMounts, ), }, - }, instance.Spec.CoreTemplate.Spec.ExtraContainers...), + }, template.Spec.ExtraContainers...), Volumes: append([]corev1.Volume{ config.Volume(), bootstrapAPIKeys.Volume(), @@ -266,44 +229,55 @@ func generateStatefulSet(instance *crdv2.EMQX) *appsv1.StatefulSet { EmptyDir: &corev1.EmptyDirVolumeSource{}, }, }, - }, instance.Spec.CoreTemplate.Spec.ExtraVolumes...), + }, template.Spec.ExtraVolumes...), }, }, }, } - if !reflect.ValueOf(instance.Spec.CoreTemplate.Spec.VolumeClaimTemplates).IsZero() { - volumeClaimTemplates := instance.Spec.CoreTemplate.Spec.VolumeClaimTemplates.DeepCopy() - if volumeClaimTemplates.VolumeMode == nil { - // Wait https://github.com/cisco-open/k8s-objectmatcher/issues/51 fixed - fs := corev1.PersistentVolumeFilesystem - volumeClaimTemplates.VolumeMode = &fs - } - sts.Spec.VolumeClaimTemplates = []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: instance.CoreNamespacedName().Name + "-data", - Namespace: instance.Namespace, - Labels: statefulSetLabels(instance), - }, - Spec: *volumeClaimTemplates, - }, - } - } else { - sts.Spec.Template.Spec.Volumes = append([]corev1.Volume{ - { - Name: instance.CoreNamespacedName().Name + "-data", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, + sts.Spec.VolumeClaimTemplates = []corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: instance.CoreNamespacedName().Name + "-data", + Namespace: instance.Namespace, + // TODO + // Labels from the core template are currently not attached to PVCs. + // This is deliberate, as it simplifies StatefulSet management. 
+ // If this is needed, care must be taken to propagate the core template + // changes correctly: updating PVC template (even if only labels change) inside + // the StatefulSet spec is explicitly forbidden by the Kubernetes API server. + Labels: instance.DefaultLabelsWith(crd.CoreLabels()), }, - }, sts.Spec.Template.Spec.Volumes...) + Spec: coreDataVolumeClaimSpec(template), + }, } return sts } // Combine instance labels, core labels and template labels. -func statefulSetLabels(instance *crdv2.EMQX) map[string]string { - return instance.DefaultLabelsWith(crdv2.CoreLabels(), instance.Spec.CoreTemplate.Labels) +func statefulSetLabels(instance *crd.EMQX) map[string]string { + return instance.DefaultLabelsWith(crd.CoreLabels(), instance.Spec.CoreTemplate.Labels) +} + +// Combine just instance labels and core labels. +// Should be stable across EMQX spec changes. +func statefulSetSelectorLabels(instance *crd.EMQX) map[string]string { + return instance.DefaultLabelsWith(crd.CoreLabels()) +} + +func coreDataVolumeClaimSpec(template *crd.EMQXCoreTemplate) corev1.PersistentVolumeClaimSpec { + spec := template.Spec.PersistentVolumeClaimSpec.DeepCopy() + if len(spec.AccessModes) == 0 { + spec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce} + } + if spec.Resources.Requests == nil { + spec.Resources.Requests = corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("500Mi")} + } + if spec.VolumeMode == nil { + // https://github.com/cisco-open/k8s-objectmatcher/issues/51 + fs := corev1.PersistentVolumeFilesystem + spec.VolumeMode = &fs + } + return *spec } diff --git a/internal/controller/add_core_set_suite_test.go b/internal/controller/add_core_set_suite_test.go index f870e6840..c0caca97e 100644 --- a/internal/controller/add_core_set_suite_test.go +++ b/internal/controller/add_core_set_suite_test.go @@ -1,9 +1,7 @@ package controller import ( - "time" - - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" . 
"github.com/emqx/emqx-operator/test/util" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -16,7 +14,7 @@ import ( var _ = Describe("Reconciler addCoreSet", Ordered, func() { var ns *corev1.Namespace - var instance *crdv2.EMQX + var instance *crd.EMQX var a *addCoreSet var round *reconcileRound @@ -24,7 +22,7 @@ var _ = Describe("Reconciler addCoreSet", Ordered, func() { // Create namespace: ns = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-add-emqx-core-test", + Name: "controller-add-emqx-core-test", Labels: map[string]string{ "test": "e2e", }, @@ -38,83 +36,32 @@ var _ = Describe("Reconciler addCoreSet", Ordered, func() { }) BeforeEach(func() { - // Mock instance status: - instance.Status.Conditions = []metav1.Condition{ - { - Type: crdv2.Ready, - Status: metav1.ConditionTrue, - Reason: crdv2.Ready, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, - { - Type: crdv2.CoreNodesReady, - Status: metav1.ConditionTrue, - Reason: crdv2.CoreNodesReady, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, - { - Type: crdv2.Initialized, - Status: metav1.ConditionTrue, - Reason: crdv2.Initialized, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -10)}, - }, - } // Instantiate reconciler: a = &addCoreSet{emqxReconciler} round = newReconcileRound() - round.state = loadReconcileState(ctx, k8sClient, instance) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) }) It("should create statefulSet", func() { - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). 
- Should(Equal(subResult{})) - - Eventually(func() []appsv1.StatefulSet { - list := &appsv1.StatefulSetList{} - _ = k8sClient.List(ctx, list, - client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.CoreLabels())), - ) - return list.Items - }).Should(ConsistOf( - HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal(instance.Spec.Image)))), + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(coreSets(instance)).To(ConsistOf( + HaveField("Spec.Template.Spec.Containers", ConsistOf( + HaveField("Image", Equal(instance.Spec.Image)))), )) }) - It("change image creates new statefulSet", func() { + It("change image updates existing statefulSet in place", func() { instance.Spec.Image = "emqx/emqx" - instance.Spec.UpdateStrategy.InitialDelaySeconds = int32(999999999) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - - Eventually(func() []appsv1.StatefulSet { - list := &appsv1.StatefulSetList{} - _ = k8sClient.List(ctx, list, - client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.CoreLabels())), - ) - return list.Items - }).WithTimeout(timeout).WithPolling(interval).Should(ConsistOf( - HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal("emqx")))), - HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal("emqx/emqx")))), + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(actualObject(instance)).To(And( + HaveCondition(crd.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), + HaveCondition(crd.CoreNodesProgressing, HaveField("Status", Equal(metav1.ConditionTrue))), )) - - Eventually(func() *crdv2.EMQX { - _ = k8sClient.Get(ctx, client.ObjectKeyFromObject(instance), instance) - return instance - }).Should(And( - WithTransform( - func(emqx 
*crdv2.EMQX) *metav1.Condition { - return emqx.Status.GetLastTrueCondition() - }, - HaveField("Type", Equal(crdv2.Initialized)), - ), - HaveCondition(crdv2.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), - HaveCondition(crdv2.CoreNodesReady, HaveField("Status", Equal(metav1.ConditionFalse))), + Expect(coreSets(instance)).To(ConsistOf( + HaveField("Spec.Template.Spec.Containers", ConsistOf( + HaveField("Image", Equal("emqx/emqx")))), )) }) @@ -123,3 +70,12 @@ var _ = Describe("Reconciler addCoreSet", Ordered, func() { Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) }) }) + +func coreSets(instance *crd.EMQX) []appsv1.StatefulSet { + list := &appsv1.StatefulSetList{} + _ = k8sClient.List(ctx, list, + client.InNamespace(instance.Namespace), + client.MatchingLabels(instance.DefaultLabelsWith(crd.CoreLabels())), + ) + return list.Items +} diff --git a/internal/controller/add_core_set_test.go b/internal/controller/add_core_set_test.go index 39f5f199f..dc8250740 100644 --- a/internal/controller/add_core_set_test.go +++ b/internal/controller/add_core_set_test.go @@ -3,9 +3,10 @@ package controller import ( "testing" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -13,7 +14,7 @@ import ( ) func TestGetNewStatefulSet(t *testing.T) { - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", @@ -24,7 +25,7 @@ func TestGetNewStatefulSet(t *testing.T) { "emqx-annotation-key": "emqx-annotation-value", }, }, - Spec: crdv2.EMQXSpec{ + Spec: crd.EMQXSpec{ Image: "emqx/emqx:5.1", ClusterDomain: "cluster.local", }, @@ -38,9 +39,6 @@ func TestGetNewStatefulSet(t *testing.T) { }, } instance.Spec.CoreTemplate.Spec.Replicas 
= ptr.To(int32(3)) - instance.Status.CoreNodesStatus = crdv2.EMQXNodesStatus{ - CollisionCount: ptr.To(int32(0)), - } t.Run("check metadata", func(t *testing.T) { emqx := instance.DeepCopy() @@ -49,10 +47,11 @@ func TestGetNewStatefulSet(t *testing.T) { assert.Equal(t, emqx.Spec.CoreTemplate.Annotations, got.Annotations) assert.Equal(t, "core-label-value", got.Labels["core-label-key"]) - assert.Equal(t, "emqx", got.Labels[crdv2.LabelInstance]) - assert.Equal(t, "emqx-operator", got.Labels[crdv2.LabelManagedBy]) - assert.Equal(t, "core", got.Labels[crdv2.LabelDBRole]) - assert.Equal(t, "emqx-core-"+got.Labels[crdv2.LabelPodTemplateHash], got.Name) + assert.Equal(t, "emqx", got.Labels[crd.LabelInstance]) + assert.Equal(t, "emqx-operator", got.Labels[crd.LabelManagedBy]) + assert.Equal(t, "core", got.Labels[crd.LabelDBRole]) + // Single StatefulSet: name is deterministic, no hash suffix. + assert.Equal(t, "emqx-core", got.Name) assert.Equal(t, emqx.Namespace, got.Namespace) }) @@ -62,22 +61,36 @@ func TestGetNewStatefulSet(t *testing.T) { got := newStatefulSet(emqx, conf) assert.Equal(t, emqx.Spec.CoreTemplate.ObjectMeta.Annotations, got.Spec.Template.Annotations) assert.EqualValues(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "core", - crdv2.LabelPodTemplateHash: got.Labels[crdv2.LabelPodTemplateHash], - "core-label-key": "core-label-value", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", + "core-label-key": "core-label-value", }, got.Spec.Template.Labels) assert.EqualValues(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "core", - crdv2.LabelPodTemplateHash: got.Labels[crdv2.LabelPodTemplateHash], - "core-label-key": "core-label-value", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", }, got.Spec.Selector.MatchLabels) }) + t.Run("check 
update strategy is OnDelete", func(t *testing.T) { + emqx := instance.DeepCopy() + conf, _ := config.EMQXConfigWithDefaults(emqx.Spec.Config.Data) + got := newStatefulSet(emqx, conf) + assert.Equal(t, appsv1.OnDeleteStatefulSetStrategyType, got.Spec.UpdateStrategy.Type) + assert.EqualValues(t, int32(0), got.Spec.MinReadySeconds) + }) + + t.Run("check PVC retention policy deletes on scale-down and sts deletion", func(t *testing.T) { + emqx := instance.DeepCopy() + conf, _ := config.EMQXConfigWithDefaults(emqx.Spec.Config.Data) + got := newStatefulSet(emqx, conf) + assert.NotNil(t, got.Spec.PersistentVolumeClaimRetentionPolicy) + assert.Equal(t, appsv1.DeletePersistentVolumeClaimRetentionPolicyType, got.Spec.PersistentVolumeClaimRetentionPolicy.WhenScaled) + assert.Equal(t, appsv1.DeletePersistentVolumeClaimRetentionPolicyType, got.Spec.PersistentVolumeClaimRetentionPolicy.WhenDeleted) + }) + t.Run("check bootstrap API keys", func(t *testing.T) { emqx := instance.DeepCopy() conf, _ := config.EMQXConfigWithDefaults(emqx.Spec.Config.Data) @@ -147,15 +160,52 @@ func TestGetNewStatefulSet(t *testing.T) { ) }) - t.Run("check sts volume claim templates", func(t *testing.T) { + t.Run("check default volume claim templates", func(t *testing.T) { + emqx := instance.DeepCopy() + + fs := corev1.PersistentVolumeFilesystem + got := generateStatefulSet(emqx) + assert.Equal(t, []corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-core-data", + Namespace: "emqx", + Labels: map[string]string{ + crd.LabelDBRole: "core", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("500Mi"), + }, + }, + VolumeMode: &fs, + }, + }, + }, got.Spec.VolumeClaimTemplates) + assert.NotContains(t, 
got.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "emqx-core-data", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + }) + + t.Run("check explicit volume claim templates", func(t *testing.T) { emqx := instance.DeepCopy() - emqx.Spec.CoreTemplate.Spec.VolumeClaimTemplates = corev1.PersistentVolumeClaimSpec{ + emqx.Spec.CoreTemplate.Spec.PersistentVolumeClaimSpec = corev1.PersistentVolumeClaimSpec{ AccessModes: []corev1.PersistentVolumeAccessMode{ corev1.ReadWriteOnce, }, Resources: corev1.VolumeResourceRequirements{ Requests: corev1.ResourceList{ - corev1.ResourceStorage: resource.MustParse("20Mi"), + corev1.ResourceStorage: resource.MustParse("1Gi"), }, }, } @@ -168,10 +218,9 @@ func TestGetNewStatefulSet(t *testing.T) { Name: "emqx-core-data", Namespace: "emqx", Labels: map[string]string{ - crdv2.LabelDBRole: "core", - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - "core-label-key": "core-label-value", + crd.LabelDBRole: "core", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", }, }, Spec: corev1.PersistentVolumeClaimSpec{ @@ -180,7 +229,7 @@ func TestGetNewStatefulSet(t *testing.T) { }, Resources: corev1.VolumeResourceRequirements{ Requests: corev1.ResourceList{ - corev1.ResourceStorage: resource.MustParse("20Mi"), + corev1.ResourceStorage: resource.MustParse("1Gi"), }, }, VolumeMode: &fs, diff --git a/internal/controller/add_headless_service.go b/internal/controller/add_headless_service.go index bb051ec11..c921ef605 100644 --- a/internal/controller/add_headless_service.go +++ b/internal/controller/add_headless_service.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -12,14 +12,14 @@ type addHeadlessService struct { *EMQXReconciler } 
-func (a *addHeadlessService) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (a *addHeadlessService) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { if err := a.CreateOrUpdate(r.ctx, a.Scheme, r.log, instance, generateHeadlessService(instance)); err != nil { return subResult{err: emperror.Wrap(err, "failed to create or update services")} } return subResult{} } -func generateHeadlessService(instance *crdv2.EMQX) *corev1.Service { +func generateHeadlessService(instance *crd.EMQX) *corev1.Service { headlessSvc := &corev1.Service{ TypeMeta: metav1.TypeMeta{ APIVersion: "v1", @@ -35,7 +35,7 @@ func generateHeadlessService(instance *crdv2.EMQX) *corev1.Service { ClusterIP: corev1.ClusterIPNone, SessionAffinity: corev1.ServiceAffinityNone, PublishNotReadyAddresses: true, - Selector: instance.DefaultLabelsWith(crdv2.CoreLabels()), + Selector: instance.DefaultLabelsWith(crd.CoreLabels()), Ports: []corev1.ServicePort{ { Name: "erlang-dist", diff --git a/internal/controller/add_headless_service_suite_test.go b/internal/controller/add_headless_service_suite_test.go index 43d302a29..58d25a3dc 100644 --- a/internal/controller/add_headless_service_suite_test.go +++ b/internal/controller/add_headless_service_suite_test.go @@ -1,7 +1,7 @@ package controller import ( - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -11,7 +11,7 @@ import ( var _ = Describe("Reconciler addHeadlessService", Ordered, func() { var a *addHeadlessService - var instance *crdv2.EMQX = &crdv2.EMQX{} + var instance *crd.EMQX = &crd.EMQX{} var ns *corev1.Namespace = &corev1.Namespace{} BeforeEach(func() { @@ -28,9 +28,9 @@ var _ = Describe("Reconciler addHeadlessService", Ordered, func() { instance = emqx.DeepCopy() instance.Namespace = ns.Name - instance.Spec.CoreTemplate = crdv2.EMQXCoreTemplate{ + instance.Spec.CoreTemplate = crd.EMQXCoreTemplate{ ObjectMeta: metav1.ObjectMeta{ - Labels: instance.DefaultLabelsWith(crdv2.CoreLabels()), + Labels: instance.DefaultLabelsWith(crd.CoreLabels()), }, } }) diff --git a/internal/controller/add_headless_service_test.go b/internal/controller/add_headless_service_test.go index e4494a563..43f9cf490 100644 --- a/internal/controller/add_headless_service_test.go +++ b/internal/controller/add_headless_service_test.go @@ -3,7 +3,7 @@ package controller import ( "testing" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -11,13 +11,13 @@ import ( ) func TestGenerateHeadlessSVC(t *testing.T) { - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", }, - Spec: crdv2.EMQXSpec{ - CoreTemplate: crdv2.EMQXCoreTemplate{ + Spec: crd.EMQXSpec{ + CoreTemplate: crd.EMQXCoreTemplate{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"test": "label"}, }, @@ -55,7 +55,7 @@ func TestGenerateHeadlessSVC(t *testing.T) { TargetPort: intstr.FromInt(5369), }, }, - Selector: instance.DefaultLabelsWith(crdv2.CoreLabels()), + Selector: instance.DefaultLabelsWith(crd.CoreLabels()), }, } assert.Equal(t, expect, generateHeadlessService(instance)) diff --git a/internal/controller/add_pdb.go 
b/internal/controller/add_pdb.go index 861fff7a3..0c493dd68 100644 --- a/internal/controller/add_pdb.go +++ b/internal/controller/add_pdb.go @@ -2,47 +2,32 @@ package controller import ( emperror "emperror.dev/errors" - semver "github.com/Masterminds/semver/v3" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" policyv1 "k8s.io/api/policy/v1" - policyv1beta1 "k8s.io/api/policy/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/discovery" "sigs.k8s.io/controller-runtime/pkg/client" - kubeConfig "sigs.k8s.io/controller-runtime/pkg/client/config" ) type addPdb struct { *EMQXReconciler } -func (a *addPdb) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - discoveryClient, _ := discovery.NewDiscoveryClientForConfig(kubeConfig.GetConfigOrDie()) - kubeVersion, _ := discoveryClient.ServerVersion() - v, _ := semver.NewVersion(kubeVersion.String()) - +func (a *addPdb) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { pdbList := []client.Object{} - if v.LessThan(semver.MustParse("1.21")) { - corePdb, replPdb := generatePodDisruptionBudgetV1beta1(instance) - pdbList = append(pdbList, corePdb) - if replPdb != nil { - pdbList = append(pdbList, replPdb) - } - } else { - corePdb, replPdb := generatePodDisruptionBudget(instance) - pdbList = append(pdbList, corePdb) - if replPdb != nil { - pdbList = append(pdbList, replPdb) - } + corePdb, replPdb := generatePodDisruptionBudget(instance) + pdbList = append(pdbList, corePdb) + if replPdb != nil { + pdbList = append(pdbList, replPdb) } - if err := a.CreateOrUpdateList(r.ctx, a.Scheme, r.log, instance, pdbList); err != nil { + err := a.CreateOrUpdateList(r.ctx, a.Scheme, r.log, instance, pdbList) + if err != nil { return subResult{err: emperror.Wrap(err, "failed to create or update PDBs")} } return subResult{} } -func generatePodDisruptionBudget(instance *crdv2.EMQX) (*policyv1.PodDisruptionBudget, *policyv1.PodDisruptionBudget) { +func 
generatePodDisruptionBudget(instance *crd.EMQX) (*policyv1.PodDisruptionBudget, *policyv1.PodDisruptionBudget) { corePdb := &policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ APIVersion: "policy/v1", @@ -55,7 +40,7 @@ func generatePodDisruptionBudget(instance *crdv2.EMQX) (*policyv1.PodDisruptionB }, Spec: policyv1.PodDisruptionBudgetSpec{ Selector: &metav1.LabelSelector{ - MatchLabels: instance.DefaultLabelsWith(crdv2.CoreLabels(), instance.Spec.CoreTemplate.Labels), + MatchLabels: instance.DefaultLabelsWith(crd.CoreLabels()), }, MinAvailable: instance.Spec.CoreTemplate.Spec.MinAvailable, MaxUnavailable: instance.Spec.CoreTemplate.Spec.MaxUnavailable, @@ -65,43 +50,7 @@ func generatePodDisruptionBudget(instance *crdv2.EMQX) (*policyv1.PodDisruptionB if instance.Spec.HasReplicants() { replPdb := corePdb.DeepCopy() replPdb.Name = instance.ReplicantNamespacedName().Name - replPdb.Spec.Selector.MatchLabels = instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - instance.Spec.ReplicantTemplate.Labels, - ) - replPdb.Spec.MinAvailable = instance.Spec.ReplicantTemplate.Spec.MinAvailable - replPdb.Spec.MaxUnavailable = instance.Spec.ReplicantTemplate.Spec.MaxUnavailable - return corePdb, replPdb - } - return corePdb, nil -} - -func generatePodDisruptionBudgetV1beta1(instance *crdv2.EMQX) (*policyv1beta1.PodDisruptionBudget, *policyv1beta1.PodDisruptionBudget) { - corePdb := &policyv1beta1.PodDisruptionBudget{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "policy/v1", - Kind: "PodDisruptionBudget", - }, - ObjectMeta: metav1.ObjectMeta{ - Namespace: instance.Namespace, - Name: instance.CoreNamespacedName().Name, - Labels: instance.DefaultLabelsWith(instance.Labels), - }, - Spec: policyv1beta1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: instance.DefaultLabelsWith(crdv2.CoreLabels(), instance.Spec.CoreTemplate.Labels), - }, - MinAvailable: instance.Spec.CoreTemplate.Spec.MinAvailable, - MaxUnavailable: 
instance.Spec.CoreTemplate.Spec.MaxUnavailable, - }, - } - if instance.Spec.HasReplicants() { - replPdb := corePdb.DeepCopy() - replPdb.Name = instance.ReplicantNamespacedName().Name - replPdb.Spec.Selector.MatchLabels = instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - instance.Spec.ReplicantTemplate.Labels, - ) + replPdb.Spec.Selector.MatchLabels = instance.DefaultLabelsWith(crd.ReplicantLabels()) replPdb.Spec.MinAvailable = instance.Spec.ReplicantTemplate.Spec.MinAvailable replPdb.Spec.MaxUnavailable = instance.Spec.ReplicantTemplate.Spec.MaxUnavailable return corePdb, replPdb diff --git a/internal/controller/add_replicant_set.go b/internal/controller/add_replicant_set.go index fbd2d97dd..91681003d 100644 --- a/internal/controller/add_replicant_set.go +++ b/internal/controller/add_replicant_set.go @@ -3,10 +3,11 @@ package controller import ( "fmt" "slices" + "time" emperror "emperror.dev/errors" "github.com/cisco-open/k8s-objectmatcher/patch" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" resources "github.com/emqx/emqx-operator/internal/controller/resources" util "github.com/emqx/emqx-operator/internal/controller/util" @@ -14,30 +15,36 @@ import ( corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" ) type addReplicantSet struct { *EMQXReconciler } -func (a *addReplicantSet) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (a *addReplicantSet) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // Cluster w/o replicants, skip this step. if instance.Spec.ReplicantTemplate == nil { return subResult{} } // Core nodes are still spinning up, wait for them to be ready. 
- if !instance.Status.IsConditionTrue(crdv2.CoreNodesReady) { - return subResult{} + coreSet := r.state.coreSet() + if coreSet == nil || coreSet.Status.AvailableReplicas == 0 { + return reconcilePostpone() + } + + // Postpone until at least one core of newest revision. + // If there's a rolling update involving version upgrade, replicants should be + // able to connect to at least one core. + if r.state.numCoresRevision(coreSet.Status.UpdateRevision) == 0 { + return reconcilePostpone() } rs := newReplicaSet(instance, r.conf) - rsHash := rs.Labels[crdv2.LabelPodTemplateHash] + rsHash := rs.Labels[crd.LabelPodTemplateHash] needCreate := false updateReplicantSet := r.state.updateReplicantSet(instance) @@ -63,14 +70,13 @@ func (a *addReplicantSet) reconcile(r *reconcileRound, instance *crdv2.EMQX) sub _ = ctrl.SetControllerReference(instance, rs, a.Scheme) if err := a.Handler.Create(r.ctx, rs); err != nil { if k8sErrors.IsAlreadyExists(emperror.Cause(err)) { - cond := instance.Status.GetLastTrueCondition() - if cond != nil && cond.Type != crdv2.Available && cond.Type != crdv2.Ready { - // Sometimes the updated replicaSet will not be ready, because the EMQX node can not be started. - // And then we will rollback EMQX CR spec, the EMQX operator controller will create a new replicaSet. - // But the new replicaSet will be the same as the previous one, so we didn't need to create it, just change the EMQX status. + if !instance.Status.IsConditionTrue(crd.Ready) { + // The updated replicaSet may not be ready because the EMQX node can not be started. + // If the user reverts the CR spec, the desired RS matches the current revision — + // just update the status instead of creating a duplicate. 
if rsHash == instance.Status.ReplicantNodesStatus.CurrentRevision { - _ = a.updateEMQXStatus(r, instance, "RevertReplicaSet", rsHash) - return subResult{} + updateResult := a.updateEMQXStatus(r, instance, rsHash) + return subResult{err: updateResult} } } if instance.Status.ReplicantNodesStatus.CollisionCount == nil { @@ -78,56 +84,57 @@ func (a *addReplicantSet) reconcile(r *reconcileRound, instance *crdv2.EMQX) sub } *instance.Status.ReplicantNodesStatus.CollisionCount++ _ = a.Client.Status().Update(r.ctx, instance) - return subResult{result: ctrl.Result{Requeue: true}} + return reconcileRequeue() } - return subResult{err: emperror.Wrap(err, "failed to create replicaSet")} + return reconcileError(emperror.Wrap(err, "failed to create replicaSet")) } - updateResult := a.updateEMQXStatus(r, instance, "CreateReplicaSet", rsHash) - return subResult{err: updateResult} + updateResult := a.updateEMQXStatus(r, instance, rsHash) + return subResult{err: updateResult, immediateResult: &ctrl.Result{RequeueAfter: time.Second}} } rs.ObjectMeta = updateReplicantSet.ObjectMeta rs.Spec.Template.ObjectMeta = updateReplicantSet.Spec.Template.ObjectMeta rs.Spec.Selector = updateReplicantSet.Spec.Selector - if patchResult, _ := a.Patcher.Calculate( + patchResult, _ := a.Patcher.Calculate( updateReplicantSet, rs, patch.IgnoreStatusFields(), patch.IgnoreVolumeClaimTemplateTypeMetaAndStatus(), - ); !patchResult.IsEmpty() { + ) + if !patchResult.IsEmpty() { // Update replicaSet r.log.Info("updating replicaSet", "replicaSet", klog.KObj(rs), "reason", "replicaSet has changed", "patch", string(patchResult.Patch), ) - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - storage := &appsv1.ReplicaSet{} - _ = a.Client.Get(r.ctx, client.ObjectKeyFromObject(rs), storage) - rs.ResourceVersion = storage.ResourceVersion - return a.Handler.Update(r.ctx, rs) - }); err != nil { - return subResult{err: emperror.Wrap(err, "failed to update replicaSet")} + // NOTE + // Conflicts are 
expected as ReplicaSet controller may act concurrently on the resource. + // Conflicts are handled on `EMQXReconciler` level. + err := a.Handler.Update(r.ctx, rs) + if err != nil { + return reconcileError(emperror.Wrap(err, "failed to update replicaSet")) } - updateResult := a.updateEMQXStatus(r, instance, "UpdateReplicaSet", rsHash) - return subResult{err: updateResult} + updateResult := a.updateEMQXStatus(r, instance, rsHash) + return subResult{err: updateResult, immediateResult: &ctrl.Result{RequeueAfter: time.Second}} } + return subResult{} } -func (a *addReplicantSet) updateEMQXStatus(r *reconcileRound, instance *crdv2.EMQX, reason, podTemplateHash string) error { - instance.Status.ResetConditions(reason) +func (a *addReplicantSet) updateEMQXStatus(r *reconcileRound, instance *crd.EMQX, podTemplateHash string) error { instance.Status.ReplicantNodesStatus.UpdateRevision = podTemplateHash + forceReplicantNodesProgressing(instance) return a.Client.Status().Update(r.ctx, instance) } -func newReplicaSet(instance *crdv2.EMQX, conf *config.EMQX) *appsv1.ReplicaSet { +func newReplicaSet(instance *crd.EMQX, conf *config.EMQX) *appsv1.ReplicaSet { rs := generateReplicaSet(instance) podTemplateHash := computeHash(rs.Spec.Template.DeepCopy(), instance.Status.ReplicantNodesStatus.CollisionCount) rs.Name = rs.Name + "-" + podTemplateHash - rs.Labels[crdv2.LabelPodTemplateHash] = podTemplateHash - rs.Spec.Template.Labels[crdv2.LabelPodTemplateHash] = podTemplateHash - rs.Spec.Selector = util.CloneSelectorAndAddLabel(rs.Spec.Selector, crdv2.LabelPodTemplateHash, podTemplateHash) + rs.Labels[crd.LabelPodTemplateHash] = podTemplateHash + rs.Spec.Template.Labels[crd.LabelPodTemplateHash] = podTemplateHash + rs.Spec.Selector = util.CloneSelectorAndAddLabel(rs.Spec.Selector, crd.LabelPodTemplateHash, podTemplateHash) rs.Spec.Template.Spec.Containers[0].Ports = util.MergeContainerPorts( rs.Spec.Template.Spec.Containers[0].Ports, 
util.MapServicePortsToContainerPorts(conf.GetDashboardServicePorts()), @@ -135,15 +142,15 @@ func newReplicaSet(instance *crdv2.EMQX, conf *config.EMQX) *appsv1.ReplicaSet { return rs } -func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { +func generateReplicaSet(instance *crd.EMQX) *appsv1.ReplicaSet { + template := instance.Spec.ReplicantTemplate + // Add a PreStop hook to leave the cluster when the pod is asked to stop. // This is especially important when DS Raft is enabled, otherwise there will be a // lot of leftover records in the DS cluster metadata. - lifecycle := instance.Spec.ReplicantTemplate.Spec.Lifecycle - if lifecycle == nil { - lifecycle = &corev1.Lifecycle{} - } else { - lifecycle = lifecycle.DeepCopy() + lifecycle := &corev1.Lifecycle{} + if template.Spec.Lifecycle != nil { + lifecycle = template.Spec.Lifecycle.DeepCopy() } lifecycle.PreStop = &corev1.LifecycleHandler{ Exec: &corev1.ExecAction{ @@ -151,6 +158,16 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { }, } + readinessProbe := resources.EvacuationReadinessProbe() + + // Prefer evacuation-aware probe over older-version defaults. 
+ if template.Spec.ReadinessProbe != nil { + if template.Spec.ReadinessProbe.HTTPGet != nil && + template.Spec.ReadinessProbe.HTTPGet.Path != "/status" { + readinessProbe = template.Spec.ReadinessProbe.DeepCopy() + } + } + cookie := resources.Cookie(instance) config := resources.EMQXConfig(instance) @@ -162,42 +179,38 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { ObjectMeta: metav1.ObjectMeta{ Namespace: instance.Namespace, Name: instance.ReplicantNamespacedName().Name, - Annotations: instance.Spec.ReplicantTemplate.DeepCopy().Annotations, + Annotations: util.CloneAnnotations(template.Annotations), Labels: replicaSetLabels(instance), }, Spec: appsv1.ReplicaSetSpec{ - Replicas: instance.Spec.ReplicantTemplate.Spec.Replicas, + Replicas: template.Spec.Replicas, + MinReadySeconds: instance.Spec.ReplicantTemplate.Spec.MinReadySeconds, Selector: &metav1.LabelSelector{ MatchLabels: replicaSetLabels(instance), }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Annotations: instance.Spec.ReplicantTemplate.DeepCopy().Annotations, + Annotations: util.CloneAnnotations(template.Annotations), Labels: replicaSetLabels(instance), }, Spec: corev1.PodSpec{ - ReadinessGates: []corev1.PodReadinessGate{ - { - ConditionType: crdv2.PodOnServing, - }, - }, ImagePullSecrets: instance.Spec.ImagePullSecrets, ServiceAccountName: instance.Spec.ServiceAccountName, - SecurityContext: instance.Spec.ReplicantTemplate.Spec.PodSecurityContext, - Affinity: instance.Spec.ReplicantTemplate.Spec.Affinity, - Tolerations: instance.Spec.ReplicantTemplate.Spec.Tolerations, - TopologySpreadConstraints: instance.Spec.ReplicantTemplate.Spec.TopologySpreadConstraints, - NodeName: instance.Spec.ReplicantTemplate.Spec.NodeName, - NodeSelector: instance.Spec.ReplicantTemplate.Spec.NodeSelector, - InitContainers: instance.Spec.ReplicantTemplate.Spec.InitContainers, + SecurityContext: template.Spec.PodSecurityContext, + Affinity: template.Spec.Affinity, + Tolerations: 
template.Spec.Tolerations, + TopologySpreadConstraints: template.Spec.TopologySpreadConstraints, + NodeName: template.Spec.NodeName, + NodeSelector: template.Spec.NodeSelector, + InitContainers: template.Spec.InitContainers, Containers: append([]corev1.Container{ { - Name: crdv2.DefaultContainerName, + Name: crd.DefaultContainerName, Image: instance.Spec.Image, ImagePullPolicy: instance.Spec.ImagePullPolicy, - Command: instance.Spec.ReplicantTemplate.Spec.Command, - Args: instance.Spec.ReplicantTemplate.Spec.Args, - Ports: instance.Spec.ReplicantTemplate.Spec.Ports, + Command: template.Spec.Command, + Args: template.Spec.Args, + Ports: template.Spec.Ports, Env: append([]corev1.EnvVar{ { Name: "EMQX_CLUSTER__DISCOVERY_STRATEGY", @@ -228,13 +241,13 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { Value: "replicant", }, cookie.EnvVar(), - }, instance.Spec.ReplicantTemplate.Spec.Env...), - EnvFrom: instance.Spec.ReplicantTemplate.Spec.EnvFrom, - Resources: instance.Spec.ReplicantTemplate.Spec.Resources, - SecurityContext: instance.Spec.ReplicantTemplate.Spec.ContainerSecurityContext, - LivenessProbe: instance.Spec.ReplicantTemplate.Spec.LivenessProbe, - ReadinessProbe: instance.Spec.ReplicantTemplate.Spec.ReadinessProbe, - StartupProbe: instance.Spec.ReplicantTemplate.Spec.StartupProbe, + }, template.Spec.Env...), + EnvFrom: template.Spec.EnvFrom, + Resources: template.Spec.Resources, + SecurityContext: template.Spec.ContainerSecurityContext, + LivenessProbe: template.Spec.LivenessProbe, + ReadinessProbe: readinessProbe, + StartupProbe: template.Spec.StartupProbe, Lifecycle: lifecycle, VolumeMounts: slices.Concat( []corev1.VolumeMount{ @@ -248,10 +261,10 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { }, }, config.VolumeMounts(), - instance.Spec.ReplicantTemplate.Spec.ExtraVolumeMounts, + template.Spec.ExtraVolumeMounts, ), }, - }, instance.Spec.ReplicantTemplate.Spec.ExtraContainers...), + }, 
template.Spec.ExtraContainers...), Volumes: append([]corev1.Volume{ config.Volume(), { @@ -266,7 +279,7 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { EmptyDir: &corev1.EmptyDirVolumeSource{}, }, }, - }, instance.Spec.ReplicantTemplate.Spec.ExtraVolumes...), + }, template.Spec.ExtraVolumes...), }, }, }, @@ -274,6 +287,6 @@ func generateReplicaSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { } // Combine instance labels, replicant labels and template labels. -func replicaSetLabels(instance *crdv2.EMQX) map[string]string { - return instance.DefaultLabelsWith(crdv2.ReplicantLabels(), instance.Spec.ReplicantTemplate.Labels) +func replicaSetLabels(instance *crd.EMQX) map[string]string { + return instance.DefaultLabelsWith(crd.ReplicantLabels(), instance.Spec.ReplicantTemplate.Labels) } diff --git a/internal/controller/add_replicant_set_suite_test.go b/internal/controller/add_replicant_set_suite_test.go index 2b46937c8..d40286536 100644 --- a/internal/controller/add_replicant_set_suite_test.go +++ b/internal/controller/add_replicant_set_suite_test.go @@ -3,7 +3,7 @@ package controller import ( "time" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -15,47 +15,18 @@ import ( . 
"github.com/onsi/gomega" ) -func actualInstance(instance *crdv2.EMQX) *crdv2.EMQX { - _ = k8sClient.Get(ctx, client.ObjectKeyFromObject(instance), instance) - return instance -} - -func replicantSets(instance *crdv2.EMQX) []appsv1.ReplicaSet { - list := &appsv1.ReplicaSetList{} - _ = k8sClient.List(ctx, list, - client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.ReplicantLabels())), - ) - return list.Items -} - -func adoptReplicantSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { - list := replicantSets(instance) - if len(list) == 0 { - return nil - } - rs := list[0].DeepCopy() - rsHash := rs.Labels[crdv2.LabelPodTemplateHash] - instance.Status.ReplicantNodesStatus.UpdateRevision = rsHash - return rs -} - -func replicantSetsReconcileRound(instance *crdv2.EMQX) *reconcileRound { - round := newReconcileRound() - round.state = loadReconcileState(ctx, k8sClient, instance) - return round -} - var _ = Describe("Reconciler addReplicantSet", Ordered, func() { - var a *addReplicantSet - var instance *crdv2.EMQX = &crdv2.EMQX{} var ns *corev1.Namespace = &corev1.Namespace{} + var instance *crd.EMQX = &crd.EMQX{} + var coreSet *appsv1.StatefulSet + var corePod0, corePod1 *corev1.Pod + var a *addReplicantSet + var round *reconcileRound BeforeAll(func() { - // Create namespace: ns = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-add-emqx-repl-test", + Name: "controller-add-emqx-repl-test", Labels: map[string]string{ "test": "e2e", }, @@ -69,203 +40,266 @@ var _ = Describe("Reconciler addReplicantSet", Ordered, func() { }) BeforeEach(func() { - // Create instance: + // Create EMQX instance: instance = emqx.DeepCopy() instance.Namespace = ns.Name - instance.Spec.ReplicantTemplate = &crdv2.EMQXReplicantTemplate{ - Spec: crdv2.EMQXReplicantTemplateSpec{ + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(2)) + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: 
crd.EMQXReplicantTemplateSpec{ Replicas: ptr.To(int32(3)), }, } - instance.Status = crdv2.EMQXStatus{ - ReplicantNodesStatus: crdv2.EMQXNodesStatus{ - Replicas: 3, + Expect(k8sClient.Create(ctx, instance)).To(Succeed()) + // Simulate core nodes readiness: + coreLabels := instance.DefaultLabelsWith(crd.CoreLabels()) + coreSet = &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: instance.Name + "-core", + Namespace: ns.Name, + Labels: coreLabels, }, - Conditions: []metav1.Condition{ - { - Type: crdv2.Ready, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - Reason: crdv2.Ready, + Spec: appsv1.StatefulSetSpec{ + ServiceName: instance.Name + "-core", + Replicas: ptr.To(int32(2)), + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.OnDeleteStatefulSetStrategyType, }, - { - Type: crdv2.CoreNodesReady, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - Reason: crdv2.CoreNodesReady, + Selector: &metav1.LabelSelector{ + MatchLabels: coreLabels, }, - { - Type: crdv2.ReplicantNodesReady, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - Reason: crdv2.ReplicantNodesReady, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: coreLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, }, - { - Type: crdv2.Initialized, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -10)}, - Reason: crdv2.Initialized, + }, + } + Expect(k8sClient.Create(ctx, coreSet)).To(Succeed()) + corePod0 = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: coreSet.Name + "-0", + Namespace: ns.Name, + Labels: coreLabels, + OwnerReferences: ownerReferences(coreSet), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, }, }, } - // Instantiate 
reconciler: + corePod1 = corePod0.DeepCopy() + corePod1.ObjectMeta.Name = coreSet.Name + "-1" + Expect(k8sClient.Create(ctx, corePod0)).To(Succeed()) + Expect(k8sClient.Create(ctx, corePod1)).To(Succeed()) + instance.Status.CoreNodesStatus.ReadyReplicas = 2 + coreSet.Status.Replicas = 2 + coreSet.Status.ReadyReplicas = 2 + coreSet.Status.AvailableReplicas = 2 + coreSet.Status.UpdatedReplicas = 2 + coreSet.Status.CurrentReplicas = 2 + corePod0.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now().Add(-1 * time.Minute)), + }, + } + corePod1.Status.Conditions = corePod0.Status.Conditions + Expect(k8sClient.Status().Update(ctx, instance)).To(Succeed()) + Expect(k8sClient.Status().Update(ctx, coreSet)).To(Succeed()) + Expect(k8sClient.Status().Update(ctx, corePod0)).To(Succeed()) + Expect(k8sClient.Status().Update(ctx, corePod1)).To(Succeed()) + // Instantiate reconciler and reconcile round: a = &addReplicantSet{emqxReconciler} + round = newReconcileRound() + round.state, _ = loadReconcileState(ctx, k8sClient, instance) + }) + + AfterEach(func() { + _ = k8sClient.Delete(ctx, corePod0) + _ = k8sClient.Delete(ctx, corePod1) + Expect(k8sClient.Delete(ctx, coreSet)).To(Succeed()) + Expect(k8sClient.Delete(ctx, instance)).To(Succeed()) }) - Context("replicant template is nil", func() { + When("replicant template is nil", func() { It("should do nothing", func() { // Clear replicant template: instance.Spec.ReplicantTemplate = nil // Reconciliation step should do nothing and succeed: - round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - Eventually(replicantSets).WithArguments(instance). 
- Should(BeEmpty()) + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(BeEmpty()) }) }) - Context("core nodes is not ready", func() { - It("should do nothing", func() { - // Remove core nodes ready condition: - instance.Status.RemoveCondition(crdv2.CoreNodesReady) - // Reconciliation step should succeed: - round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - Eventually(replicantSets).WithArguments(instance). - Should(BeEmpty()) + When("single core node instance", func() { + It("should fail to create", func() { + instanceInvalid := instance.DeepCopy() + instanceInvalid.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instanceInvalid.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(2)) + Expect(k8sClient.Create(ctx, instanceInvalid)).To(HaveOccurred()) }) }) - Context("replicant template is not nil, and core code is ready", func() { - It("should create replicaSet", func() { - round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - Eventually(replicantSets).WithArguments(instance). 
- Should(ConsistOf( - HaveField("Spec.Template.Spec.Containers", ConsistOf( - HaveField("Image", Equal(instance.Spec.Image)), - )), - )) + When("no available core pods", func() { + It("should do nothing", func() { + coreSet.Status.AvailableReplicas = 0 + Expect(k8sClient.Status().Update(ctx, coreSet)).To(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + // Reconciliation step should succeed but not create any RS: + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(BeEmpty()) }) }) - Context("scale down replicas count", func() { + When("core nodes are ready", func() { + It("should create replicaSet", func() { + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Template.Spec.Containers", ConsistOf( + HaveField("Image", Equal(instance.Spec.Image)), + )), + )) + }) - BeforeAll(func() { - Eventually(adoptReplicantSet).WithArguments(instance).Should(Not(BeNil())) + AfterAll(func() { + cleanupReplicantSets(instance) }) + }) - JustBeforeEach(func() { - rs := adoptReplicantSet(instance) - rs.Status.Replicas = 3 - Expect(k8sClient.Status().Update(ctx, rs)).Should(Succeed()) - Eventually(func() *appsv1.ReplicaSet { - _ = k8sClient.Get(ctx, client.ObjectKeyFromObject(rs), rs) - return rs - }).WithTimeout(timeout).WithPolling(interval).Should( - HaveField("Status.Replicas", BeEquivalentTo(3)), - ) + When("number of replicas decreases", func() { + BeforeAll(func() { + // Start with 3 replicas: + instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(3)) + // Run the reconcile to create replicantSet: + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(3))), + )) }) - It("should update replicaSet", func() { + It("should scale down replicaSet", 
func() { // Set replicas count to 0: instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(0)) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) // Reconciliation step should succeed: - round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + // Status conditions should reset: + Expect(actualObject(instance)).To(And( + HaveCondition(crd.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), + HaveCondition(crd.ReplicantNodesProgressing, HaveField("Status", Equal(metav1.ConditionTrue))), + )) // ReplicaSet should be updated in place: - Eventually(replicantSets).WithArguments(instance). - Should(ConsistOf( - HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(0))), - )) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(0))), + )) }) AfterAll(func() { - Eventually(adoptReplicantSet).WithArguments(instance).Should(Not(BeNil())) + cleanupReplicantSets(instance) }) - }) - Context("scale up replicas count", func() { - + When("number of replicas increases", func() { BeforeAll(func() { - Eventually(adoptReplicantSet).WithArguments(instance).Should(Not(BeNil())) + // Start with 1 replicas: + instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(1)) + // Run the reconcile to create replicantSet: + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(1))), + )) }) - It("should update replicaSet", func() { + It("scales up replicaSet", func() { // Set replicas count to 4: instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(4)) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) // Reconciliation step should succeed: - 
round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - // ReplicaSet should be updated: - Eventually(replicantSets).WithArguments(instance). - Should(ConsistOf( - HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(4))), - )) + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) // Status conditions should reset: - Eventually(actualInstance).WithArguments(instance). - Should(And( - WithTransform( - func(emqx *crdv2.EMQX) *metav1.Condition { return emqx.Status.GetLastTrueCondition() }, - HaveField("Type", Equal(crdv2.Initialized)), - ), - HaveCondition(crdv2.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), - HaveCondition(crdv2.ReplicantNodesReady, HaveField("Status", Equal(metav1.ConditionFalse))), - )) + Expect(actualObject(instance)).To(And( + HaveCondition(crd.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), + HaveCondition(crd.ReplicantNodesProgressing, HaveField("Status", Equal(metav1.ConditionTrue))), + )) + // ReplicaSet should be updated: + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(4))), + )) }) + AfterAll(func() { + cleanupReplicantSets(instance) + }) }) - Context("change image", func() { - + When("image is changed", func() { BeforeAll(func() { - Eventually(adoptReplicantSet).WithArguments(instance).Should(Not(BeNil())) + // Start with 1 replicas: + instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(1)) + // Run the reconcile to create replicantSet: + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(1))), + )) }) It("should create new replicaSet", func() { // Introduce changes that require creating a new replicaSet: instance.Spec.Image = "emqx/emqx" - 
instance.Spec.UpdateStrategy.InitialDelaySeconds = int32(999999999) + Expect(k8sClient.Update(ctx, instance)).To(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) // Reconciliation step should succeed: - round := replicantSetsReconcileRound(instance) - Eventually(a.reconcile).WithArguments(round, instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) + result := a.reconcile(round, instance) + Expect(result.err).ToNot(HaveOccurred()) // There should be two replicaSets soon: - Eventually(replicantSets).WithArguments(instance). - Should(ConsistOf( - HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal(emqx.Spec.Image)))), - HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal(instance.Spec.Image)))), - )) - // Status conditions should reset to `ReplicantNodesProgressing`: - Eventually(actualInstance).WithArguments(instance). - Should(And( - WithTransform( - func(emqx *crdv2.EMQX) *metav1.Condition { return emqx.Status.GetLastTrueCondition() }, - HaveField("Type", Equal(crdv2.Initialized)), - ), - HaveCondition(crdv2.Ready, HaveField("Status", Equal(metav1.ConditionFalse))), - HaveCondition(crdv2.ReplicantNodesReady, HaveField("Status", Equal(metav1.ConditionFalse))), - )) + Expect(replicantSets(instance)).To(ConsistOf( + HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal(emqx.Spec.Image)))), + HaveField("Spec.Template.Spec.Containers", ConsistOf(HaveField("Image", Equal("emqx/emqx")))), + )) + // Update revision should differ from current revision (new RS created): + Expect(actualObject(instance)).To( + HaveField("Status.ReplicantNodesStatus", WithTransform( + func(s crd.ReplicantNodesStatus) bool { + return s.UpdateRevision != "" && s.UpdateRevision != s.CurrentRevision + }, + BeTrue(), + ))) }) + AfterAll(func() { + cleanupReplicantSets(instance) + }) }) }) + +func replicantSets(instance *crd.EMQX) []appsv1.ReplicaSet { + 
list := &appsv1.ReplicaSetList{} + _ = k8sClient.List(ctx, list, + client.InNamespace(instance.Namespace), + client.MatchingLabels(instance.DefaultLabelsWith(crd.ReplicantLabels())), + ) + return list.Items +} + +func cleanupReplicantSets(instance *crd.EMQX) { + replicantSets := replicantSets(instance) + for _, rs := range replicantSets { + _ = k8sClient.Delete(ctx, &rs) + } +} diff --git a/internal/controller/add_replicant_set_test.go b/internal/controller/add_replicant_set_test.go index 3ef36b077..c3dcbfa12 100644 --- a/internal/controller/add_replicant_set_test.go +++ b/internal/controller/add_replicant_set_test.go @@ -3,7 +3,7 @@ package controller import ( "testing" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -12,7 +12,7 @@ import ( ) func TestGetNewReplicaSet(t *testing.T) { - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", @@ -23,12 +23,12 @@ func TestGetNewReplicaSet(t *testing.T) { "emqx-annotation-key": "emqx-annotation-value", }, }, - Spec: crdv2.EMQXSpec{ + Spec: crd.EMQXSpec{ Image: "emqx/emqx:5.1", ClusterDomain: "cluster.local", }, } - instance.Spec.ReplicantTemplate = &crdv2.EMQXReplicantTemplate{ + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "repl-label-key": "repl-label-value", @@ -37,11 +37,11 @@ func TestGetNewReplicaSet(t *testing.T) { "repl-annotation-key": "repl-annotation-value", }, }, - Spec: crdv2.EMQXReplicantTemplateSpec{ + Spec: crd.EMQXReplicantTemplateSpec{ Replicas: ptr.To(int32(3)), }, } - instance.Status.ReplicantNodesStatus = crdv2.EMQXNodesStatus{ + instance.Status.ReplicantNodesStatus = crd.ReplicantNodesStatus{ CollisionCount: ptr.To(int32(0)), } @@ -52,11 +52,12 @@ func TestGetNewReplicaSet(t 
*testing.T) { assert.Equal(t, emqx.Spec.ReplicantTemplate.Annotations, got.Annotations) assert.Equal(t, "repl-label-value", got.Labels["repl-label-key"]) - assert.Equal(t, "emqx", got.Labels[crdv2.LabelInstance]) - assert.Equal(t, "emqx-operator", got.Labels[crdv2.LabelManagedBy]) - assert.Equal(t, "replicant", got.Labels[crdv2.LabelDBRole]) - assert.Equal(t, "emqx-replicant-"+got.Labels[crdv2.LabelPodTemplateHash], got.Name) + assert.Equal(t, "emqx", got.Labels[crd.LabelInstance]) + assert.Equal(t, "emqx-operator", got.Labels[crd.LabelManagedBy]) + assert.Equal(t, "replicant", got.Labels[crd.LabelDBRole]) + assert.Equal(t, "emqx-replicant-"+got.Labels[crd.LabelPodTemplateHash], got.Name) assert.Equal(t, emqx.Namespace, got.Namespace) + assert.EqualValues(t, int32(0), got.Spec.MinReadySeconds) }) t.Run("check selector and pod metadata", func(t *testing.T) { @@ -66,19 +67,19 @@ func TestGetNewReplicaSet(t *testing.T) { assert.Equal(t, emqx.Spec.ReplicantTemplate.ObjectMeta.Annotations, got.Spec.Template.Annotations) assert.EqualValues(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "replicant", - crdv2.LabelPodTemplateHash: got.Labels[crdv2.LabelPodTemplateHash], - "repl-label-key": "repl-label-value", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "replicant", + crd.LabelPodTemplateHash: got.Labels[crd.LabelPodTemplateHash], + "repl-label-key": "repl-label-value", }, got.Spec.Template.Labels) assert.EqualValues(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "replicant", - crdv2.LabelPodTemplateHash: got.Labels[crdv2.LabelPodTemplateHash], - "repl-label-key": "repl-label-value", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "replicant", + crd.LabelPodTemplateHash: got.Labels[crd.LabelPodTemplateHash], + "repl-label-key": "repl-label-value", }, 
got.Spec.Selector.MatchLabels) }) diff --git a/internal/controller/add_service.go b/internal/controller/add_service.go index aedc12a50..e7123b5d2 100644 --- a/internal/controller/add_service.go +++ b/internal/controller/add_service.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" util "github.com/emqx/emqx-operator/internal/controller/util" "github.com/emqx/emqx-operator/internal/emqx/api" @@ -12,18 +12,19 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +// Responsibilities: +// - Sets up Service resources: for MQTT/Gateway EMQX listeners, and for the API/Dashboard endpoint. +// - Switches target set of pods on readiness change, see `listenerServiceSelector`. type addService struct { *EMQXReconciler } -func (a *addService) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (a *addService) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { + // Postpone if there are no usable cores yet. + // Should proceed once one core replica is Ready. 
req := r.oldestCoreRequester() if req == nil { - return subResult{} - } - - if !instance.Status.IsConditionTrue(crdv2.CoreNodesReady) { - return subResult{} + return reconcilePostpone() } configStr, err := api.Configs(req) @@ -40,7 +41,7 @@ func (a *addService) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul if dashboard := generateDashboardService(instance, conf); dashboard != nil { resources = append(resources, dashboard) } - if listeners := generateListenerService(instance, conf); listeners != nil { + if listeners := generateListenerService(r, instance, conf); listeners != nil { resources = append(resources, listeners) } @@ -50,7 +51,7 @@ func (a *addService) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul return subResult{} } -func generateDashboardService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.Service { +func generateDashboardService(instance *crd.EMQX, conf *config.EMQX) *corev1.Service { meta := &metav1.ObjectMeta{} spec := &corev1.ServiceSpec{} if instance.Spec.DashboardServiceTemplate != nil { @@ -67,7 +68,7 @@ func generateDashboardService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.S } spec.Ports = util.MergeServicePorts(spec.Ports, ports) - spec.Selector = instance.DefaultLabelsWith(crdv2.CoreLabels()) + spec.Selector = instance.DefaultLabelsWith(crd.CoreLabels()) return &corev1.Service{ TypeMeta: metav1.TypeMeta{ @@ -84,7 +85,7 @@ func generateDashboardService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.S } } -func generateListenerService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.Service { +func generateListenerService(r *reconcileRound, instance *crd.EMQX, conf *config.EMQX) *corev1.Service { meta := &metav1.ObjectMeta{} spec := &corev1.ServiceSpec{} if instance.Spec.ListenersServiceTemplate != nil { @@ -126,10 +127,7 @@ func generateListenerService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.Se } spec.Ports = util.MergeServicePorts(spec.Ports, ports) - spec.Selector = 
instance.DefaultLabelsWith(crdv2.CoreLabels()) - if instance.Spec.HasReplicants() && instance.Status.ReplicantNodesStatus.ReadyReplicas > 0 { - spec.Selector = instance.DefaultLabelsWith(crdv2.ReplicantLabels()) - } + spec.Selector = listenerServiceSelector(r, instance) return &corev1.Service{ TypeMeta: metav1.TypeMeta{ APIVersion: "v1", @@ -144,3 +142,35 @@ func generateListenerService(instance *crdv2.EMQX, conf *config.EMQX) *corev1.Se Spec: *spec, } } + +// listenerServiceSelector chooses Service endpoints for MQTT/TLS listeners. +// ReplicaSet readiness uses Status.ReadyReplicas only (not EMQX node status). +// 1. No replicants in spec -> cores serve. +// 2. If the "update" replicant set has Ready replicas, its pods serve. +// 3. While the "update" replicant set is not ready yet, the "current" replicant set serves if +// it has ready pods. +// 4. If no replicant sets can serve traffic, cores serve. +// +// Criteria are intentionally lax (ReadyReplicas > 0): +// during replicant restarts or scale-up, both ReplicaSets can sit below desired ready +// for a while; requiring ReadyReplicas >= desired would send traffic to cores and drop +// listener sessions. Prefer routing to whichever revision still has ready pods. 
+func listenerServiceSelector(r *reconcileRound, instance *crd.EMQX) map[string]string { + if instance.Spec.HasReplicants() { + updateRs := r.state.updateReplicantSet(instance) + currentRs := r.state.currentReplicantSet(instance) + if updateRs != nil && updateRs.Status.ReadyReplicas > 0 { + return instance.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: instance.Status.ReplicantNodesStatus.UpdateRevision}, + ) + } + if currentRs != nil && currentRs.Status.ReadyReplicas > 0 { + return instance.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: instance.Status.ReplicantNodesStatus.CurrentRevision}, + ) + } + } + return instance.DefaultLabelsWith(crd.CoreLabels()) +} diff --git a/internal/controller/add_service_suite_test.go b/internal/controller/add_service_suite_test.go index acbdbde38..10d677e42 100644 --- a/internal/controller/add_service_suite_test.go +++ b/internal/controller/add_service_suite_test.go @@ -1,60 +1,193 @@ package controller import ( - crdv2 "github.com/emqx/emqx-operator/api/v2" + "net/http" + "net/url" + "strings" + + crd "github.com/emqx/emqx-operator/api/v3alpha1" + config "github.com/emqx/emqx-operator/internal/controller/config" + req "github.com/emqx/emqx-operator/internal/requester" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + k8sErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" ) +func mockConfigsRequester(configBody string) req.RequesterInterface { + return req.NewMockRequester( + func(method string, u url.URL, body []byte, header http.Header) (*http.Response, []byte, error) { + if method == "GET" && strings.Contains(u.Path, "api/v5/configs") { + return &http.Response{StatusCode: http.StatusOK}, []byte(configBody), nil + } + return &http.Response{StatusCode: http.StatusNotImplemented}, nil, nil + }, + ) +} + var _ = Describe("Reconciler addService", Ordered, func() { var a *addService - var instance *crdv2.EMQX = &crdv2.EMQX{} - var ns *corev1.Namespace = &corev1.Namespace{} + var instance *crd.EMQX + var ns *corev1.Namespace + var round *reconcileRound - BeforeEach(func() { - a = &addService{emqxReconciler} + validConfig := config.WithDefaults("") + BeforeAll(func() { ns = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-add-headless-svc-test", + Name: "controller-add-service-test", Labels: map[string]string{ "test": "e2e", }, }, } + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + }) + BeforeEach(func() { instance = emqx.DeepCopy() instance.Namespace = ns.Name - instance.Spec.CoreTemplate = crdv2.EMQXCoreTemplate{ + instance.Spec.CoreTemplate = crd.EMQXCoreTemplate{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"test": "label"}, }, } + a = &addService{emqxReconciler} + round = newReconcileRoundWithRequester(mockConfigsRequester(validConfig)) }) - It("create namespace", func() { - Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + AfterEach(func() { + var serviceList corev1.ServiceList + Expect(k8sClient.List(ctx, &serviceList, client.InNamespace(ns.Name))).To(Succeed()) + for _, service := range serviceList.Items { + _ = k8sClient.Delete(ctx, 
&service) + } + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) + }) + + It("postpones when there is no usable API requester yet", func() { + r := &reconcileRound{ + ctx: ctx, + log: logger, + conf: emqxConf, + requester: &apiRequesterUnavailable{}, + state: &reconcileState{}, + } + Expect(a.reconcile(r, instance)).To(Equal(subResult{needRequeue: true})) + }) + + It("creates Dashboard and Listeners Services from EMQX config", func() { + Eventually(a.reconcile).WithArguments(round, instance). + Should(Equal(subResult{})) + + dashboard := &corev1.Service{} + Expect(k8sClient.Get(ctx, instance.DashboardServiceNamespacedName(), dashboard)).To(Succeed()) + Expect(dashboard.Spec.Selector).To(Equal(instance.DefaultLabelsWith(crd.CoreLabels()))) + + listeners := &corev1.Service{} + Expect(k8sClient.Get(ctx, instance.ListenersServiceNamespacedName(), listeners)).To(Succeed()) + Expect(listeners.Spec.Selector).To(Equal(instance.DefaultLabelsWith(crd.CoreLabels()))) + }) + + It("points the Listeners Service at current-revision replicants when recent revision not ready", func() { + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: crd.EMQXReplicantTemplateSpec{ + Replicas: ptr.To(int32(1)), + }, + } + instance.Status.ReplicantNodesStatus = crd.ReplicantNodesStatus{ + ReadyReplicas: 1, + UpdateRevision: "rev-update", + CurrentRevision: "rev-current", + } + round.state.replicantSets = []*appsv1.ReplicaSet{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-rev-current", + Labels: map[string]string{crd.LabelPodTemplateHash: "rev-current"}, + }, + Status: appsv1.ReplicaSetStatus{ReadyReplicas: 1}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-rev-update", + Labels: map[string]string{crd.LabelPodTemplateHash: "rev-update"}, + }, + Status: appsv1.ReplicaSetStatus{ReadyReplicas: 0}, + }, + } + + Eventually(a.reconcile).WithArguments(round, instance). 
+ Should(Equal(subResult{})) + + listeners := &corev1.Service{} + Expect(k8sClient.Get(ctx, instance.ListenersServiceNamespacedName(), listeners)).To(Succeed()) + Expect(listeners.Spec.Selector).To(Equal(instance.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: "rev-current"}, + ))) }) - It("generate svc", func() { - Eventually(a.reconcile).WithArguments(newReconcileRound(), instance). - WithTimeout(timeout). - WithPolling(interval). + It("points the Listeners Service at recent-revision replicants when ready", func() { + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: crd.EMQXReplicantTemplateSpec{ + Replicas: ptr.To(int32(1)), + }, + } + instance.Status.ReplicantNodesStatus = crd.ReplicantNodesStatus{ + ReadyReplicas: 1, + UpdateRevision: "rev-update", + CurrentRevision: "rev-current", + } + round.state.replicantSets = []*appsv1.ReplicaSet{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-rev-current", + Labels: map[string]string{crd.LabelPodTemplateHash: "rev-current"}, + }, + Status: appsv1.ReplicaSetStatus{ReadyReplicas: 1}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-rev-update", + Labels: map[string]string{crd.LabelPodTemplateHash: "rev-update"}, + }, + Status: appsv1.ReplicaSetStatus{ReadyReplicas: 1}, + }, + } + + Eventually(a.reconcile).WithArguments(round, instance). 
Should(Equal(subResult{})) - Eventually(func() *corev1.Service { - svc := &corev1.Service{} - _ = k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "emqx-dashboard"}, svc) - return svc - }).Should(Not(BeNil())) - - Eventually(func() *corev1.Service { - svc := &corev1.Service{} - _ = k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "emqx-listeners"}, svc) - return svc - }).Should(Not(BeNil())) + listeners := &corev1.Service{} + Expect(k8sClient.Get(ctx, instance.ListenersServiceNamespacedName(), listeners)).To(Succeed()) + Expect(listeners.Spec.Selector).To(Equal(instance.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: "rev-update"}, + ))) + }) + + It("does not create the Dashboard Service when the template is disabled", func() { + disabled := false + instance.Spec.DashboardServiceTemplate = &crd.ServiceTemplate{ + Enabled: &disabled, + } + + r := newReconcileRoundWithRequester(mockConfigsRequester(validConfig)) + Expect(a.reconcile(r, instance)).To(Equal(subResult{})) + + err := k8sClient.Get(ctx, instance.DashboardServiceNamespacedName(), &corev1.Service{}) + Expect(k8sErrors.IsNotFound(err)).To(BeTrue()) + + listeners := &corev1.Service{} + Expect(k8sClient.Get(ctx, instance.ListenersServiceNamespacedName(), listeners)).To(Succeed()) }) }) diff --git a/internal/controller/add_service_test.go b/internal/controller/add_service_test.go index 0fe41db11..b3ab4c1ff 100644 --- a/internal/controller/add_service_test.go +++ b/internal/controller/add_service_test.go @@ -3,7 +3,7 @@ package controller import ( "testing" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -16,11 +16,10 @@ func loadConf(data string) *config.EMQX { conf, _ := config.EMQXConfigWithDefaults(data) return conf } - func TestGenerateDashboardService(t 
*testing.T) { t.Run("check metadata", func(t *testing.T) { - emqx := &crdv2.EMQX{ + emqx := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", @@ -31,8 +30,8 @@ func TestGenerateDashboardService(t *testing.T) { "emqx-annotation-key": "emqx", }, }, - Spec: crdv2.EMQXSpec{ - DashboardServiceTemplate: &crdv2.ServiceTemplate{ + Spec: crd.EMQXSpec{ + DashboardServiceTemplate: &crd.ServiceTemplate{ Enabled: ptr.To(true), ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -63,8 +62,8 @@ func TestGenerateDashboardService(t *testing.T) { }) t.Run("check disabled", func(t *testing.T) { - emqx := &crdv2.EMQX{} - emqx.Spec.DashboardServiceTemplate = &crdv2.ServiceTemplate{ + emqx := &crd.EMQX{} + emqx.Spec.DashboardServiceTemplate = &crd.ServiceTemplate{ Enabled: ptr.To(false), } got := generateDashboardService(emqx, loadConf("")) @@ -72,21 +71,21 @@ func TestGenerateDashboardService(t *testing.T) { }) t.Run("check selector", func(t *testing.T) { - emqx := &crdv2.EMQX{ + emqx := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", }, } got := generateDashboardService(emqx, loadConf("")) assert.Equal(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "core", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", }, got.Spec.Selector) }) t.Run("check http ports", func(t *testing.T) { - emqx := &crdv2.EMQX{} + emqx := &crd.EMQX{} got := generateDashboardService(emqx, loadConf(` dashboard.listeners.http.bind = 18083 `)) @@ -101,7 +100,7 @@ func TestGenerateDashboardService(t *testing.T) { }) t.Run("check https ports", func(t *testing.T) { - emqx := &crdv2.EMQX{} + emqx := &crd.EMQX{} got := generateDashboardService(emqx, loadConf(` dashboard.listeners.http.bind = 0 dashboard.listeners.https.bind = 18084 @@ -117,7 +116,7 @@ func TestGenerateDashboardService(t *testing.T) { }) t.Run("check http and https ports", func(t *testing.T) { - emqx := 
&crdv2.EMQX{} + emqx := &crd.EMQX{} got := generateDashboardService(emqx, loadConf(` dashboard.listeners.http.bind = 18083 dashboard.listeners.https.bind = 18084 @@ -139,7 +138,7 @@ func TestGenerateDashboardService(t *testing.T) { }) t.Run("check empty ports", func(t *testing.T) { - emqx := &crdv2.EMQX{} + emqx := &crd.EMQX{} got := generateDashboardService(emqx, loadConf(` dashboard.listeners.http.bind = 0 dashboard.listeners.https.bind = 0 @@ -150,7 +149,7 @@ func TestGenerateDashboardService(t *testing.T) { func TestGenerateListenersService(t *testing.T) { t.Run("check metadata", func(t *testing.T) { - emqx := &crdv2.EMQX{ + emqx := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", @@ -161,8 +160,8 @@ func TestGenerateListenersService(t *testing.T) { "emqx-annotation-key": "emqx", }, }, - Spec: crdv2.EMQXSpec{ - ListenersServiceTemplate: &crdv2.ServiceTemplate{ + Spec: crd.EMQXSpec{ + ListenersServiceTemplate: &crd.ServiceTemplate{ Enabled: ptr.To(true), ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -175,7 +174,7 @@ func TestGenerateListenersService(t *testing.T) { }, }, } - got := generateListenerService(emqx, loadConf("")) + got := generateListenerService(newReconcileRound(), emqx, loadConf("")) assert.Equal(t, metav1.ObjectMeta{ Name: "emqx-listeners", Namespace: "emqx", @@ -191,57 +190,31 @@ func TestGenerateListenersService(t *testing.T) { }) t.Run("check disabled", func(t *testing.T) { - emqx := &crdv2.EMQX{} - emqx.Spec.ListenersServiceTemplate = &crdv2.ServiceTemplate{ + emqx := &crd.EMQX{} + emqx.Spec.ListenersServiceTemplate = &crd.ServiceTemplate{ Enabled: ptr.To(false), } - got := generateListenerService(emqx, loadConf("")) + got := generateListenerService(newReconcileRound(), emqx, loadConf("")) assert.Nil(t, got) }) - t.Run("should selector core pods", func(t *testing.T) { - emqx := &crdv2.EMQX{ - ObjectMeta: metav1.ObjectMeta{ - Name: "emqx", - }, - } - got := generateListenerService(emqx, loadConf("")) - 
assert.Equal(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "core", - }, got.Spec.Selector) - }) - - t.Run("should selector replicant pods", func(t *testing.T) { - emqx := &crdv2.EMQX{ + t.Run("check core pod selector by default", func(t *testing.T) { + emqx := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", }, - Spec: crdv2.EMQXSpec{ - ReplicantTemplate: &crdv2.EMQXReplicantTemplate{ - Spec: crdv2.EMQXReplicantTemplateSpec{ - Replicas: ptr.To(int32(3)), - }, - }, - }, - Status: crdv2.EMQXStatus{ - ReplicantNodesStatus: crdv2.EMQXNodesStatus{ - ReadyReplicas: 3, - }, - }, } - got := generateListenerService(emqx, loadConf("")) + got := generateListenerService(newReconcileRound(), emqx, loadConf("")) assert.Equal(t, map[string]string{ - crdv2.LabelInstance: "emqx", - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelDBRole: "replicant", + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", }, got.Spec.Selector) }) t.Run("check default ports", func(t *testing.T) { - emqx := &crdv2.EMQX{} - got := generateListenerService(emqx, loadConf("")) + emqx := &crd.EMQX{} + got := generateListenerService(newReconcileRound(), emqx, loadConf("")) assert.ElementsMatch(t, []corev1.ServicePort{ { Name: "tcp-default", @@ -271,11 +244,11 @@ func TestGenerateListenersService(t *testing.T) { }) t.Run("check ports", func(t *testing.T) { - emqx := &crdv2.EMQX{} + emqx := &crd.EMQX{} conf, _ := config.EMQXConfigWithDefaults(` gateway.lwm2m.listeners.udp.default.bind = 5783 `) - got := generateListenerService(emqx, conf) + got := generateListenerService(newReconcileRound(), emqx, conf) assert.ElementsMatch(t, []corev1.ServicePort{ { Name: "lwm2m-udp-default", diff --git a/internal/controller/api_requester.go b/internal/controller/api_requester.go index 6358b1088..7f72e2b0a 100644 --- a/internal/controller/api_requester.go +++ b/internal/controller/api_requester.go @@ -7,7 
+7,7 @@ import ( "strings" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" resources "github.com/emqx/emqx-operator/internal/controller/resources" util "github.com/emqx/emqx-operator/internal/controller/util" @@ -32,6 +32,14 @@ type podRequesterFilter interface { filter(pod *corev1.Pod) bool } +type podConditionFilter struct { + cond corev1.PodConditionType +} + +func (f *podConditionFilter) filter(pod *corev1.Pod) bool { + return util.IsPodConditionTrue(pod, f.cond) +} + type managedByFilter struct { manager metav1.Object } @@ -48,7 +56,7 @@ func (f *managedByFilter) filter(pod *corev1.Pod) bool { } type emqxVersionFilter struct { - instance *crdv2.EMQX + instance *crd.EMQX prefix string } @@ -83,7 +91,7 @@ func (b *apiRequesterBuilder) forPod(pod *corev1.Pod) req.RequesterInterface { if b == nil { return nil } - if pod.Status.PodIP == "" || !util.IsPodConditionTrue(pod, corev1.ContainersReady) { + if pod.Status.PodIP == "" || pod.Status.Phase != corev1.PodRunning { return nil } return &req.Requester{ diff --git a/internal/controller/api_requester_test.go b/internal/controller/api_requester_test.go index 96ce00895..31d17f4d4 100644 --- a/internal/controller/api_requester_test.go +++ b/internal/controller/api_requester_test.go @@ -4,7 +4,7 @@ import ( "testing" "time" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" req "github.com/emqx/emqx-operator/internal/requester" "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" @@ -15,26 +15,21 @@ import ( ) func TestRequesterFilter(t *testing.T) { - var coreSetName string = "emqx-core-cur" + var coreSetName string = "emqx-core" var coreSetUID types.UID = "123" - instance := &crdv2.EMQX{ + instance := &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ Name: "emqx", Namespace: "emqx", }, - Status: crdv2.EMQXStatus{ - 
CoreNodesStatus: crdv2.EMQXNodesStatus{ - Replicas: 2, + Status: crd.EMQXStatus{ + CoreNodesStatus: crd.CoreNodesStatus{}, + ReplicantNodesStatus: crd.ReplicantNodesStatus{ CurrentRevision: "cur", UpdateRevision: "upd", }, - ReplicantNodesStatus: crdv2.EMQXNodesStatus{ - Replicas: 0, - CurrentRevision: "cur", - UpdateRevision: "upd", - }, - CoreNodes: []crdv2.EMQXNode{ + CoreNodes: []crd.EMQXNode{ { PodName: coreSetName + "-0", Name: "emqx@core-0", @@ -56,7 +51,7 @@ func TestRequesterFilter(t *testing.T) { Connections: 0, }, }, - ReplicantNodes: []crdv2.EMQXNode{}, + ReplicantNodes: []crd.EMQXNode{}, }, } @@ -72,12 +67,12 @@ func TestRequesterFilter(t *testing.T) { coreSets: []*appsv1.StatefulSet{ { ObjectMeta: metav1.ObjectMeta{ - Name: coreSetName, - UID: coreSetUID, - Labels: map[string]string{crdv2.LabelPodTemplateHash: "cur"}, + Name: coreSetName, + UID: coreSetUID, }, Status: appsv1.StatefulSetStatus{ - Replicas: 2, + Replicas: 2, + UpdateRevision: "upd", }, }, }, @@ -86,7 +81,7 @@ func TestRequesterFilter(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{ Name: coreSetName + "-0", - Labels: crdv2.CoreLabels(), + Labels: crd.CoreLabels(), CreationTimestamp: metav1.NewTime(time.Now().Add(-1 * time.Minute)), OwnerReferences: []metav1.OwnerReference{coreOwnerReference}, }, @@ -99,7 +94,7 @@ func TestRequesterFilter(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{ Name: coreSetName + "-1", - Labels: crdv2.CoreLabels(), + Labels: crd.CoreLabels(), CreationTimestamp: metav1.NewTime(time.Now().Add(-1 * time.Second)), OwnerReferences: []metav1.OwnerReference{coreOwnerReference}, }, @@ -131,13 +126,11 @@ func TestRequesterFilter(t *testing.T) { requester = builder.forPod(state.pods[1]) assert.NotNil(t, requester) - requester = builder.forOldestCore(state, &managedByFilter{state.currentCoreSet(instance)}) + // Filter by the single core StatefulSet: + requester = builder.forOldestCore(state, &managedByFilter{state.coreSet()}) assert.NotNil(t, requester) assert.Equal(t, 
state.pods[1].Name, requester.GetDescription()) - requester = builder.forOldestCore(state, &managedByFilter{state.updateCoreSet(instance)}) - assert.Nil(t, requester) - requester = builder.forOldestCore(state, &emqxVersionFilter{instance, "5.10."}) assert.NotNil(t, requester) diff --git a/internal/controller/cleanup_outdated.go b/internal/controller/cleanup_outdated.go index 6e10dac45..40bbf9c0d 100644 --- a/internal/controller/cleanup_outdated.go +++ b/internal/controller/cleanup_outdated.go @@ -1,21 +1,19 @@ package controller import ( - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" ) type cleanupOutdatedSets struct { *EMQXReconciler } -func (s *cleanupOutdatedSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (s *cleanupOutdatedSets) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // Postpone cleanups until the instance is ready: - if !instance.Status.IsConditionTrue(crdv2.Ready) { + if !instance.Status.IsConditionTrue(crd.Ready) { return subResult{} } @@ -42,50 +40,10 @@ func (s *cleanupOutdatedSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) } } - // List outdated coreSets, preserving order by creation timestamp: - currentSts := r.state.currentCoreSet(instance) - updateSts := r.state.updateCoreSet(instance) - prevStsList := []*appsv1.StatefulSet{} - for _, sts := range r.state.coreSets { - if sts.DeletionTimestamp == nil && sts != currentSts && sts != updateSts { - prevStsList = append(prevStsList, sts) - } - } - - stsOutdated := len(prevStsList) - int(instance.Spec.RevisionHistoryLimit) - for i := 0; i < stsOutdated; i++ { - sts := prevStsList[i] - // Avoid delete stateful set with non-zero replica counts - if sts.Status.Replicas != 0 || *(sts.Spec.Replicas) != 0 || sts.Generation > 
sts.Status.ObservedGeneration { - continue - } - - // Delete PVCs - pvcList := &corev1.PersistentVolumeClaimList{} - _ = s.Client.List(r.ctx, pvcList, - client.InNamespace(instance.Namespace), - client.MatchingLabels(sts.Spec.Selector.MatchLabels), - ) - for _, p := range pvcList.Items { - pvc := p.DeepCopy() - if pvc.DeletionTimestamp != nil { - continue - } - r.log.Info("removing persistentVolumeClaim of outdated coreSet", - "persistentVolumeClaim", klog.KObj(pvc), - "coreSet", klog.KObj(sts), - ) - if err := s.Client.Delete(r.ctx, pvc); err != nil && !k8sErrors.IsNotFound(err) { - return subResult{err: err} - } - } - - r.log.Info("removing outdated coreSet", "statefulSet", klog.KObj(sts)) - if err := s.Client.Delete(r.ctx, sts); err != nil && !k8sErrors.IsNotFound(err) { - return subResult{err: err} - } - - } + // With the single-StatefulSet model for cores, there are no outdated core StatefulSets + // to clean up. The single StatefulSet is updated in place. Legacy StatefulSets from + // previous operator versions (with hash-suffixed names) will be cleaned up separately + // if needed. return subResult{} } diff --git a/internal/controller/cleanup_outdated_suite_test.go b/internal/controller/cleanup_outdated_suite_test.go index dbd5a643d..081cd119e 100644 --- a/internal/controller/cleanup_outdated_suite_test.go +++ b/internal/controller/cleanup_outdated_suite_test.go @@ -4,76 +4,82 @@ import ( "fmt" "time" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/rand" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" ) -var _ = Describe("Reconciler cleanupOutdatedSets", func() { +var _ = Describe("Reconciler cleanupOutdatedSets", Ordered, func() { var s *cleanupOutdatedSets - var instance *crdv2.EMQX = &crdv2.EMQX{} + var instance *crd.EMQX = &crd.EMQX{} var ns *corev1.Namespace = &corev1.Namespace{} var round *reconcileRound - BeforeEach(func() { - s = &cleanupOutdatedSets{emqxReconciler} + BeforeAll(func() { ns = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-sync-sets-test-" + rand.String(5), + Name: "controller-cleanup-outdated-test", Labels: map[string]string{ "test": "e2e", }, }, } + Expect(k8sClient.Create(ctx, ns)).To(Succeed()) + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).To(Succeed()) + }) + + BeforeEach(func() { instance = emqx.DeepCopy() instance.Namespace = ns.Name instance.Spec.RevisionHistoryLimit = 3 - instance.Status = crdv2.EMQXStatus{ + instance.Status = crd.EMQXStatus{ Conditions: []metav1.Condition{ { - Type: crdv2.Ready, + Type: crd.Ready, Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, }, }, } - + s = &cleanupOutdatedSets{emqxReconciler} round = newReconcileRound() + }) - Expect(k8sClient.Create(ctx, ns)).To(Succeed()) - for i := 0; i < 5; i++ { + It("should delete outdated replicant sets", func() { + numReplicaSets := 5 + for i := 0; i < numReplicaSets; i++ { name := fmt.Sprintf("%s-%d", instance.Name, i) - rs := &appsv1.ReplicaSet{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: instance.Namespace, Labels: instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, + crd.ReplicantLabels(), + 
map[string]string{crd.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, ), }, Spec: appsv1.ReplicaSetSpec{ Replicas: ptr.To(int32(0)), Selector: &metav1.LabelSelector{ MatchLabels: instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, ), }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, ), }, Spec: corev1.PodSpec{ @@ -88,113 +94,21 @@ var _ = Describe("Reconciler cleanupOutdatedSets", func() { rs.Status.Replicas = 0 rs.Status.ObservedGeneration = 1 Expect(k8sClient.Status().Patch(ctx, rs.DeepCopy(), client.Merge)).Should(Succeed()) - round.state.replicantSets = append(round.state.replicantSets, rs) - - sts := &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: instance.Namespace, - Labels: instance.DefaultLabelsWith( - crdv2.CoreLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, - ), - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: ptr.To(int32(0)), - Selector: &metav1.LabelSelector{ - MatchLabels: instance.DefaultLabelsWith( - crdv2.CoreLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, - ), - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: instance.DefaultLabelsWith( - crdv2.CoreLabels(), - map[string]string{crdv2.LabelPodTemplateHash: fmt.Sprintf("fake-%d", i)}, - ), - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - }, - }, - } - Expect(k8sClient.Create(ctx, sts.DeepCopy())).Should(Succeed()) - sts.Status.Replicas = 0 - sts.Status.ObservedGeneration = 
1 - Expect(k8sClient.Status().Patch(ctx, sts.DeepCopy(), client.Merge)).Should(Succeed()) - - round.state.coreSets = append(round.state.coreSets, sts) - - pvc := &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: sts.Name, - Namespace: sts.Namespace, - Labels: sts.Labels, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, - Resources: corev1.VolumeResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceStorage: resource.MustParse("1Gi"), - }, - }, - }, - } - Expect(k8sClient.Create(ctx, pvc.DeepCopy())).Should(Succeed()) } - }) - It("should delete rs sts and pvc", func() { Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) - Eventually(func() int { + Eventually(func() *appsv1.ReplicaSetList { list := &appsv1.ReplicaSetList{} _ = k8sClient.List(ctx, list, client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.ReplicantLabels())), - ) - count := 0 - for _, rs := range list.Items { - if rs.DeletionTimestamp == nil { - count++ - } - } - return count - }).WithTimeout(timeout).WithPolling(interval).Should(BeEquivalentTo(instance.Spec.RevisionHistoryLimit)) - - Eventually(func() int { - list := &appsv1.StatefulSetList{} - _ = k8sClient.List(ctx, list, - client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.CoreLabels())), + client.MatchingLabels(instance.DefaultLabelsWith(crd.ReplicantLabels())), ) - count := 0 - for _, sts := range list.Items { - if sts.DeletionTimestamp == nil { - count++ - } - } - return count - }).WithTimeout(timeout).WithPolling(interval).Should(BeEquivalentTo(instance.Spec.RevisionHistoryLimit)) - - Eventually(func() int { - list := &corev1.PersistentVolumeClaimList{} - _ = k8sClient.List(ctx, list, - client.InNamespace(instance.Namespace), - client.MatchingLabels(instance.DefaultLabelsWith(crdv2.CoreLabels())), - ) - count := 0 - for _, pvc := 
range list.Items { - if pvc.DeletionTimestamp == nil { - count++ - } - } - return count - }).WithTimeout(timeout).WithPolling(interval).Should(BeEquivalentTo(instance.Spec.RevisionHistoryLimit)) + return list + }). + WithTimeout(timeout). + WithPolling(interval). + Should(HaveField("Items", HaveLen(int(instance.Spec.RevisionHistoryLimit)))) }) }) diff --git a/internal/controller/ds_cleanup_sites.go b/internal/controller/ds_cleanup_sites.go index f0ddac654..b3a0def07 100644 --- a/internal/controller/ds_cleanup_sites.go +++ b/internal/controller/ds_cleanup_sites.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/emqx/emqx-operator/internal/emqx/api" ) @@ -12,7 +12,7 @@ type dsCleanupSites struct { *EMQXReconciler } -func (c *dsCleanupSites) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (c *dsCleanupSites) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // If DS cluster state is not loaded, skip the reconciliation. if r.dsCluster == nil { return subResult{} @@ -22,7 +22,7 @@ func (c *dsCleanupSites) reconcile(r *reconcileRound, instance *crdv2.EMQX) subR // Required API operation is available only since EMQX 6.0.0. 
req := r.requester.forOldestCore(r.state, &emqxVersionFilter{instance: instance, prefix: "6."}) - lostSites := []string{} + lostSites := []*api.DSSite{} for _, site := range r.dsCluster.Sites { if site.Up || (len(site.Shards) > 0) { continue @@ -31,7 +31,7 @@ func (c *dsCleanupSites) reconcile(r *reconcileRound, instance *crdv2.EMQX) subR if node != nil { continue } - lostSites = append(lostSites, site.ID) + lostSites = append(lostSites, &site) } if len(lostSites) == 0 { @@ -45,9 +45,11 @@ func (c *dsCleanupSites) reconcile(r *reconcileRound, instance *crdv2.EMQX) subR } for _, site := range lostSites { - err := api.ForgetDSSite(req, site) - if err != nil { - return subResult{err: emperror.Wrapf(err, "failed to forget DS site %s", site)} + err := api.ForgetDSSite(req, site.ID) + if err == nil { + r.log.V(1).Info("cleaned up lost DS site", "site", site) + } else { + return subResult{err: emperror.Wrapf(err, "failed to forget DS site %s", site.ID)} } } diff --git a/internal/controller/ds_load_cluster_state.go b/internal/controller/ds_load_cluster_state.go index 6134da102..3e70f6d39 100644 --- a/internal/controller/ds_load_cluster_state.go +++ b/internal/controller/ds_load_cluster_state.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/emqx/emqx-operator/internal/emqx/api" ) @@ -12,12 +12,17 @@ type dsLoadClusterState struct { *EMQXReconciler } -func (c *dsLoadClusterState) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - // Instantiate API requester for a node that is part of update StatefulSet. - req := r.requester.forOldestCore(r.state, &managedByFilter{r.state.updateCoreSet(instance)}) +func (c *dsLoadClusterState) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { + // Instantiate API requester for a node that is part of the core StatefulSet. 
+ // Prefer EMQX 6.x requester first: EMQX starting from 6.1.0 has separate cluster view. + req := r.requester.forOldestCore(r.state, &emqxVersionFilter{instance: instance, prefix: "6."}) + if req == nil { + req = r.oldestCoreRequester() + } + // If there's no suitable EMQX API to query, skip the reconciliation. if req == nil { - return subResult{} + return reconcilePostpone() } // If EMQX DS API is not available, fail the reconciliation. diff --git a/internal/controller/ds_reflect_pod_condition.go b/internal/controller/ds_reflect_pod_condition.go index a70bb2d77..05d8aa040 100644 --- a/internal/controller/ds_reflect_pod_condition.go +++ b/internal/controller/ds_reflect_pod_condition.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" util "github.com/emqx/emqx-operator/internal/controller/util" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -12,7 +12,7 @@ type dsReflectPodCondition struct { *EMQXReconciler } -func (u *dsReflectPodCondition) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (u *dsReflectPodCondition) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // If DS cluster state is not loaded, skip the reconciliation step. if r.dsCluster == nil { return subResult{} @@ -24,7 +24,7 @@ func (u *dsReflectPodCondition) reconcile(r *reconcileRound, instance *crdv2.EMQ continue } condition := corev1.PodCondition{ - Type: crdv2.DSReplicationSite, + Type: crd.DSReplicationSite, Status: corev1.ConditionUnknown, LastTransitionTime: metav1.Now(), } @@ -41,7 +41,7 @@ func (u *dsReflectPodCondition) reconcile(r *reconcileRound, instance *crdv2.EMQ // a DS replication site. 
condition.Status = corev1.ConditionFalse } - existing := util.FindPodCondition(pod, crdv2.DSReplicationSite) + existing := util.FindPodCondition(pod, crd.DSReplicationSite) if existing == nil || existing.Status != condition.Status { err := util.UpdatePodCondition(r.ctx, u.Client, pod, condition) if err != nil { diff --git a/internal/controller/ds_update_replica_sets.go b/internal/controller/ds_update_replica_sets.go index 907ccfe80..6ba0ff207 100644 --- a/internal/controller/ds_update_replica_sets.go +++ b/internal/controller/ds_update_replica_sets.go @@ -3,42 +3,51 @@ package controller import ( "reflect" "sort" - "strconv" "strings" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" - "github.com/emqx/emqx-operator/internal/emqx/api" + crd "github.com/emqx/emqx-operator/api/v3alpha1" + util "github.com/emqx/emqx-operator/internal/controller/util" + api "github.com/emqx/emqx-operator/internal/emqx/api" ) type dsUpdateReplicaSets struct { *EMQXReconciler } -func (u *dsUpdateReplicaSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (u *dsUpdateReplicaSets) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // If DS cluster state is not loaded, skip the reconciliation. if r.dsCluster == nil || r.dsReplication == nil { return subResult{} } - // Get the most recent stateful set. - updateCoreSet := r.state.updateCoreSet(instance) - if updateCoreSet == nil { + // Get the single core StatefulSet. + coreSet := r.state.coreSet() + if coreSet == nil { return subResult{} } - // Instantiate API requester for a node that is part of update StatefulSet. - req := r.requester.forOldestCore(r.state, &managedByFilter{updateCoreSet}) + // Instantiate API requester for a node that is part of the core StatefulSet. + // Prefer EMQX 6.x requester first: EMQX starting from 6.1.0 has separate cluster view. 
+ req := r.requester.forOldestCore(r.state, &emqxVersionFilter{instance: instance, prefix: "6."}) + if req == nil { + req = r.oldestCoreRequester() + } // If there's no EMQX API to query, skip the reconciliation. if req == nil { + return reconcilePostpone() + } + + // If there are no known DS DBs, skip the reconciliation. + if len(r.dsReplication.DBs) == 0 { return subResult{} } // Wait until all pods are ready. - desiredReplicas := instance.Status.CoreNodesStatus.Replicas - if updateCoreSet.Status.AvailableReplicas < desiredReplicas { - return subResult{} + desiredReplicas := instance.Spec.NumCoreReplicas() + if coreSet.Status.AvailableReplicas < desiredReplicas { + return reconcilePostpone() } // Compute the current sites. @@ -46,19 +55,24 @@ func (u *dsUpdateReplicaSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) // Compute the target sites. targetSites := []string{} - for _, node := range instance.Status.CoreNodes { - pod := r.state.podWithName(node.PodName) - if pod != nil && r.state.partOfUpdateSet(pod, instance) { - site := r.dsCluster.FindSite(node.Name) - if site == nil { - return subResult{err: emperror.Errorf("no site for node %s", node.Name)} - } - if getPodIndex(node.PodName) < desiredReplicas { + for _, site := range r.dsCluster.Sites { + nodeName := parseNodeName(site.Node, instance) + if nodeName == nil { + return subResult{err: emperror.Errorf("unrecognized DS site node name: %s", site.Node)} + } + if strings.HasPrefix(nodeName.podName, instance.CoreName()) { + ordinal := util.PodOrdinal(nodeName.podName) + if ordinal >= 0 && ordinal < int(desiredReplicas) { targetSites = append(targetSites, site.ID) } } } + // No target sites. + if len(targetSites) == 0 { + return subResult{} + } + sort.Strings(targetSites) sort.Strings(currentSites) @@ -68,9 +82,7 @@ func (u *dsUpdateReplicaSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) } // Update replica sets for each DB. 
- if len(r.dsReplication.DBs) > 0 { - r.log.V(1).Info("updating DS replica sets", "targetSites", targetSites, "currentSites", currentSites) - } + r.log.V(1).Info("updating DS replica sets", "targetSites", targetSites, "currentSites", currentSites) for _, db := range r.dsReplication.DBs { err := api.UpdateDSReplicaSet(req, db.Name, targetSites) if err != nil { @@ -80,16 +92,3 @@ return subResult{} } - -func getPodIndex(podName string) int32 { - parts := strings.Split(podName, "-") - if len(parts) < 2 { - return -1 - } - indexPart := parts[len(parts)-1] - index, err := strconv.Atoi(indexPart) - if err != nil { - return -1 - } - return int32(index) -} diff --git a/internal/controller/emqx_controller.go b/internal/controller/emqx_controller.go index f659c4acc..30ed52d8f 100644 --- a/internal/controller/emqx_controller.go +++ b/internal/controller/emqx_controller.go @@ -28,8 +28,9 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" "github.com/emqx/emqx-operator/internal/emqx/api" req "github.com/emqx/emqx-operator/internal/requester" @@ -56,18 +57,38 @@ type reconcileRound struct { } // Instantiate default API requester for a core node. +// Picks the oldest core node whose containers are all ready (ContainersReady pod condition). func (r *reconcileRound) oldestCoreRequester() req.RequesterInterface { - return r.requester.forOldestCore(r.state) + return r.requester.forOldestCore(r.state, &podConditionFilter{cond: corev1.ContainersReady}) } // subResult provides a wrapper around different results from a subreconciler. 
type subResult struct { - err error - result ctrl.Result + err error + // If `true`, short timeout requeue is needed: + needRequeue bool + // Immediately report controller `Result` if not nil: + immediateResult *ctrl.Result +} + +func reconcileError(err error) subResult { + return subResult{err: err} +} + +func reconcileRequeue() subResult { + return subResult{immediateResult: &ctrl.Result{Requeue: true}} +} + +func reconcileRequeueAfter(duration time.Duration) subResult { + return subResult{immediateResult: &ctrl.Result{RequeueAfter: duration}} +} + +func reconcilePostpone() subResult { + return subResult{needRequeue: true} } type subReconciler interface { - reconcile(*reconcileRound, *crdv2.EMQX) subResult + reconcile(*reconcileRound, *crd.EMQX) subResult } func subReconcilerName(s subReconciler) string { @@ -100,7 +121,7 @@ func NewEMQXReconciler(mgr manager.Manager) *EMQXReconciler { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/reconcile func (r *EMQXReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - instance := &crdv2.EMQX{} + instance := &crd.EMQX{} if err := r.Client.Get(ctx, req.NamespacedName, instance); err != nil { if k8sErrors.IsNotFound(err) { return ctrl.Result{}, nil @@ -114,6 +135,7 @@ func (r *EMQXReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. logger := log.FromContext(ctx) round := reconcileRound{ctx: ctx, log: logger} + needRequeue := false for _, subReconciler := range []subReconciler{ // Load EMQX configuration defined in the spec.config.data: @@ -126,7 +148,6 @@ func (r *EMQXReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. &setupAPIRequester{r}, // Perform reconciliation steps: &updateStatus{r}, - &updatePodConditions{r}, &syncConfig{r}, &addHeadlessService{r}, &addCoreSet{r}, @@ -134,18 +155,17 @@ func (r *EMQXReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
&addPdb{r}, &addService{r}, &dsLoadClusterState{r}, + &dsCleanupSites{r}, &dsUpdateReplicaSets{r}, &dsReflectPodCondition{r}, + &syncCoreSet{r}, &syncReplicantSets{r}, - &syncCoreSets{r}, + &syncClusterMembership{r}, &cleanupOutdatedSets{r}, - &dsCleanupSites{r}, } { round.log = logger.WithValues("reconciler", subReconcilerName(subReconciler)) subResult := subReconciler.reconcile(&round, instance) - if !subResult.result.IsZero() { - return subResult.result, nil - } + needRequeue = needRequeue || subResult.needRequeue if subResult.err != nil { if errors.IsCommonError(subResult.err) { round.log.Info("reconciler requeue", "reason", subResult.err) @@ -157,19 +177,23 @@ func (r *EMQXReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. ) return ctrl.Result{}, subResult.err } + if subResult.immediateResult != nil { + return *subResult.immediateResult, nil + } } - isStable := instance.Status.IsConditionTrue(crdv2.Ready) && instance.Status.DSReplication.IsStable() - if !isStable { + if !instance.Status.IsConditionTrue(crd.Ready) || needRequeue { return ctrl.Result{RequeueAfter: time.Second}, nil } + return ctrl.Result{RequeueAfter: time.Duration(30) * time.Second}, nil } // SetupWithManager sets up the controller with the Manager. func (r *EMQXReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&crdv2.EMQX{}). + For(&crd.EMQX{}). + WithEventFilter(predicate.GenerationChangedPredicate{}). Named("emqx"). 
Complete(r) } diff --git a/internal/controller/load_config.go b/internal/controller/load_config.go index 6fa136f09..638fd22c3 100644 --- a/internal/controller/load_config.go +++ b/internal/controller/load_config.go @@ -2,7 +2,7 @@ package controller import ( emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" ) @@ -10,7 +10,7 @@ type loadConfig struct { *EMQXReconciler } -func (l *loadConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (l *loadConfig) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { conf, err := config.EMQXConfigWithDefaults(applicableConfig(instance)) if err != nil { return subResult{ diff --git a/internal/controller/load_state.go b/internal/controller/load_state.go index 252d53be8..d5ac3eacb 100644 --- a/internal/controller/load_state.go +++ b/internal/controller/load_state.go @@ -3,7 +3,9 @@ package controller import ( "context" - crdv2 "github.com/emqx/emqx-operator/api/v2" + emperror "emperror.dev/errors" + crd "github.com/emqx/emqx-operator/api/v3alpha1" + util "github.com/emqx/emqx-operator/internal/controller/util" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -28,7 +30,7 @@ func (r *reconcileState) podWithName(name string) *corev1.Pod { func (r *reconcileState) podsWithRole(role string) []*corev1.Pod { var list []*corev1.Pod for _, pod := range r.pods { - if pod.Labels[crdv2.LabelDBRole] == role { + if pod.Labels[crd.LabelDBRole] == role { list = append(list, pod) } } @@ -41,46 +43,76 @@ func (r *reconcileState) podsManagedBy(object metav1.Object) []*corev1.Pod { return list } for _, pod := range r.pods { - if metav1.GetControllerOf(pod) != nil && metav1.GetControllerOf(pod).UID == object.GetUID() { + if util.IsPodManagedBy(pod, object) { list = append(list, pod) } } return list } -func (r 
*reconcileState) currentCoreSet(instance *crdv2.EMQX) *appsv1.StatefulSet { - for _, sts := range r.coreSets { - hash := sts.Labels[crdv2.LabelPodTemplateHash] - if hash == instance.Status.CoreNodesStatus.CurrentRevision { - return sts - } +// coreSet returns the single core StatefulSet, or nil if none exists. +// With the rolling update model, there is always at most one core StatefulSet. +func (r *reconcileState) coreSet() *appsv1.StatefulSet { + if len(r.coreSets) > 0 { + return r.coreSets[0] } return nil } -func (r *reconcileState) currentReplicantSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { - for _, rs := range r.replicantSets { - hash := rs.Labels[crdv2.LabelPodTemplateHash] - if hash == instance.Status.ReplicantNodesStatus.CurrentRevision { - return rs +func (r *reconcileState) partOfCoreSet(pod *corev1.Pod) bool { + coreSet := r.coreSet() + if coreSet == nil { + return false + } + return util.IsPodManagedBy(pod, coreSet) +} + +// partOfCoreSetRevision checks if a core pod's StatefulSet-assigned revision matches the specified revision. +func (r *reconcileState) partOfCoreSetRevision(pod *corev1.Pod, revision string) bool { + coreSet := r.coreSet() + if coreSet == nil { + return false + } + if !util.IsPodManagedBy(pod, coreSet) { + return false + } + podRevision := pod.Labels[appsv1.ControllerRevisionHashLabelKey] + return podRevision == revision +} + +// numCoresRevision counts number of core pods running specified StatefulSet revision. 
+func (r *reconcileState) numCoresRevision(revision string) int { + coreSet := r.coreSet() + if coreSet == nil { + return 0 + } + num := 0 + for _, pod := range r.podsManagedBy(coreSet) { + podRevision := pod.Labels[appsv1.ControllerRevisionHashLabelKey] + if podRevision == revision { + num++ } } - return nil + return num } -func (r *reconcileState) updateCoreSet(instance *crdv2.EMQX) *appsv1.StatefulSet { - for _, sts := range r.coreSets { - hash := sts.Labels[crdv2.LabelPodTemplateHash] - if hash == instance.Status.CoreNodesStatus.UpdateRevision { - return sts +// Returns ReplicaSet representing current set of replicant nodes. +// Current set is considered outdated if CurrentRevision != UpdateRevision. +func (r *reconcileState) currentReplicantSet(instance *crd.EMQX) *appsv1.ReplicaSet { + for _, rs := range r.replicantSets { + hash := rs.Labels[crd.LabelPodTemplateHash] + if hash == instance.Status.ReplicantNodesStatus.CurrentRevision { + return rs } } return nil } -func (r *reconcileState) updateReplicantSet(instance *crdv2.EMQX) *appsv1.ReplicaSet { +// Returns ReplicaSet representing newest set of replicant nodes. +// Same as current if CurrentRevision == UpdateRevision. +func (r *reconcileState) updateReplicantSet(instance *crd.EMQX) *appsv1.ReplicaSet { for _, rs := range r.replicantSets { - hash := rs.Labels[crdv2.LabelPodTemplateHash] + hash := rs.Labels[crd.LabelPodTemplateHash] if hash == instance.Status.ReplicantNodesStatus.UpdateRevision { return rs } @@ -88,56 +120,69 @@ func (r *reconcileState) updateReplicantSet(instance *crdv2.EMQX) *appsv1.Replic return nil } -func (r *reconcileState) partOfCurrentSet(pod *corev1.Pod, instance *crdv2.EMQX) bool { +// partOfUpdateReplicantSet checks if a pod belongs to the update (newest) ReplicaSet. 
+func (r *reconcileState) partOfUpdateReplicantSet(pod *corev1.Pod, instance *crd.EMQX) bool { controllerRef := metav1.GetControllerOf(pod) if controllerRef == nil { return false } - currentCoreSet := r.currentCoreSet(instance) - if currentCoreSet != nil && controllerRef.UID == currentCoreSet.UID { - return true - } - currentReplicantSet := r.currentReplicantSet(instance) - if currentReplicantSet != nil && controllerRef.UID == currentReplicantSet.UID { + updateReplicantSet := r.updateReplicantSet(instance) + if updateReplicantSet != nil && controllerRef.UID == updateReplicantSet.UID { return true } return false } -func (r *reconcileState) partOfUpdateSet(pod *corev1.Pod, instance *crdv2.EMQX) bool { - controllerRef := metav1.GetControllerOf(pod) - if controllerRef == nil { - return false - } - updateCoreSet := r.updateCoreSet(instance) - if updateCoreSet != nil && controllerRef.UID == updateCoreSet.UID { +func (r *reconcileState) areReplicantsAvailable(instance *crd.EMQX) bool { + desired := instance.Spec.NumReplicantReplicas() + if desired == 0 { return true } - updateReplicantSet := r.updateReplicantSet(instance) - if updateReplicantSet != nil && controllerRef.UID == updateReplicantSet.UID { - return true + replicantSet := r.updateReplicantSet(instance) + if replicantSet == nil { + return false } - return false + return replicantSet.Status.AvailableReplicas >= desired } type loadState struct { *EMQXReconciler } -func (l *loadState) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - state := loadReconcileState(r.ctx, l.Client, instance) +func (l *loadState) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { + state, err := loadReconcileState(r.ctx, l.Client, instance) + if err != nil { + return subResult{err: emperror.Wrap(err, "failed to load reconcile round state")} + } r.state = state return subResult{} } -func loadReconcileState(ctx context.Context, client k8s.Client, instance *crdv2.EMQX) *reconcileState { +func reloadReconcileState(r 
*reconcileRound, client k8s.Client, instance *crd.EMQX) error { + state, err := loadReconcileState(r.ctx, client, instance) + if err != nil { + return err + } + r.state = state + return nil +} + +func loadReconcileState( + ctx context.Context, + client k8s.Client, + instance *crd.EMQX, +) (*reconcileState, error) { + var err error state := &reconcileState{} stsList := &appsv1.StatefulSetList{} - _ = client.List(ctx, stsList, + err = client.List(ctx, stsList, k8s.InNamespace(instance.Namespace), - k8s.MatchingLabels(instance.DefaultLabelsWith(crdv2.CoreLabels())), + k8s.MatchingLabels(instance.DefaultLabelsWith(crd.CoreLabels())), ) + if err != nil { + return nil, err + } for _, sts := range stsList.Items { state.coreSets = append(state.coreSets, sts.DeepCopy()) @@ -146,10 +191,13 @@ func loadReconcileState(ctx context.Context, client k8s.Client, instance *crdv2. sortByCreationTimestamp(state.coreSets) rsList := &appsv1.ReplicaSetList{} - _ = client.List(ctx, rsList, + err = client.List(ctx, rsList, k8s.InNamespace(instance.Namespace), - k8s.MatchingLabels(instance.DefaultLabelsWith(crdv2.ReplicantLabels())), + k8s.MatchingLabels(instance.DefaultLabelsWith(crd.ReplicantLabels())), ) + if err != nil { + return nil, err + } for _, rs := range rsList.Items { state.replicantSets = append(state.replicantSets, rs.DeepCopy()) @@ -158,10 +206,13 @@ func loadReconcileState(ctx context.Context, client k8s.Client, instance *crdv2. sortByCreationTimestamp(state.replicantSets) podList := &corev1.PodList{} - _ = client.List(ctx, podList, + err = client.List(ctx, podList, k8s.InNamespace(instance.Namespace), k8s.MatchingLabels(instance.DefaultLabels()), ) + if err != nil { + return nil, err + } for _, pod := range podList.Items { // Disregard pods that are being deleted. @@ -180,5 +231,5 @@ func loadReconcileState(ctx context.Context, client k8s.Client, instance *crdv2. 
 		state.pods = append(state.pods, pod)
 	}
 
-	return state
+	return state, nil
 }
diff --git a/internal/controller/rebalance_controller.go b/internal/controller/rebalance_controller.go
index 63b31eeee..943d5d352 100644
--- a/internal/controller/rebalance_controller.go
+++ b/internal/controller/rebalance_controller.go
@@ -32,8 +32,8 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 
-	crdv2 "github.com/emqx/emqx-operator/api/v2"
 	crdv2beta1 "github.com/emqx/emqx-operator/api/v2beta1"
+	crd "github.com/emqx/emqx-operator/api/v3alpha1"
 
 	config "github.com/emqx/emqx-operator/internal/controller/config"
 	"github.com/emqx/emqx-operator/internal/emqx/api"
@@ -79,7 +79,7 @@ func (r *RebalanceReconciler) Reconcile(ctx context.Context, request ctrl.Reques
 		return ctrl.Result{}, err
 	}
 
-	emqx := &crdv2.EMQX{}
+	emqx := &crd.EMQX{}
 	if err := r.Client.Get(ctx, client.ObjectKey{
 		Name:      rebalance.Spec.InstanceName,
 		Namespace: rebalance.Namespace,
@@ -100,7 +100,7 @@ func (r *RebalanceReconciler) Reconcile(ctx context.Context, request ctrl.Reques
 	}
 
 	// check if emqx is ready
-	if !emqx.Status.IsConditionTrue(crdv2.Ready) {
+	if !emqx.Status.IsConditionTrue(crd.Ready) {
 		// return ctrl.Result{}, emperror.New("EMQX is not ready")
 		_ = rebalance.Status.SetFailed(crdv2beta1.RebalanceCondition{
 			Type: crdv2beta1.RebalanceConditionFailed,
@@ -125,7 +125,11 @@ func (r *RebalanceReconciler) Reconcile(ctx context.Context, request ctrl.Reques
 		return ctrl.Result{}, emperror.Wrap(err, "failed to create EMQX API requester")
 	}
 
-	state := loadReconcileState(ctx, r.Client, emqx)
+	state, err := loadReconcileState(ctx, r.Client, emqx)
+	if err != nil {
+		return ctrl.Result{}, emperror.Wrap(err, "failed to load reconcile round state")
+	}
+
 	req = requester.forOldestCore(state)
 	if req == nil {
 		return ctrl.Result{}, emperror.New("EMQX API requester unavailable")
@@ -177,7 +181,7 @@ func (r *RebalanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		Complete(r)
 }
 
-func 
rebalanceStatusHandler(emqx *crdv2.EMQX, rebalance *crdv2beta1.Rebalance, req req.RequesterInterface) { +func rebalanceStatusHandler(emqx *crd.EMQX, rebalance *crdv2beta1.Rebalance, req req.RequesterInterface) { switch rebalance.Status.Phase { case "": if err := startRebalance(emqx, rebalance, req); err != nil { @@ -222,7 +226,7 @@ func rebalanceStatusHandler(emqx *crdv2.EMQX, rebalance *crdv2beta1.Rebalance, r } } -func startRebalance(emqx *crdv2.EMQX, rebalance *crdv2beta1.Rebalance, req req.RequesterInterface) error { +func startRebalance(emqx *crd.EMQX, rebalance *crdv2beta1.Rebalance, req req.RequesterInterface) error { nodes := []string{} if len(emqx.Status.ReplicantNodes) == 0 { for _, node := range emqx.Status.CoreNodes { diff --git a/internal/controller/rebalance_controller_test.go b/internal/controller/rebalance_controller_test.go index bd8a5c296..c10c49256 100644 --- a/internal/controller/rebalance_controller_test.go +++ b/internal/controller/rebalance_controller_test.go @@ -16,77 +16,6 @@ limitations under the License. package controller -import ( - "context" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - appsv2beta1 "github.com/emqx/emqx-operator/api/v2beta1" -) - -var _ = Describe("Rebalance Controller", func() { - Context("When reconciling a resource", func() { - const resourceName = "test-resource" - - ctx := context.Background() - - typeNamespacedName := types.NamespacedName{ - Name: resourceName, - Namespace: "default", // TODO(user):Modify as needed - } - rebalance := &appsv2beta1.Rebalance{} - - BeforeEach(func() { - By("creating the custom resource for the Kind Rebalance") - err := k8sClient.Get(ctx, typeNamespacedName, rebalance) - if err != nil && errors.IsNotFound(err) { - resource := &appsv2beta1.Rebalance{ - ObjectMeta: metav1.ObjectMeta{ - Name: resourceName, - Namespace: "default", - }, - Spec: appsv2beta1.RebalanceSpec{ - RebalanceStrategy: appsv2beta1.RebalanceStrategy{ - WaitTakeover: 10, - ConnEvictRate: 10, - SessEvictRate: 10, - WaitHealthCheck: 10, - AbsSessThreshold: 100, - RelConnThreshold: "1.2", - AbsConnThreshold: 100, - RelSessThreshold: "1.2", - }, - }, - } - Expect(k8sClient.Create(ctx, resource)).To(Succeed()) - } - }) - - AfterEach(func() { - // TODO(user): Cleanup logic after each test, like removing the resource instance. 
- resource := &appsv2beta1.Rebalance{} - err := k8sClient.Get(ctx, typeNamespacedName, resource) - Expect(err).NotTo(HaveOccurred()) - - By("Cleanup the specific resource instance Rebalance") - Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) - }) - It("should successfully reconcile the resource", func() { - By("Reconciling the created resource") - controllerReconciler := NewRebalanceReconciler(k8sManager) - - _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: typeNamespacedName, - }) - Expect(err).NotTo(HaveOccurred()) - // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. - // Example: If you expect a certain status condition after reconciliation, verify it here. - }) - }) -}) +// NOT FUNCTIONAL — Rebalance controller tests are disabled because the Rebalance +// CRD is not registered with the scheme. Uncomment (and update) when the Rebalance +// CRD is re-enabled. diff --git a/internal/controller/resources/config.go b/internal/controller/resources/config.go index 3588966a1..03b73f3d5 100644 --- a/internal/controller/resources/config.go +++ b/internal/controller/resources/config.go @@ -1,7 +1,7 @@ package controller import ( - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -12,10 +12,10 @@ const OverridesConfigFile string = "emqx.conf" const configVolumeName = "bootstrap-config" type emqxConfigResource struct { - *crdv2.EMQX + *crd.EMQX } -func EMQXConfig(instance *crdv2.EMQX) emqxConfigResource { +func EMQXConfig(instance *crd.EMQX) emqxConfigResource { return emqxConfigResource{instance} } diff --git a/internal/controller/resources/probes.go b/internal/controller/resources/probes.go new file mode 100644 index 000000000..17a1cf8a0 --- /dev/null +++ b/internal/controller/resources/probes.go @@ -0,0 +1,31 @@ +package controller + +import ( + 
"github.com/emqx/emqx-operator/internal/emqx/api" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// EvacuationReadinessProbe returns a readiness probe that uses the EMQX +// Evacuation & Rebalance API availability check. +// +// This endpoint returns 503 when the node is being evacuated, causing +// kubelet to mark the pod not-ready and removing it from Service endpoints +// automatically. +// +// Keep in sync with `EMQXReplicantTemplate.ReadinessProbe` default. +func EvacuationReadinessProbe() *corev1.Probe { + return &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/" + api.URLAvailabilityCheck, + Port: intstr.FromString("dashboard"), + }, + }, + InitialDelaySeconds: 10, + TimeoutSeconds: 3, + PeriodSeconds: 5, + FailureThreshold: 1, + SuccessThreshold: 1, + } +} diff --git a/internal/controller/resources/secrets.go b/internal/controller/resources/secrets.go index 1e23c153d..4b777fee2 100644 --- a/internal/controller/resources/secrets.go +++ b/internal/controller/resources/secrets.go @@ -3,7 +3,7 @@ package controller import ( "fmt" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -13,18 +13,18 @@ const DefaultBootstrapAPIKey string = "emqx-operator-controller" const boostrapApiKeysVolumeName = "bootstrap-api-keys" type cookieResource struct { - *crdv2.EMQX + *crd.EMQX } type bootstrapAPIKeyResource struct { - *crdv2.EMQX + *crd.EMQX } -func BootstrapAPIKey(instance *crdv2.EMQX) bootstrapAPIKeyResource { +func BootstrapAPIKey(instance *crd.EMQX) bootstrapAPIKeyResource { return bootstrapAPIKeyResource{instance} } -func Cookie(instance *crdv2.EMQX) cookieResource { +func Cookie(instance *crd.EMQX) cookieResource { return cookieResource{instance} } diff --git a/internal/controller/setup_api_requester.go b/internal/controller/setup_api_requester.go index 
9a63dc5a3..02860ba68 100644 --- a/internal/controller/setup_api_requester.go +++ b/internal/controller/setup_api_requester.go @@ -4,7 +4,7 @@ import ( "context" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" corev1 "k8s.io/api/core/v1" k8s "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -13,7 +13,7 @@ type setupAPIRequester struct { *EMQXReconciler } -func (s *setupAPIRequester) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (s *setupAPIRequester) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { bootstrapAPIKey, err := getBootstrapAPIKey(r.ctx, s.Client, instance) if err != nil { return subResult{err: err} @@ -29,7 +29,7 @@ func (s *setupAPIRequester) reconcile(r *reconcileRound, instance *crdv2.EMQX) s func getBootstrapAPIKey( ctx context.Context, client k8s.Client, - instance *crdv2.EMQX, + instance *crd.EMQX, ) (*corev1.Secret, error) { bootstrapAPIKey := &corev1.Secret{} err := client.Get(ctx, instance.BootstrapAPIKeyNamespacedName(), bootstrapAPIKey) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 8d8f9926c..cbb736c5c 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -29,13 +29,16 @@ import ( "github.com/go-logr/logr" . "github.com/onsi/ginkgo/v2" ginkgotypes "github.com/onsi/ginkgo/v2/types" + "github.com/onsi/gomega" . 
"github.com/onsi/gomega" "go.uber.org/zap/zapcore" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" @@ -43,8 +46,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - crdv2 "github.com/emqx/emqx-operator/api/v2" - crdv2beta1 "github.com/emqx/emqx-operator/api/v2beta1" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" req "github.com/emqx/emqx-operator/internal/requester" // +kubebuilder:scaffold:imports @@ -64,16 +66,16 @@ var timeout, interval time.Duration var emqxReconciler *EMQXReconciler var emqxConf *config.EMQX -var emqx *crdv2.EMQX = &crdv2.EMQX{ +var emqx *crd.EMQX = &crd.EMQX{ ObjectMeta: metav1.ObjectMeta{ UID: "fake-1234567890", Name: "emqx", Labels: map[string]string{ - crdv2.LabelManagedBy: "emqx-operator", - crdv2.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelInstance: "emqx", }, }, - Spec: crdv2.EMQXSpec{ + Spec: crd.EMQXSpec{ Image: "emqx", }, } @@ -89,6 +91,9 @@ var _ = BeforeSuite(func() { timeout = time.Second * 10 interval = time.Second + gomega.SetDefaultEventuallyTimeout(timeout) + gomega.SetDefaultEventuallyPollingInterval(interval) + logger = zap.New( zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), @@ -118,10 +123,7 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = crdv2.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = crdv2beta1.AddToScheme(scheme.Scheme) + err = crd.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:scheme @@ -158,6 +160,33 @@ var _ = AfterSuite(func() { // Expect(err).NotTo(HaveOccurred()) 
}) +func actualObject[Object client.Object](o Object) (Object, error) { + err := k8sClient.Get(ctx, client.ObjectKeyFromObject(o), o) + return o, err +} + +func ownerReferences(owner client.Object) []metav1.OwnerReference { + var apiVersion, kind string + switch owner.(type) { + case *appsv1.StatefulSet: + apiVersion = "apps/v1" + kind = "StatefulSet" + case *appsv1.ReplicaSet: + apiVersion = "apps/v1" + kind = "ReplicaSet" + } + return []metav1.OwnerReference{ + { + APIVersion: apiVersion, + Kind: kind, + Name: owner.GetName(), + UID: owner.GetUID(), + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + }, + } +} + func newReconcileRound() *reconcileRound { req := req.NewMockRequester( func(method string, url url.URL, body []byte, header http.Header) (resp *http.Response, respBody []byte, err error) { @@ -188,3 +217,14 @@ func (b *apiRequesterOverride) forOldestCore(_ *reconcileState, _ ...podRequeste func (b *apiRequesterOverride) forPod(_ *corev1.Pod) req.RequesterInterface { return b.requester } + +type apiRequesterUnavailable struct { +} + +func (b *apiRequesterUnavailable) forOldestCore(_ *reconcileState, _ ...podRequesterFilter) req.RequesterInterface { + return nil +} + +func (b *apiRequesterUnavailable) forPod(_ *corev1.Pod) req.RequesterInterface { + return nil +} diff --git a/internal/controller/sync_cluster_membership.go b/internal/controller/sync_cluster_membership.go new file mode 100644 index 000000000..7056d62d6 --- /dev/null +++ b/internal/controller/sync_cluster_membership.go @@ -0,0 +1,66 @@ +package controller + +import ( + "fmt" + + crd "github.com/emqx/emqx-operator/api/v3alpha1" + util "github.com/emqx/emqx-operator/internal/controller/util" + api "github.com/emqx/emqx-operator/internal/emqx/api" + corev1 "k8s.io/api/core/v1" +) + +// Responsibilities: +// - Removes EMQX cores from the cluster that should no longer be a member, because of scale-down. +// - Removes EMQX replicants that no longer have a corresponding pod. 
+type syncClusterMembership struct {
+	*EMQXReconciler
+}
+
+func (s *syncClusterMembership) reconcile(r *reconcileRound, instance *crd.EMQX) subResult {
+	// Instantiate API requester.
+	req := r.oldestCoreRequester()
+	if req == nil {
+		return reconcilePostpone()
+	}
+
+	staleNodes := []*crd.EMQXNode{}
+	for _, node := range instance.Status.CoreNodes {
+		// Running cores / cores still having respective pods should not be force-left:
+		if node.Status != api.NodeStatusStopped || node.PodName != "" || r.state.podWithName(node.PodName) != nil {
+			continue
+		}
+		// Cores with node name having pod ordinal under desired number of replicas should not be force-left:
+		nodeName := parseNodeName(node.Name, instance)
+		ordinal := util.PodOrdinal(nodeName.podName)
+		if ordinal < int(instance.Spec.NumCoreReplicas()) {
+			continue
+		}
+		// Cores having higher pod ordinals should be force-left:
+		staleNodes = append(staleNodes, &node)
+	}
+
+	for _, node := range instance.Status.ReplicantNodes {
+		// Running replicants / replicants still having respective pods should not be force-left:
+		if node.Status != api.NodeStatusStopped || node.PodName != "" || r.state.podWithName(node.PodName) != nil {
+			continue
+		}
+		// Stopped replicants w/o respective pods should be force-left:
+		staleNodes = append(staleNodes, &node)
+	}
+
+	for _, staleNode := range staleNodes {
+		err := api.ForceLeave(req, staleNode.Name)
+		if err == nil {
+			s.EventRecorder.Event(
+				instance,
+				corev1.EventTypeNormal,
+				"NodeForceLeave",
+				fmt.Sprintf("Stale %s node %s force-left the cluster", staleNode.Role, staleNode.Name),
+			)
+		} else {
+			return reconcileError(err)
+		}
+	}
+
+	return subResult{}
+}
diff --git a/internal/controller/sync_cluster_membership_suite_test.go b/internal/controller/sync_cluster_membership_suite_test.go
new file mode 100644
index 000000000..4f88a9b00
--- /dev/null
+++ b/internal/controller/sync_cluster_membership_suite_test.go
@@ -0,0 +1,260 @@
+package controller
+
+import ( + "fmt" + "net/http" + "net/url" + "strings" + + crd "github.com/emqx/emqx-operator/api/v3alpha1" + req "github.com/emqx/emqx-operator/internal/requester" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +var _ = Describe("Reconciler syncClusterMembership", Ordered, func() { + var ns *corev1.Namespace + var instance *crd.EMQX + var round *reconcileRound + var coreSet *appsv1.StatefulSet + var replicantSet *appsv1.ReplicaSet + var corePod0 *corev1.Pod + var replicantPod *corev1.Pod + + var forceLeftNodes []string + + const updateRevision = "update" + + emqxNodeName := func(podName string) string { + return fmt.Sprintf("emqx@%s.%s.%s.svc.%s", + podName, + instance.HeadlessServiceNamespacedName().Name, + instance.Namespace, + instance.Spec.ClusterDomain) + } + + BeforeAll(func() { + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "controller-sync-cluster-test", + Labels: map[string]string{"test": "e2e"}, + }, + } + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) + }) + + BeforeEach(func() { + coreLabels := emqx.DefaultLabelsWith(crd.CoreLabels()) + coreSet = &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: emqx.Name + "-core", + Namespace: ns.Name, + Labels: coreLabels, + }, + Spec: appsv1.StatefulSetSpec{ + ServiceName: emqx.Name + "-core", + Replicas: ptr.To(int32(1)), + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.OnDeleteStatefulSetStrategyType, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: coreLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: coreLabels}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "emqx", Image: "emqx"}}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, coreSet)).Should(Succeed()) + + 
corePod0 = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: coreSet.Name + "-0", + Namespace: ns.Name, + Labels: map[string]string{ + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", + appsv1.ControllerRevisionHashLabelKey: updateRevision, + }, + OwnerReferences: ownerReferences(coreSet), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "emqx", Image: "emqx"}}, + }, + } + Expect(k8sClient.Create(ctx, corePod0)).Should(Succeed()) + + replicantLabels := emqx.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: "rev1"}, + ) + replicantSet = &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: emqx.Name + "-replicant-rev1", + Namespace: ns.Name, + Labels: replicantLabels, + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: ptr.To(int32(1)), + Selector: &metav1.LabelSelector{MatchLabels: replicantLabels}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: replicantLabels}, + Spec: corev1.PodSpec{Containers: []corev1.Container{{Name: "emqx", Image: "emqx"}}}, + }, + }, + } + Expect(k8sClient.Create(ctx, replicantSet)).Should(Succeed()) + replicantPod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: replicantSet.Name + "-xyz", + Namespace: ns.Name, + Labels: replicantLabels, + OwnerReferences: ownerReferences(replicantSet), + }, + Spec: replicantSet.Spec.Template.Spec, + } + Expect(k8sClient.Create(ctx, replicantPod)).Should(Succeed()) + + replicantPod.Status.PodIP = "10.0.0.1" + Expect(k8sClient.Status().Update(ctx, replicantPod)).Should(Succeed()) + + coreSet.Status.Replicas = 1 + coreSet.Status.ReadyReplicas = 1 + coreSet.Status.CurrentRevision = updateRevision + coreSet.Status.UpdateRevision = updateRevision + Expect(k8sClient.Status().Update(ctx, coreSet)).Should(Succeed()) + + replicantSet.Status.Replicas = 1 + replicantSet.Status.ReadyReplicas = 1 + Expect(k8sClient.Status().Update(ctx, replicantSet)).Should(Succeed()) + + 
forceLeftNodes = nil + mockRequester := req.NewMockRequester( + func(method string, u url.URL, body []byte, header http.Header) (*http.Response, []byte, error) { + if method == "DELETE" && strings.Contains(u.Path, "force_leave") { + parts := strings.Split(u.Path, "/") + forceLeftNodes = append(forceLeftNodes, parts[len(parts)-2]) + return &http.Response{StatusCode: 204}, nil, nil + } + return &http.Response{StatusCode: 501}, nil, nil + }, + ) + + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.ClusterDomain = "local" + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{} + instance.Spec.ReplicantTemplate.Spec.Replicas = ptr.To(int32(1)) + round = newReconcileRoundWithRequester(mockRequester) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + }) + + AfterEach(func() { + _ = k8sClient.Delete(ctx, corePod0) + _ = k8sClient.Delete(ctx, replicantPod) + Expect(k8sClient.Delete(ctx, coreSet)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, replicantSet)).Should(Succeed()) + }) + + It("force-leaves scaled-down node whose pod is gone", func() { + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "running"}, + {Name: emqxNodeName(coreSet.Name + "-1"), PodName: "", Status: "stopped"}, + {Name: emqxNodeName(coreSet.Name + "-10"), PodName: "", Status: "stopped"}, + } + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(ConsistOf( + emqxNodeName(coreSet.Name+"-1"), + emqxNodeName(coreSet.Name+"-10"), + )) + }) + + It("does NOT force-leave stopped node whose pod still exists", func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "stopped"}, + {Name: emqxNodeName(coreSet.Name + 
"-1"), PodName: coreSet.Name + "-1", Status: "stopped"}, + } + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(BeEmpty()) + }) + + It("does NOT force-leave running nodes without pods", func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "running"}, + {Name: emqxNodeName(coreSet.Name + "-1"), PodName: "", Status: "running"}, + } + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(BeEmpty()) + }) + + It("force-leaves stale replicant nodes whose pods are gone", func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "running"}, + } + instance.Status.ReplicantNodes = []crd.EMQXNode{ + {Name: "emqx@10.0.0.1", PodName: replicantPod.Name, Status: "stopped", Role: "replicant"}, + {Name: "emqx@10.0.0.11", PodName: "", Status: "stopped", Role: "replicant"}, + } + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(ConsistOf("emqx@10.0.0.11")) + }) + + It("does NOT force-leave stopped replicant whose pod still exists", func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "running"}, + } + 
instance.Status.ReplicantNodes = []crd.EMQXNode{ + {Name: "emqx@10.0.0.1", PodName: replicantPod.Name, Status: "stopped", Role: "replicant"}, + } + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(BeEmpty()) + }) + + It("does NOT force-leave running replicant nodes without pods", func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: emqxNodeName(corePod0.Name), PodName: corePod0.Name, Status: "running"}, + } + instance.Status.ReplicantNodes = []crd.EMQXNode{ + {Name: "emqx@10.0.0.99", PodName: "", Status: "running", Role: "replicant"}, + } + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + s := &syncClusterMembership{emqxReconciler} + Expect(s.reconcile(round, instance)).Should(Equal(subResult{})) + Expect(forceLeftNodes).To(BeEmpty()) + }) +}) diff --git a/internal/controller/sync_core_set.go b/internal/controller/sync_core_set.go new file mode 100644 index 000000000..762847f07 --- /dev/null +++ b/internal/controller/sync_core_set.go @@ -0,0 +1,398 @@ +package controller + +import ( + "fmt" + "slices" + + emperror "emperror.dev/errors" + crd "github.com/emqx/emqx-operator/api/v3alpha1" + util "github.com/emqx/emqx-operator/internal/controller/util" + "github.com/emqx/emqx-operator/internal/emqx/api" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" +) + +// Responsibilities: +// - Scaling the core set up according to specified number of replicas. +// - Scaling the core set down safely according to specified number of replicas. +// - Safety: availability is maintained. +// Loss of availability of a single, next-in-line replica is tolerated. +// - Safety: connections and sessions are evacuated first. 
+// - Safety: existing DS shard replicas prevent scaling until all replicas are migrated. +// See `dsUpdateReplicaSets` reconciler. +// +// - Conducting safe pod-by-pod in-place rolling update of the core set. +// - Safety: availability is maintained. +// - Safety: connections and sessions are evacuated first. +// +// - Terminating evacuations on updated cores. +// +// NOTE +// As evacuation state is expected to survive pod recreation, evacuations need to be +// terminated manually. Currently, this reconciler can not tell the difference between +// evacuations started by itself and manually by the user, latter will be stopped on the +// next reconcile. +type syncCoreSet struct { + *EMQXReconciler +} + +type admissionAction int + +const ( + // Pod may be removed right now. + admissionRemove admissionAction = iota + // Pod removal blocked; reason explains why. + admissionWait + // Pod needs evacuation before it can be removed. + admissionEvacuate +) + +type coreAdmission struct { + Action admissionAction + Reason string +} + +func (s *syncCoreSet) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { + coreSet := r.state.coreSet() + if coreSet == nil { + return reconcilePostpone() + } + + desiredReplicas := instance.Spec.NumCoreReplicas() + currentReplicas := util.NumReplicas(coreSet) + + // Handle scale-up: simply update the StatefulSet replica count. + if currentReplicas < desiredReplicas { + r.log.V(1).Info("scaling up coreSet", + "statefulSet", klog.KObj(coreSet), + "from", currentReplicas, + "to", desiredReplicas, + ) + return s.scaleUp(r, desiredReplicas) + } + + // Handle scale-down: remove highest-ordinal pod with evacuation gating. 
+ if currentReplicas > desiredReplicas { + r.log.V(1).Info("scaling down coreSet", + "statefulSet", klog.KObj(coreSet), + "from", currentReplicas, + "to", desiredReplicas, + ) + return s.scaleDown(r, instance, currentReplicas) + } + + err := s.updateEvacuationState(r, instance) + if err != nil { + return reconcileError(emperror.Wrap(err, "failed to update evacuation state")) + } + + // Handle rolling update: replace outdated pods one at a time, highest ordinal first. + return s.rollingUpdate(r, instance) +} + +// rollingUpdate detects outdated core pods and replaces them one at a time, +// starting from the highest ordinal. Each pod is evacuated before deletion. +func (s *syncCoreSet) rollingUpdate(r *reconcileRound, instance *crd.EMQX) subResult { + // Sort outdated pods by ordinal ascending; pick highest ordinal last. + outdated := listOutdatedPods(r) + sortByOrdinal(outdated) + + if len(outdated) == 0 { + return subResult{} + } + + r.log.V(1).Info("rolling coreSet update", + "statefulSet", klog.KObj(r.state.coreSet()), + "outdatedPods", len(outdated), + ) + + candidate := outdated[len(outdated)-1] + + if instance.Spec.HasReplicants() && + instance.Status.ReplicantNodesStatus.CurrentRevision != instance.Status.ReplicantNodesStatus.UpdateRevision { + // ReplicantSet is in the process of update. + // Keep at least one old-version core alive so current-revision replicants can rejoin. 
+ if len(outdated) == 1 { + admission := coreAdmission{Action: admissionWait, Reason: "current replicantSet still migrating"} + return s.onCoreAdmission(r, instance, candidate, admission, "rollingUpdate") + } + } + + admission := checkCorePodRemoval(r, instance, candidate, false) + return s.onCoreAdmission(r, instance, candidate, admission, "rollingUpdate") +} + +func (s *syncCoreSet) scaleUp(r *reconcileRound, desiredReplicas int32) subResult { + coreSet := r.state.coreSet() + coreSet.Spec.Replicas = &desiredReplicas + err := s.Client.Update(r.ctx, coreSet) + if err != nil { + return subResult{err: emperror.Wrap(err, "failed to scale up coreSet")} + } + return subResult{} +} + +// scaleDown removes the highest-ordinal pod with evacuation gating, then +// decrements the StatefulSet replica count. +func (s *syncCoreSet) scaleDown(r *reconcileRound, instance *crd.EMQX, currentReplicas int32) subResult { + coreSet := r.state.coreSet() + + // Candidate is the highest-ordinal pod, where ordinal = currentReplicas-1. + candidateName := fmt.Sprintf("%s-%d", coreSet.Name, currentReplicas-1) + candidate := r.state.podWithName(candidateName) + + var admission coreAdmission + if candidate == nil { + admission = coreAdmission{Action: admissionRemove, Reason: "already terminated"} + } else { + admission = checkCorePodRemoval(r, instance, candidate, true) + } + + if admission.Action == admissionRemove { + // Decrement StatefulSet replica count first so the StatefulSet controller + // won't recreate the pod after we delete it. 
+ newReplicas := currentReplicas - 1 + coreSet.Spec.Replicas = &newReplicas + err := s.Client.Update(r.ctx, coreSet) + if err != nil { + return subResult{err: emperror.Wrap(err, "failed to decrement coreSet replicas")} + } + } + if candidate != nil { + return s.onCoreAdmission(r, instance, candidate, admission, "scaleDown") + } + return subResult{} +} + +// Stops evacuation on nodes that are no longer need to evacuate anything: +// nodes that belong to the most recent coreSet revision. +func (s *syncCoreSet) updateEvacuationState(r *reconcileRound, instance *crd.EMQX) error { + updateRevision := r.state.coreSet().Status.UpdateRevision + for _, evacuation := range instance.Status.NodeEvacuations { + if evacuation.State != api.EvacuationStateProhibiting { + continue + } + node := instance.Status.FindNode(evacuation.NodeName) + if node == nil || node.Role != "core" || node.PodName == "" { + continue + } + pod := r.state.podWithName(node.PodName) + if pod != nil && r.state.partOfCoreSetRevision(pod, updateRevision) { + err := api.StopEvacuation(r.requester.forPod(pod), node.Name) + if err == nil { + s.EventRecorder.Event( + instance, + corev1.EventTypeNormal, + "NodeEvacuation", + fmt.Sprintf("Node %s evacuation stopped", node.Name), + ) + } else { + return err + } + } + } + return nil +} + +// listOutdatedPods returns core StatefulSet pods whose pod template is not yet the +// desired one: anything not labeled with Status.UpdateRevision. +// +// We intentionally do not key off CurrentRevision alone. Pods can remain labeled +// with a revision hash that is neither CurrentRevision nor UpdateRevision (e.g. +// stuck pod after ControllerRevision history moved on). Those are still outdated. 
+func listOutdatedPods(r *reconcileRound) []*corev1.Pod { + var outdated []*corev1.Pod + coreSet := r.state.coreSet() + updateRevision := coreSet.Status.UpdateRevision + if updateRevision == "" || updateRevision == coreSet.Status.CurrentRevision { + return outdated + } + for _, pod := range r.state.podsManagedBy(coreSet) { + if !r.state.partOfCoreSetRevision(pod, updateRevision) { + outdated = append(outdated, pod) + } + } + return outdated +} + +// checkCorePodRemoval is a pure function that decides whether a core pod can +// be safely removed. It inspects instance status and pod state but performs no +// side effects. +// +// When isPermanent is true, the pod is being permanently removed: +// data would be lost so there are extra safety checks (e.g. DS replication site). +// When isPermanent is false (rolling update), the replacement pod inherits the +// data: blocking on DS site condition would stall the rollout indefinitely. +func checkCorePodRemoval( + r *reconcileRound, + instance *crd.EMQX, + pod *corev1.Pod, + isPermanent bool, +) coreAdmission { + status := &instance.Status + + if len(status.NodeEvacuations) > 0 { + if status.NodeEvacuations[0].State != api.EvacuationStateProhibiting { + return coreAdmission{Action: admissionWait, Reason: "node evacuation is still in progress"} + } + } + + // Disallow removing pod if other cores just recently became ready. + numAvailableCores := int32(0) + for _, p := range r.state.podsManagedBy(r.state.coreSet()) { + if p.GetUID() != pod.GetUID() && util.IsPodAvailable(p, instance.Spec.CoreTemplate.Spec.MinReadySeconds) { + numAvailableCores++ + } + } + if numAvailableCores < instance.Spec.NumCoreReplicas()-1 { + return coreAdmission{Action: admissionWait, Reason: "cores are not available yet"} + } + + // If the pod is already being deleted, wait for its deletion to complete. 
+ if pod.DeletionTimestamp != nil { + return coreAdmission{Action: admissionWait, Reason: fmt.Sprintf("pod %s deletion in progress", pod.Name)} + } + + // Disallow permanently removing the pod that is still a DS replication site. + if isPermanent { + dsCondition := util.FindPodCondition(pod, crd.DSReplicationSite) + if dsCondition != nil && dsCondition.Status != corev1.ConditionFalse { + return coreAdmission{Action: admissionWait, Reason: fmt.Sprintf("pod %s is still a DS replication site", pod.Name)} + } + } + + var nodeInfo *crd.EMQXNode + for _, node := range status.CoreNodes { + if node.PodName == pod.Name { + nodeInfo = &node + break + } + } + + if nodeInfo == nil { + return coreAdmission{Action: admissionRemove, Reason: "node is out of cluster"} + } + + if nodeInfo.Status == api.NodeStatusStopped { + return coreAdmission{Action: admissionRemove, Reason: "node is already stopped"} + } + + if nodeInfo.Sessions > 0 { + if instance.Spec.NumCoreReplicas() == 1 && !instance.Spec.HasReplicants() { + return coreAdmission{ + Action: admissionRemove, + Reason: fmt.Sprintf("node %s has active sessions nowhere to evacuate", nodeInfo.Name), + } + } + return coreAdmission{ + Action: admissionEvacuate, + Reason: fmt.Sprintf("node %s has active sessions", nodeInfo.Name), + } + } + + return coreAdmission{Action: admissionRemove, Reason: "node is safe to stop"} +} + +// onCoreAdmission performs the side effects implied by a coreAdmission: +// delete the pod (admissionRemove), log and wait (admissionWait), or start evacuation (admissionEvacuate). 
+func (s *syncCoreSet) onCoreAdmission( + r *reconcileRound, + instance *crd.EMQX, + candidate *corev1.Pod, + admission coreAdmission, + cause string, +) subResult { + switch admission.Action { + case admissionRemove: + r.log.V(1).Info("removing core pod", + "reason", admission.Reason, + "pod", klog.KObj(candidate), + "statefulSet", klog.KObj(r.state.coreSet()), + "cause", cause, + ) + if err := s.Client.Delete(r.ctx, candidate); err != nil { + return subResult{err: emperror.Wrap(err, "failed to delete core pod")} + } + case admissionWait: + r.log.V(1).Info("removal of core pod postponed", + "reason", admission.Reason, + "pod", klog.KObj(candidate), + "statefulSet", klog.KObj(r.state.coreSet()), + "cause", cause, + ) + case admissionEvacuate: + err := s.startEvacuation(r, instance, candidate) + if err != nil { + return subResult{err: emperror.WrapWithDetails(err, + "failed to start node evacuation", + "pod", klog.KObj(candidate), + "statefulSet", klog.KObj(r.state.coreSet()), + "cause", cause, + )} + } + } + return subResult{} +} + +// startEvacuation starts session evacuation from the node backing the given +// core pod, migrating its sessions toward the current migration target nodes. 
+func (s *syncCoreSet) startEvacuation( + r *reconcileRound, + instance *crd.EMQX, + pod *corev1.Pod, +) error { + nodeInfo := instance.Status.FindNodeByPodName(pod.Name) + if nodeInfo == nil { + return emperror.New("no corresponding node in cluster status") + } + nodeName := nodeInfo.Name + strategy := instance.Spec.UpdateStrategy.EvacuationStrategy + migrateTo := migrationTargetNodes(r, instance) + migrateTo = slices.DeleteFunc( + migrateTo, + func(e string) bool { return e == nodeInfo.Name }, + ) + if len(migrateTo) == 0 { + s.EventRecorder.Event( + instance, + corev1.EventTypeWarning, + "NodeEvacuation", + fmt.Sprintf("Node %s evacuation skipped: no nodes to migrate to", nodeName), + ) + return nil + } + err := api.StartEvacuation(r.requester.forPod(pod), strategy, migrateTo, nodeName) + if err != nil { + return err + } + s.EventRecorder.Event( + instance, + corev1.EventTypeNormal, + "NodeEvacuation", + fmt.Sprintf("Node %s evacuation started", nodeName), + ) + return nil +} + +// migrationTargetNodes returns the list of EMQX nodes to migrate workloads to. +// For cores, targets are pods on the current (update) revision. For replicants, +// targets are pods in the update ReplicaSet. 
+func migrationTargetNodes(r *reconcileRound, instance *crd.EMQX) []string { + targets := []string{} + if instance.Spec.HasReplicants() { + for _, node := range instance.Status.ReplicantNodes { + pod := r.state.podWithName(node.PodName) + if pod != nil && r.state.partOfUpdateReplicantSet(pod, instance) { + targets = append(targets, node.Name) + } + } + } else { + for _, node := range instance.Status.CoreNodes { + pod := r.state.podWithName(node.PodName) + if pod != nil && r.state.partOfCoreSet(pod) { + targets = append(targets, node.Name) + } + } + } + return targets +} diff --git a/internal/controller/sync_core_set_suite_test.go b/internal/controller/sync_core_set_suite_test.go new file mode 100644 index 000000000..ec6826252 --- /dev/null +++ b/internal/controller/sync_core_set_suite_test.go @@ -0,0 +1,238 @@ +package controller + +import ( + crd "github.com/emqx/emqx-operator/api/v3alpha1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + k8sErrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Reconciler syncCoreSet", Ordered, func() { + var ns *corev1.Namespace = &corev1.Namespace{} + var instance *crd.EMQX + + var round *reconcileRound + var coreSet *appsv1.StatefulSet + var pod0, pod1 *corev1.Pod + + const ( + currentRevision string = "current" + updateRevision string = "update" + ) + + BeforeAll(func() { + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "controller-sync-core-set-test", + Labels: map[string]string{"test": "e2e"}, + }, + } + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) + }) + + BeforeEach(func() { + coreLabels := emqx.DefaultLabelsWith(crd.CoreLabels()) + coreSet = &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: emqx.Name + "-core", + Namespace: ns.Name, + Labels: coreLabels, + }, + Spec: appsv1.StatefulSetSpec{ + ServiceName: emqx.Name + "-core", + Replicas: ptr.To(int32(2)), + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.OnDeleteStatefulSetStrategyType, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: coreLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: coreLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, coreSet)).Should(Succeed()) + + pod0 = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: coreSet.Name + "-0", + Namespace: ns.Name, + Labels: map[string]string{ + crd.LabelInstance: "emqx", + crd.LabelManagedBy: "emqx-operator", + crd.LabelDBRole: "core", + appsv1.ControllerRevisionHashLabelKey: currentRevision, + }, + OwnerReferences: ownerReferences(coreSet), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + } + pod1 = pod0.DeepCopy() + pod1.ObjectMeta.Name = coreSet.Name + "-1" + Expect(k8sClient.Create(ctx, 
pod0)).Should(Succeed()) + Expect(k8sClient.Create(ctx, pod1)).Should(Succeed()) + pod0.Status.Conditions = []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()}, + } + pod1.Status.Conditions = pod0.Status.Conditions + Expect(k8sClient.Status().Update(ctx, pod0)).Should(Succeed()) + Expect(k8sClient.Status().Update(ctx, pod1)).Should(Succeed()) + + coreSet.Status.Replicas = 2 + coreSet.Status.ReadyReplicas = 2 + coreSet.Status.AvailableReplicas = 2 + coreSet.Status.CurrentRevision = currentRevision + coreSet.Status.UpdateRevision = updateRevision + Expect(k8sClient.Status().Update(ctx, coreSet)).Should(Succeed()) + + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(2)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: "emqx@" + pod0.Name, PodName: pod0.Name, Status: "running"}, + {Name: "emqx@" + pod1.Name, PodName: pod1.Name, Status: "running"}, + } + + round = newReconcileRound() + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + }) + + AfterEach(func() { + _ = k8sClient.Delete(ctx, pod0) + _ = k8sClient.Delete(ctx, pod1) + Expect(k8sClient.Delete(ctx, coreSet)).Should(Succeed()) + }) + + It("waits when fewer than N-1 other core pods are available (ready)", func() { + // Removing pod1 requires at least one *other* available core; pod0 is not Ready. 
+ pod0.Status.Conditions = []corev1.PodCondition{} + Expect(k8sClient.Status().Update(ctx, pod0)).Should(Succeed()) + coreSet.Status.AvailableReplicas = 1 + Expect(k8sClient.Status().Update(ctx, coreSet)).Should(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + admission := checkCorePodRemoval(round, instance, pod1, false) + Expect(admission).Should(And( + HaveField("Action", Equal(admissionWait)), + HaveField("Reason", Equal("cores are not available yet")), + )) + }) + + It("waits while a node evacuation is still in progress", func() { + instance.Status.NodeEvacuations = []crd.NodeEvacuationStatus{ + {NodeName: "emqx@" + pod1.Name, State: "evicting_sessions"}, + } + admission := checkCorePodRemoval(round, instance, pod1, false) + Expect(admission).Should(And( + HaveField("Action", Equal(admissionWait)), + HaveField("Reason", Equal("node evacuation is still in progress")), + )) + }) + + It("node session > 0", func() { + instance.Status.CoreNodes[1].Sessions = 99999 + admission := checkCorePodRemoval(round, instance, pod1, false) + Expect(admission).Should(And( + HaveField("Action", Equal(admissionEvacuate)), + HaveField("Reason", ContainSubstring("active sessions")), + )) + }) + + It("single node session > 0", func() { + instance.Spec.CoreTemplate.Spec.Replicas = ptr.To(int32(1)) + instance.Status.CoreNodes = []crd.EMQXNode{ + {Name: "emqx@" + pod0.Name, PodName: pod0.Name, Status: "running", Sessions: 99999}, + } + admission := checkCorePodRemoval(round, instance, pod0, false) + Expect(admission).Should(And( + HaveField("Action", Equal(admissionRemove)), + HaveField("Reason", ContainSubstring("nowhere")), + )) + }) + + It("node session is 0", func() { + instance.Status.CoreNodes[1].Sessions = 0 + admission := checkCorePodRemoval(round, instance, pod1, false) + Expect(admission).To( + HaveField("Action", Equal(admissionRemove)), + ) + }) + + When("replicant replicaSet updating", func() { + BeforeEach(func() { + 
instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: crd.EMQXReplicantTemplateSpec{ + Replicas: ptr.To(int32(3)), + }, + } + instance.Status.ReplicantNodesStatus = crd.ReplicantNodesStatus{ + UpdateRevision: updateRevision, + CurrentRevision: currentRevision, + } + }) + + It("should allow rolling update with multiple old cores", func() { + s := &syncCoreSet{emqxReconciler} + result := s.rollingUpdate(round, instance) + Expect(result).To(Equal(subResult{})) + _, err := actualObject(pod1) + Expect(k8sErrors.IsNotFound(err)).To(BeTrue(), "highest-ordinal outdated pod should be deleted first") + }) + + It("should block rolling update with 1 old core", func() { + // Simulate `pod1` already on new revision, while `pod0` is outdated. + pod1.Labels[appsv1.ControllerRevisionHashLabelKey] = coreSet.Status.UpdateRevision + Expect(k8sClient.Update(ctx, pod1)).Should(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + s := &syncCoreSet{emqxReconciler} + result := s.rollingUpdate(round, instance) + Expect(result).To(Equal(subResult{})) + Expect(actualObject(pod0)).To( + Not(BeNil()), + "sole remaining outdated core must stay up while replicant ReplicaSet migrates", + ) + }) + + }) + + It("DS replication site blocks scale-down", func() { + pod1.Status.Conditions = []corev1.PodCondition{ + {Type: crd.DSReplicationSite, Status: corev1.ConditionTrue}, + } + admission := checkCorePodRemoval(round, instance, pod1, true) + Expect(admission).To(And( + HaveField("Action", Equal(admissionWait)), + HaveField("Reason", ContainSubstring("DS replication site")), + )) + }) + + It("DS replication site does not block rolling update", func() { + pod1.Status.Conditions = []corev1.PodCondition{ + {Type: crd.DSReplicationSite, Status: corev1.ConditionTrue}, + } + admission := checkCorePodRemoval(round, instance, pod1, false) + Expect(admission).To( + HaveField("Action", Equal(admissionRemove)), + ) + }) +}) diff --git 
a/internal/controller/sync_core_set_test.go b/internal/controller/sync_core_set_test.go new file mode 100644 index 000000000..e5725c80f --- /dev/null +++ b/internal/controller/sync_core_set_test.go @@ -0,0 +1,256 @@ +package controller + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" +) + +func TestSortByOrdinal(t *testing.T) { + mkPod := func(name string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + } + } + + t.Run("fewer than 10 pods", func(t *testing.T) { + pods := []*corev1.Pod{ + mkPod("emqx-core-2"), + mkPod("emqx-core-0"), + mkPod("emqx-core-1"), + } + sortByOrdinal(pods) + assert.Equal(t, "emqx-core-0", pods[0].Name) + assert.Equal(t, "emqx-core-1", pods[1].Name) + assert.Equal(t, "emqx-core-2", pods[2].Name) + }) + + t.Run("10+ pods sort numerically not lexicographically", func(t *testing.T) { + pods := []*corev1.Pod{ + mkPod("emqx-core-9"), + mkPod("emqx-core-10"), + mkPod("emqx-core-2"), + mkPod("emqx-core-11"), + mkPod("emqx-core-0"), + mkPod("emqx-core-1"), + } + sortByOrdinal(pods) + expected := []string{ + "emqx-core-0", + "emqx-core-1", + "emqx-core-2", + "emqx-core-9", + "emqx-core-10", + "emqx-core-11", + } + got := make([]string, len(pods)) + for i, p := range pods { + got[i] = p.Name + } + assert.Equal(t, expected, got) + }) + + t.Run("pods without ordinal suffix sort first", func(t *testing.T) { + pods := []*corev1.Pod{ + mkPod("emqx-core-1"), + mkPod("emqx-core-nonum"), + mkPod("emqx-core-0"), + } + sortByOrdinal(pods) + assert.Equal(t, "emqx-core-nonum", pods[0].Name) + assert.Equal(t, "emqx-core-0", pods[1].Name) + assert.Equal(t, "emqx-core-1", pods[2].Name) + }) +} + +func TestListOutdatedPods(t *testing.T) { + const coreSetName = "emqx-core" + const coreSetUID = 
types.UID("test-sts-uid") + const otherOwnerUID = types.UID("other-rs-uid") + + mkCoreSet := func(currentRev, updateRev string) *appsv1.StatefulSet { + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: coreSetName, + Namespace: "default", + UID: coreSetUID, + }, + Status: appsv1.StatefulSetStatus{ + CurrentRevision: currentRev, + UpdateRevision: updateRev, + }, + } + } + + mkCoreSetPod := func(name string, revision ...string) *corev1.Pod { + labels := map[string]string{} + if len(revision) == 1 { + labels[appsv1.ControllerRevisionHashLabelKey] = revision[0] + } + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "StatefulSet", + Name: coreSetName, + UID: coreSetUID, + Controller: ptr.To(true), + }, + }, + }, + } + } + + mkReplicantPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "emqx-replicant-xyz", + Namespace: "default", + Labels: map[string]string{ + appsv1.ControllerRevisionHashLabelKey: "some-replicant-hash", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "ReplicaSet", + Name: "emqx-replicant-hash", + UID: otherOwnerUID, + Controller: ptr.To(true), + }, + }, + }, + } + + t.Run("empty updateRevision yields no outdated pods", func(t *testing.T) { + sts := mkCoreSet("rev-a", "") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-a"), + mkCoreSetPod(coreSetName+"-1", "rev-b"), + }, + }, + } + assert.Empty(t, listOutdatedPods(r)) + }) + + t.Run("standard rolling update: pods on currentRevision only", func(t *testing.T) { + sts := mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-old"), + mkCoreSetPod(coreSetName+"-1", "rev-old"), 
+ }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 2) + assert.Equal(t, coreSetName+"-0", got[0].Name) + assert.Equal(t, coreSetName+"-1", got[1].Name) + }) + + t.Run("partial rollout: only pods not on updateRevision", func(t *testing.T) { + sts := mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-new"), + mkCoreSetPod(coreSetName+"-1", "rev-old"), + }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 1) + assert.Equal(t, coreSetName+"-1", got[0].Name) + }) + + t.Run("untracked third revision during rollout", func(t *testing.T) { + sts := mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-ancient"), + mkCoreSetPod(coreSetName+"-1", "rev-old"), + }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 2) + assert.Equal(t, coreSetName+"-0", got[0].Name) + assert.Equal(t, coreSetName+"-1", got[1].Name) + }) + + t.Run("all pods on updateRevision", func(t *testing.T) { + sts := mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-new"), + mkCoreSetPod(coreSetName+"-1", "rev-new"), + }, + }, + } + assert.Empty(t, listOutdatedPods(r)) + }) + + t.Run("pods not managed by core StatefulSet are ignored", func(t *testing.T) { + sts := mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-old"), + mkReplicantPod, + }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 1) + assert.Equal(t, coreSetName+"-0", got[0].Name) + }) + + t.Run("missing controller-revision-hash label counts as outdated", func(t *testing.T) { + sts := 
mkCoreSet("rev-old", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName + "-0"), + }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 1) + assert.Equal(t, coreSetName+"-0", got[0].Name) + }) + + t.Run("currentRevision empty but updateRevision set still lists mismatches", func(t *testing.T) { + sts := mkCoreSet("", "rev-new") + r := &reconcileRound{ + state: &reconcileState{ + coreSets: []*appsv1.StatefulSet{sts}, + pods: []*corev1.Pod{ + mkCoreSetPod(coreSetName+"-0", "rev-old"), + }, + }, + } + got := listOutdatedPods(r) + require.Len(t, got, 1) + assert.Equal(t, coreSetName+"-0", got[0].Name) + }) +} diff --git a/internal/controller/sync_core_sets.go b/internal/controller/sync_core_sets.go deleted file mode 100644 index e5e9c4123..000000000 --- a/internal/controller/sync_core_sets.go +++ /dev/null @@ -1,202 +0,0 @@ -package controller - -import ( - "fmt" - - emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" - util "github.com/emqx/emqx-operator/internal/controller/util" - "github.com/emqx/emqx-operator/internal/emqx/api" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" -) - -type syncCoreSets struct { - *EMQXReconciler -} - -type scaleDownCore struct { - Pod *corev1.Pod - Reason string -} - -func (s *syncCoreSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - updateSts := r.state.updateCoreSet(instance) - currentSts := r.state.currentCoreSet(instance) - if updateSts == nil || currentSts == nil { - return subResult{} - } - if updateSts.UID != currentSts.UID { - return s.migrateSet(r, instance, currentSts) - } - return s.scaleDownSet(r, instance, currentSts) -} - -// Orchestrates gradual scale down of the old statefulSet, by migrating workloads to the new statefulSet. 
-func (s *syncCoreSets) migrateSet( - r *reconcileRound, - instance *crdv2.EMQX, - current *appsv1.StatefulSet, -) subResult { - admission, err := s.chooseScaleDownCore(r, instance, current) - if err != nil { - return subResult{err: err} - } - if admission.Pod != nil { - r.log.V(1).Info("migrating coreSet", "pod", klog.KObj(admission.Pod), "statefulSet", klog.KObj(current)) - - *current.Spec.Replicas = *current.Spec.Replicas - 1 - if err := s.Client.Update(r.ctx, current); err != nil { - return subResult{err: emperror.Wrap(err, "failed to scale down old statefulSet")} - } - } - if admission.Reason != "" { - r.log.V(1).Info("migrate coreSet skipped", "reason", admission.Reason, "statefulSet", klog.KObj(current)) - } - return subResult{} -} - -// Scale up or down the existing statefulSet. -func (s *syncCoreSets) scaleDownSet( - r *reconcileRound, - instance *crdv2.EMQX, - current *appsv1.StatefulSet, -) subResult { - desiredReplicas := *instance.Spec.CoreTemplate.Spec.Replicas - currentReplicas := *current.Spec.Replicas - - if currentReplicas < desiredReplicas { - r.log.V(1).Info("scaling up coreSet", "statefulSet", klog.KObj(current), "desiredReplicas", desiredReplicas) - *current.Spec.Replicas = desiredReplicas - if err := s.Client.Update(r.ctx, current); err != nil { - return subResult{err: emperror.Wrap(err, "failed to scale up statefulSet")} - } - return subResult{} - } - - if currentReplicas > desiredReplicas { - admission, err := s.chooseScaleDownCore(r, instance, current) - if err != nil { - return subResult{err: err} - } - if admission.Pod != nil { - r.log.V(1).Info("scaling down coreSet", "pod", klog.KObj(admission.Pod), "statefulSet", klog.KObj(current)) - *current.Spec.Replicas = *current.Spec.Replicas - 1 - if err := s.Client.Update(r.ctx, current); err != nil { - return subResult{err: emperror.Wrap(err, "failed to scale down statefulSet")} - } - return subResult{} - } - if admission.Reason != "" { - r.log.V(1).Info("scale down coreSet skipped", 
"reason", admission.Reason, "statefulSet", klog.KObj(current)) - } - } - - return subResult{} -} - -func (s *syncCoreSets) chooseScaleDownCore( - r *reconcileRound, - instance *crdv2.EMQX, - current *appsv1.StatefulSet, -) (scaleDownCore, error) { - // Disallow scaling down the statefulSet if replcants replicaSet is still updating. - status := &instance.Status - if instance.Spec.HasReplicants() { - if status.ReplicantNodesStatus.CurrentRevision != status.ReplicantNodesStatus.UpdateRevision { - return scaleDownCore{Reason: "replicant replicaSet is still updating"}, nil - } - } - - if !checkInitialDelaySecondsReady(instance) { - return scaleDownCore{Reason: "instance is not ready"}, nil - } - - if len(status.NodeEvacuationsStatus) > 0 { - if status.NodeEvacuationsStatus[0].State != "prohibiting" { - return scaleDownCore{Reason: "node evacuation is still in progress"}, nil - } - } - - // List the pods managed by the current coreSet. - pods := r.state.podsManagedBy(current) - sortByName(pods) - - // No more pods, no need to scale down. - if len(pods) == 0 { - return scaleDownCore{Reason: "no more pods"}, nil - } - - // Get the pod to be scaled down next. - scaleDownPod := pods[len(pods)-1] - - // Disallow scaling down the pod that is already being deleted. - if scaleDownPod.DeletionTimestamp != nil { - return scaleDownCore{Reason: fmt.Sprintf("pod %s deletion in progress", scaleDownPod.Name)}, nil - } - - // Disallow scaling down the pod that is still a DS replication site. - dsCondition := util.FindPodCondition(scaleDownPod, crdv2.DSReplicationSite) - if dsCondition != nil && dsCondition.Status != corev1.ConditionFalse { - return scaleDownCore{Reason: fmt.Sprintf("pod %s is still a DS replication site", scaleDownPod.Name)}, nil - } - - // Get the node info of the pod to be scaled down. 
- var scaleDownNode *crdv2.EMQXNode - for _, node := range instance.Status.CoreNodes { - if node.PodName == scaleDownPod.Name { - scaleDownNode = &node - break - } - } - - // If the cluster lacks information about the node, there's very likely nothing to migrate. - if scaleDownNode == nil { - return scaleDownCore{Pod: scaleDownPod, Reason: "node is out of cluster"}, nil - } - - // Scale down the node that is already stopped. - if scaleDownNode.Status == "stopped" { - return scaleDownCore{Pod: scaleDownPod, Reason: "node is already stopped"}, nil - } - - // Disallow scaling down the node that has at least one session. - if scaleDownNode.Sessions > 0 { - nodeName := scaleDownNode.Name - strategy := instance.Spec.UpdateStrategy.EvacuationStrategy - migrateTo := migrationTargetNodes(r, instance) - if len(migrateTo) == 0 { - return scaleDownCore{Reason: fmt.Sprintf("no nodes to migrate %s to", nodeName)}, nil - } - err := api.StartEvacuation(r.oldestCoreRequester(), strategy, migrateTo, nodeName) - if err != nil { - return scaleDownCore{}, emperror.Wrap(err, "failed to start node evacuation") - } - s.EventRecorder.Event(instance, corev1.EventTypeNormal, "NodeEvacuation", fmt.Sprintf("Node %s evacuation started", nodeName)) - return scaleDownCore{Reason: fmt.Sprintf("node %s needs to be evacuated", nodeName)}, nil - } - - return scaleDownCore{Pod: scaleDownPod}, nil -} - -// Returns the list of nodes to migrate workloads to. 
-func migrationTargetNodes(r *reconcileRound, instance *crdv2.EMQX) []string { - targets := []string{} - if instance.Spec.HasReplicants() { - for _, node := range instance.Status.ReplicantNodes { - pod := r.state.podWithName(node.PodName) - if pod != nil && r.state.partOfUpdateSet(pod, instance) { - targets = append(targets, node.Name) - } - } - } else { - for _, node := range instance.Status.CoreNodes { - pod := r.state.podWithName(node.PodName) - if pod != nil && r.state.partOfUpdateSet(pod, instance) { - targets = append(targets, node.Name) - } - } - } - return targets -} diff --git a/internal/controller/sync_emqx_config.go b/internal/controller/sync_emqx_config.go index bc12d2735..935d9f5cd 100644 --- a/internal/controller/sync_emqx_config.go +++ b/internal/controller/sync_emqx_config.go @@ -4,7 +4,7 @@ import ( "fmt" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" config "github.com/emqx/emqx-operator/internal/controller/config" resources "github.com/emqx/emqx-operator/internal/controller/resources" "github.com/emqx/emqx-operator/internal/emqx/api" @@ -18,7 +18,7 @@ type syncConfig struct { *EMQXReconciler } -func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (s *syncConfig) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { // Fetch desired / applied configuration. 
confSpec := instance.Spec.Config.Data confLast := lastAppliedConfig(instance) @@ -38,16 +38,16 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul if err != nil && k8sErrors.IsNotFound(err) { configMap = resource.ConfigMap(confWithDefaults) if err := ctrl.SetControllerReference(instance, configMap, s.Scheme); err != nil { - return subResult{err: emperror.Wrap(err, "failed to set controller reference for configMap")} + return reconcileError(emperror.Wrap(err, "failed to set controller reference for configMap")) } r.log.V(1).Info("creating config resource", "configMap", klog.KObj(configMap)) if err := s.Client.Create(r.ctx, configMap); err != nil { - return subResult{err: emperror.Wrap(err, "failed to create configMap")} + return reconcileError(emperror.Wrap(err, "failed to create configMap")) } return subResult{} } if err != nil { - return subResult{err: emperror.Wrap(err, "failed to get configMap")} + return reconcileError(emperror.Wrap(err, "failed to get configMap")) } // If the config is different, update the config right away. 
@@ -55,11 +55,11 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul if configMap.Data[resources.BaseConfigFile] != confWithDefaults { configMap = resource.ConfigMap(confWithDefaults) if err := ctrl.SetControllerReference(instance, configMap, s.Scheme); err != nil { - return subResult{err: emperror.Wrap(err, "failed to set controller reference for configMap")} + return reconcileError(emperror.Wrap(err, "failed to set controller reference for configMap")) } r.log.V(1).Info("updating config resource", "configMap", klog.KObj(configMap)) if err := s.Client.Update(r.ctx, configMap); err != nil { - return subResult{err: emperror.Wrap(err, "failed to update configMap")} + return reconcileError(emperror.Wrap(err, "failed to update configMap")) } if len(stripped) > 0 { s.EventRecorder.Event( @@ -80,9 +80,10 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul return subResult{} } - // Postpone runtime config updates until ready. - if !instance.Status.IsConditionTrue(crdv2.CoreNodesReady) { - return subResult{} + // Postpone runtime config updates until at least one core is available. + coreSet := r.state.coreSet() + if coreSet.Status.ReadyReplicas == 0 { + return reconcilePostpone() } // If the annotation is set, and the config is different, update the config. 
@@ -90,7 +91,7 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul // Delete readonly configs c, err := config.EMQXConfig(confWithDefaults) if err != nil || c == nil { - return subResult{err: emperror.Wrap(err, "failed to parse .spec.config.data")} + return reconcileError(emperror.Wrap(err, "failed to parse .spec.config.data")) } strippedReadonly := c.StripReadOnlyConfig() confRuntime := c.Print() @@ -98,7 +99,7 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul // Update the config through API r.log.V(1).Info("applying runtime config", "config", confRuntime) if err := api.UpdateConfigs(r.oldestCoreRequester(), instance.Spec.Config.Mode, confRuntime); err != nil { - return subResult{err: emperror.Wrap(err, "failed to update emqx config through API")} + return reconcileError(emperror.Wrap(err, "failed to update emqx config through API")) } if len(strippedReadonly) > 0 { s.EventRecorder.Event( @@ -110,11 +111,11 @@ func (s *syncConfig) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResul reflectLastAppliedConfig(instance, conf) if err := s.Client.Update(r.ctx, instance); err != nil { - return subResult{err: emperror.Wrap(err, "failed to update emqx instance annotation")} + return reconcileError(emperror.Wrap(err, "failed to update emqx instance annotation")) } // Restart reconciliation loop with consistent reconcile state. 
- return subResult{result: ctrl.Result{Requeue: true}} + return reconcileRequeue() } return subResult{} @@ -150,23 +151,23 @@ func stripNonChangeableConfig(confDesired string, confLast string) (string, []st return confDesired, stripped } -func reflectLastAppliedConfig(instance *crdv2.EMQX, confStr string) { +func reflectLastAppliedConfig(instance *crd.EMQX, confStr string) { if instance.Annotations == nil { instance.Annotations = map[string]string{} } - instance.Annotations[crdv2.AnnotationLastEMQXConfig] = confStr + instance.Annotations[crd.AnnotationLastEMQXConfig] = confStr } -func lastAppliedConfig(instance *crdv2.EMQX) *string { +func lastAppliedConfig(instance *crd.EMQX) *string { if instance.Annotations != nil { - if confStr := instance.Annotations[crdv2.AnnotationLastEMQXConfig]; confStr != "" { + if confStr := instance.Annotations[crd.AnnotationLastEMQXConfig]; confStr != "" { return &confStr } } return nil } -func applicableConfig(instance *crdv2.EMQX) string { +func applicableConfig(instance *crd.EMQX) string { // If the annotation is set, use it: most of the time it's the config currently in use. if confStr := lastAppliedConfig(instance); confStr != nil { return config.WithDefaults(*confStr) diff --git a/internal/controller/sync_pods_suite_test.go b/internal/controller/sync_pods_suite_test.go deleted file mode 100644 index 173dcf14c..000000000 --- a/internal/controller/sync_pods_suite_test.go +++ /dev/null @@ -1,579 +0,0 @@ -package controller - -import ( - "time" - - crdv2 "github.com/emqx/emqx-operator/api/v2" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" -) - -const currentRevision string = "current" -const updateRevision string = "update" - -func actualize(instance client.Object) (client.Object, error) { - err := k8sClient.Get(ctx, client.ObjectKeyFromObject(instance), instance) - return instance, err -} - -var _ = Describe("Reconciler syncPods", Ordered, func() { - var ns *corev1.Namespace = &corev1.Namespace{} - var instance *crdv2.EMQX - - var sr *syncReplicantSets - var sc *syncCoreSets - var round *reconcileRound - - var updateCoreSet, currentCoreSet *appsv1.StatefulSet - var updateReplicantSet, currentReplicantSet *appsv1.ReplicaSet - var currentCorePod, currentReplicantPod *corev1.Pod - - BeforeAll(func() { - // Create namespace: - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-sync-pods-suite-test", - Labels: map[string]string{ - "test": "e2e", - }, - }, - } - Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) - // Set up "update" coreSet: - instance = emqx.DeepCopy() - updateCoreLabels := instance.DefaultLabelsWith( - crdv2.CoreLabels(), - map[string]string{crdv2.LabelPodTemplateHash: updateRevision}, - ) - updateCoreSet = &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: instance.Name + "-", - Namespace: ns.Name, - Labels: updateCoreLabels, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: ptr.To(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: updateCoreLabels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: updateCoreLabels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - }, - }, - } - // Set up "current" coreSet: - currentCoreSet = updateCoreSet.DeepCopy() - currentCoreSet.Labels[crdv2.LabelPodTemplateHash] = currentRevision - currentCoreSet.Spec.Selector.MatchLabels[crdv2.LabelPodTemplateHash] = currentRevision - currentCoreSet.Spec.Template.Labels[crdv2.LabelPodTemplateHash] = currentRevision - // Set up 
"update" replicantSet: - updateReplicantLabels := instance.DefaultLabelsWith( - crdv2.ReplicantLabels(), - map[string]string{crdv2.LabelPodTemplateHash: updateRevision}, - ) - updateReplicantSet = &appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: instance.Name + "-", - Namespace: ns.Name, - Labels: updateReplicantLabels, - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: ptr.To(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: updateReplicantLabels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: updateReplicantLabels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - }, - }, - } - // Set up "current" replicantSet: - currentReplicantSet = updateReplicantSet.DeepCopy() - currentReplicantSet.Labels[crdv2.LabelPodTemplateHash] = currentRevision - currentReplicantSet.Spec.Selector.MatchLabels[crdv2.LabelPodTemplateHash] = currentRevision - currentReplicantSet.Spec.Template.Labels[crdv2.LabelPodTemplateHash] = currentRevision - // Create resources: - Expect(k8sClient.Create(ctx, updateCoreSet)).Should(Succeed()) - Expect(k8sClient.Create(ctx, updateReplicantSet)).Should(Succeed()) - Expect(k8sClient.Create(ctx, currentCoreSet)).Should(Succeed()) - Expect(k8sClient.Create(ctx, currentReplicantSet)).Should(Succeed()) - // Create "current" coreSet pod: - currentCorePod = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: currentCoreSet.Name + "-0", - Namespace: currentCoreSet.Namespace, - Labels: currentCoreSet.Spec.Template.Labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "StatefulSet", - Name: currentCoreSet.Name, - UID: currentCoreSet.UID, - Controller: ptr.To(true), - }, - }, - }, - Spec: currentCoreSet.Spec.Template.Spec, - } - Expect(k8sClient.Create(ctx, currentCorePod)).Should(Succeed()) - // Create "current" replicantSet pod: - currentReplicantPod = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - 
GenerateName: currentReplicantSet.Name + "-", - Namespace: currentReplicantSet.Namespace, - Labels: currentReplicantSet.Spec.Template.Labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: currentReplicantSet.Name, - UID: currentReplicantSet.UID, - Controller: ptr.To(true), - }, - }, - }, - Spec: currentReplicantSet.Spec.Template.Spec, - } - Expect(k8sClient.Create(ctx, currentReplicantPod)).Should(Succeed()) - // Mock resource status: - updateCoreSet.Status.Replicas = 1 - updateCoreSet.Status.ReadyReplicas = 1 - updateReplicantSet.Status.Replicas = 1 - updateReplicantSet.Status.ReadyReplicas = 1 - currentCoreSet.Status.Replicas = 1 - currentCoreSet.Status.ReadyReplicas = 1 - currentReplicantSet.Status.Replicas = 1 - currentReplicantSet.Status.ReadyReplicas = 1 - Expect(k8sClient.Status().Update(ctx, updateCoreSet)).Should(Succeed()) - Expect(k8sClient.Status().Update(ctx, updateReplicantSet)).Should(Succeed()) - Expect(k8sClient.Status().Update(ctx, currentCoreSet)).Should(Succeed()) - Expect(k8sClient.Status().Update(ctx, currentReplicantSet)).Should(Succeed()) - }) - - AfterAll(func() { - Expect(k8sClient.DeleteAllOf(ctx, &corev1.Pod{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.DeleteAllOf(ctx, &appsv1.ReplicaSet{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.DeleteAllOf(ctx, &appsv1.StatefulSet{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - BeforeEach(func() { - // Mock instance state: - instance = emqx.DeepCopy() - instance.Namespace = ns.Name - instance.Spec.ReplicantTemplate = &crdv2.EMQXReplicantTemplate{ - Spec: crdv2.EMQXReplicantTemplateSpec{ - Replicas: ptr.To(int32(1)), - }, - } - instance.Status = crdv2.EMQXStatus{ - Conditions: []metav1.Condition{ - { - Type: crdv2.Available, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, 
- }, - CoreNodesStatus: crdv2.EMQXNodesStatus{ - UpdateRevision: updateRevision, - UpdateReplicas: 1, - CurrentRevision: currentRevision, - CurrentReplicas: 1, - ReadyReplicas: 2, - Replicas: 1, - }, - CoreNodes: []crdv2.EMQXNode{ - {Name: "emqx@" + currentCorePod.Name, PodName: currentCorePod.Name, Status: "running"}, - }, - ReplicantNodesStatus: crdv2.EMQXNodesStatus{ - UpdateRevision: updateRevision, - UpdateReplicas: 1, - CurrentRevision: currentRevision, - CurrentReplicas: 1, - ReadyReplicas: 2, - Replicas: 1, - }, - ReplicantNodes: []crdv2.EMQXNode{ - {Name: "emqx@10.0.0.1", PodName: currentReplicantPod.Name, Status: "running"}, - }, - } - // Instantiate reconciler: - sc = &syncCoreSets{emqxReconciler} - sr = &syncReplicantSets{emqxReconciler} - round = newReconcileRound() - round.state = loadReconcileState(ctx, k8sClient, instance) - }) - - It("running update emqx node controller", func() { - Eventually(func() *crdv2.EMQX { - _ = sc.reconcile(round, instance) - _ = sr.reconcile(round, instance) - return instance - }).WithTimeout(timeout).WithPolling(interval).Should(And( - // should add pod deletion cost - WithTransform( - func(*crdv2.EMQX) (client.Object, error) { return actualize(currentReplicantPod) }, - HaveField("Annotations", HaveKeyWithValue("controller.kubernetes.io/pod-deletion-cost", "-99999")), - ), - // should scale down rs - WithTransform( - func(*crdv2.EMQX) (client.Object, error) { return actualize(currentReplicantSet) }, - HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(0))), - ), - // before rs not ready, do nothing for sts - WithTransform( - func(*crdv2.EMQX) (client.Object, error) { return actualize(currentCoreSet) }, - HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(1))), - ), - )) - - By("mock rs ready, should scale down sts") - instance.Status.ReplicantNodesStatus.CurrentRevision = instance.Status.ReplicantNodesStatus.UpdateRevision - Eventually(func() *crdv2.EMQX { - _ = sc.reconcile(round, instance) - _ = sr.reconcile(round, 
instance) - return instance - }).WithTimeout(timeout).WithPolling(interval).Should( - WithTransform( - func(*crdv2.EMQX) (client.Object, error) { return actualize(currentCoreSet) }, - HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(0))), - ), - ) - }) - -}) - -var _ = Describe("Reconciler syncCoreSets", Ordered, func() { - var ns *corev1.Namespace = &corev1.Namespace{} - var instance *crdv2.EMQX - - var s *syncCoreSets - var round *reconcileRound - var current *appsv1.StatefulSet - var currentPod *corev1.Pod - - BeforeAll(func() { - // Create namespace: - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-sync-core-sets-test", - Labels: map[string]string{"test": "e2e"}, - }, - } - Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) - // Create "current" coreSet: - instance = emqx.DeepCopy() - currentLabels := instance.DefaultLabelsWith( - crdv2.CoreLabels(), - map[string]string{crdv2.LabelPodTemplateHash: "fake"}, - ) - current = &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: instance.Name + "-fake", - Namespace: ns.Name, - Labels: currentLabels, - }, - Spec: appsv1.StatefulSetSpec{ - ServiceName: instance.Name + "-fake", - Replicas: ptr.To(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: currentLabels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: currentLabels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - }, - }, - } - Expect(k8sClient.Create(ctx, current)).Should(Succeed()) - // Create "current" coreSet pod: - currentPod = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: current.Name + "-0", - Namespace: ns.Name, - Labels: current.Spec.Template.Labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "StatefulSet", - Name: current.Name, - UID: current.UID, - Controller: ptr.To(true), - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - 
{Name: "emqx", Image: "emqx"}, - }, - }, - } - Expect(k8sClient.Create(ctx, currentPod)).Should(Succeed()) - }) - - AfterAll(func() { - Expect(k8sClient.DeleteAllOf(ctx, &corev1.Pod{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.DeleteAllOf(ctx, &appsv1.StatefulSet{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - BeforeEach(func() { - // Mock instance state: - instance = emqx.DeepCopy() - instance.Namespace = ns.Name - instance.Status.CoreNodesStatus.CurrentRevision = "fake" - instance.Status.Conditions = []metav1.Condition{ - { - Type: crdv2.Available, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, - } - instance.Status.CoreNodes = []crdv2.EMQXNode{ - {Name: "emqx@" + currentPod.Name, PodName: currentPod.Name, Status: "running"}, - } - // Instantiate reconciler: - s = &syncCoreSets{emqxReconciler} - round = newReconcileRound() - round.state = loadReconcileState(ctx, k8sClient, instance) - }) - - It("emqx is not available", func() { - instance.Status.Conditions = []metav1.Condition{} - admission, err := s.chooseScaleDownCore(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("not ready")), - HaveField("Pod", BeNil()), - )) - }) - - It("emqx is available / initial delay has not passed", func() { - instance.Spec.UpdateStrategy.InitialDelaySeconds = 99999999 - admission, err := s.chooseScaleDownCore(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("not ready")), - HaveField("Pod", BeNil()), - )) - }) - - It("replicaSet is not ready", func() { - instance.Spec.ReplicantTemplate = &crdv2.EMQXReplicantTemplate{ - Spec: crdv2.EMQXReplicantTemplateSpec{ - Replicas: ptr.To(int32(3)), - }, - } - instance.Status.ReplicantNodesStatus = crdv2.EMQXNodesStatus{ - 
UpdateRevision: updateRevision, - CurrentRevision: currentRevision, - } - admission, err := s.chooseScaleDownCore(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("replicaSet")), - HaveField("Pod", BeNil()), - )) - Eventually(s.reconcile).WithArguments(newReconcileRound(), instance). - WithTimeout(timeout). - WithPolling(interval). - Should(Equal(subResult{})) - }) - - It("node session > 0", func() { - instance.Status.CoreNodes[0].Sessions = 99999 - admission, err := s.chooseScaleDownCore(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", Not(BeEmpty())), - HaveField("Pod", BeNil()), - )) - }) - - It("node session is 0", func() { - instance.Status.CoreNodes[0].Sessions = 0 - admission, err := s.chooseScaleDownCore(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", BeEmpty()), - HaveField("Pod", Not(BeNil())), - )) - }) -}) - -var _ = Describe("Reconciler syncReplicantSets", Ordered, func() { - var ns *corev1.Namespace = &corev1.Namespace{} - var instance *crdv2.EMQX - - var s *syncReplicantSets - var round *reconcileRound - var current *appsv1.ReplicaSet - var currentPod *corev1.Pod - - BeforeAll(func() { - // Create namespace: - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "controller-v2beta1-sync-replicant-sets-test", - Labels: map[string]string{ - "test": "e2e", - }, - }, - } - Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) - // Create "current" replicaSet: - instance = emqx.DeepCopy() - current = &appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: instance.Name + "-", - Namespace: ns.Name, - Labels: instance.DefaultLabelsWith(crdv2.ReplicantLabels()), - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: ptr.To(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: 
instance.DefaultLabelsWith(crdv2.ReplicantLabels()), - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: instance.DefaultLabelsWith(crdv2.ReplicantLabels()), - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - }, - }, - } - Expect(k8sClient.Create(ctx, current)).Should(Succeed()) - // Create "current" replicaSet pod: - currentPod = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: current.Name + "-", - Namespace: ns.Name, - Labels: current.Spec.Selector.MatchLabels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: current.Name, - UID: current.UID, - Controller: ptr.To(true), - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "emqx", Image: "emqx"}, - }, - }, - } - Expect(k8sClient.Create(ctx, currentPod)).Should(Succeed()) - }) - - AfterAll(func() { - Expect(k8sClient.DeleteAllOf(ctx, &corev1.Pod{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.DeleteAllOf(ctx, &appsv1.ReplicaSet{}, client.InNamespace(ns.Name))).Should(Succeed()) - Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - BeforeEach(func() { - // Mock instance state: - instance = emqx.DeepCopy() - instance.Namespace = ns.Name - instance.Status.ReplicantNodesStatus.CurrentRevision = "fake" - instance.Status.Conditions = []metav1.Condition{ - { - Type: crdv2.Available, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, - } - instance.Status.ReplicantNodes = []crdv2.EMQXNode{ - {Name: "emqx@10.0.0.1", PodName: currentPod.Name, Status: "running"}, - } - // Instantiate reconciler: - s = &syncReplicantSets{emqxReconciler} - round = newReconcileRound() - round.state = loadReconcileState(ctx, k8sClient, instance) - }) - - It("emqx is not available", func() { - instance.Status.Conditions = []metav1.Condition{} - admission, err := 
s.chooseScaleDownReplicant(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("not ready")), - HaveField("Pod", BeNil()), - )) - }) - - It("emqx is available / initial delay has not passed", func() { - instance.Spec.UpdateStrategy.InitialDelaySeconds = 99999999 - admission, err := s.chooseScaleDownReplicant(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("not ready")), - HaveField("Pod", BeNil()), - )) - }) - - It("emqx is in node evacuations", func() { - instance.Status.NodeEvacuationsStatus = []crdv2.NodeEvacuationStatus{ - {State: "fake"}, - } - admission, err := s.chooseScaleDownReplicant(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", ContainSubstring("evacuation")), - HaveField("Pod", BeNil()), - )) - }) - - It("node session > 0", func() { - instance.Status.ReplicantNodes[0].Sessions = 99999 - admission, err := s.chooseScaleDownReplicant(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", Not(BeEmpty())), - HaveField("Pod", BeNil()), - )) - }) - - It("node session is 0", func() { - instance.Status.ReplicantNodes[0].Sessions = 0 - admission, err := s.chooseScaleDownReplicant(round, instance, current) - Expect(err).ShouldNot(HaveOccurred()) - Expect(admission).Should(And( - HaveField("Reason", BeEmpty()), - HaveField("Pod", Not(BeNil())), - )) - }) -}) diff --git a/internal/controller/sync_replicant_sets.go b/internal/controller/sync_replicant_sets.go index f29407526..f0579f094 100644 --- a/internal/controller/sync_replicant_sets.go +++ b/internal/controller/sync_replicant_sets.go @@ -4,7 +4,7 @@ import ( "fmt" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" util 
"github.com/emqx/emqx-operator/internal/controller/util" "github.com/emqx/emqx-operator/internal/emqx/api" appsv1 "k8s.io/api/apps/v1" @@ -21,7 +21,7 @@ type scaleDownReplicant struct { Reason string } -func (s *syncReplicantSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (s *syncReplicantSets) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { updateRs := r.state.updateReplicantSet(instance) currentRs := r.state.currentReplicantSet(instance) if updateRs == nil || currentRs == nil { @@ -36,7 +36,7 @@ func (s *syncReplicantSets) reconcile(r *reconcileRound, instance *crdv2.EMQX) s // Orchestrates gradual scale down of the old replicaSet, by migrating workloads to the new replicaSet. func (s *syncReplicantSets) migrateSet( r *reconcileRound, - instance *crdv2.EMQX, + instance *crd.EMQX, current *appsv1.ReplicaSet, ) subResult { admission, err := s.chooseScaleDownReplicant(r, instance, current) @@ -70,16 +70,16 @@ func (s *syncReplicantSets) migrateSet( func (s *syncReplicantSets) chooseScaleDownReplicant( r *reconcileRound, - instance *crdv2.EMQX, + instance *crd.EMQX, current *appsv1.ReplicaSet, ) (scaleDownReplicant, error) { var scaleDownPod *corev1.Pod - var scaleDownNode *crdv2.EMQXNode + var scaleDownNode *crd.EMQXNode status := &instance.Status - // Disallow scaling down the replicaSet if the instance just recently became ready. - if !checkInitialDelaySecondsReady(instance) { - return scaleDownReplicant{Reason: "instance is not ready"}, nil + // Disallow scaling down the replicaSet if replicants just recently became ready. + if !r.state.areReplicantsAvailable(instance) { + return scaleDownReplicant{Reason: "replicants are not available yet"}, nil } // Nothing to do if the replicaSet has no pods. 
@@ -99,9 +99,9 @@ func (s *syncReplicantSets) chooseScaleDownReplicant( } } - if len(status.NodeEvacuationsStatus) > 0 { - evacuatingNode := status.NodeEvacuationsStatus[0] - if evacuatingNode.State != "prohibiting" { + if len(status.NodeEvacuations) > 0 { + evacuatingNode := status.NodeEvacuations[0] + if evacuatingNode.State != api.EvacuationStateProhibiting { return scaleDownReplicant{Reason: fmt.Sprintf("node %s evacuation in progress", evacuatingNode.NodeName)}, nil } for _, node := range status.ReplicantNodes { @@ -127,14 +127,14 @@ func (s *syncReplicantSets) chooseScaleDownReplicant( return scaleDownReplicant{}, emperror.Errorf("node is missing for pod %s", scaleDownPod.Name) } // If the pod is already stopped, return it. - if scaleDownNode.Status == "stopped" { + if scaleDownNode.Status == api.NodeStatusStopped { return scaleDownReplicant{Pod: scaleDownPod, Reason: "pod is already stopped"}, nil } } // Disallow scaling down the pod that is still a DS replication site. // While replicants are not supposed to be DS replication sites, check it for safety. - dsCondition := util.FindPodCondition(scaleDownPod, crdv2.DSReplicationSite) + dsCondition := util.FindPodCondition(scaleDownPod, crd.DSReplicationSite) if dsCondition != nil && dsCondition.Status != corev1.ConditionFalse { return scaleDownReplicant{Reason: fmt.Sprintf("pod %s is still a DS replication site", scaleDownPod.Name)}, nil } diff --git a/internal/controller/sync_replicant_sets_suite_test.go b/internal/controller/sync_replicant_sets_suite_test.go new file mode 100644 index 000000000..0596ad1aa --- /dev/null +++ b/internal/controller/sync_replicant_sets_suite_test.go @@ -0,0 +1,392 @@ +package controller + +import ( + crd "github.com/emqx/emqx-operator/api/v3alpha1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Reconciler syncReplicantSets", Ordered, func() { + var ns *corev1.Namespace = &corev1.Namespace{} + var instance *crd.EMQX + + var s *syncReplicantSets + var round *reconcileRound + + var coreSet *appsv1.StatefulSet + var update, current *appsv1.ReplicaSet + var currentReplicantPod *corev1.Pod + + const ( + currentRevision string = "current" + updateRevision string = "update" + ) + + BeforeAll(func() { + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "controller-sync-replicant-sets-suite-test", + Labels: map[string]string{ + "test": "e2e", + }, + }, + } + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + + instance = emqx.DeepCopy() + coreLabels := instance.DefaultLabelsWith(crd.CoreLabels()) + coreSet = &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: instance.CoreNamespacedName().Name, + Namespace: ns.Name, + Labels: coreLabels, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + ServiceName: instance.HeadlessServiceNamespacedName().Name, + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.OnDeleteStatefulSetStrategyType, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: coreLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: coreLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + }, + }, + } + + updateReplicantLabels := instance.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: updateRevision}, + ) + update = &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: instance.Name + "-", + Namespace: ns.Name, + Labels: updateReplicantLabels, + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: ptr.To(int32(1)), + Selector: &metav1.LabelSelector{ + MatchLabels: updateReplicantLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: updateReplicantLabels, + }, + 
Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + }, + }, + } + current = update.DeepCopy() + current.Labels[crd.LabelPodTemplateHash] = currentRevision + current.Spec.Selector.MatchLabels[crd.LabelPodTemplateHash] = currentRevision + current.Spec.Template.Labels[crd.LabelPodTemplateHash] = currentRevision + + Expect(k8sClient.Create(ctx, coreSet)).Should(Succeed()) + Expect(k8sClient.Create(ctx, update)).Should(Succeed()) + Expect(k8sClient.Create(ctx, current)).Should(Succeed()) + + currentReplicantPod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: current.Name + "-", + Namespace: current.Namespace, + Labels: current.Spec.Template.Labels, + OwnerReferences: ownerReferences(current), + }, + Spec: current.Spec.Template.Spec, + } + Expect(k8sClient.Create(ctx, currentReplicantPod)).Should(Succeed()) + + // Create a pod for the update RS so areReplicantsAvailable is satisfied. + updateReplicantPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: update.Name + "-", + Namespace: update.Namespace, + Labels: update.Spec.Template.Labels, + OwnerReferences: ownerReferences(update), + }, + Spec: update.Spec.Template.Spec, + } + Expect(k8sClient.Create(ctx, updateReplicantPod)).Should(Succeed()) + updateReplicantPod.Status.Conditions = []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()}, + } + Expect(k8sClient.Status().Update(ctx, updateReplicantPod)).Should(Succeed()) + + coreSet.Status.Replicas = 1 + coreSet.Status.ReadyReplicas = 1 + coreSet.Status.UpdateRevision = updateRevision + coreSet.Status.CurrentRevision = updateRevision + update.Status.Replicas = 1 + update.Status.ReadyReplicas = 1 + update.Status.AvailableReplicas = 1 + current.Status.Replicas = 1 + current.Status.ReadyReplicas = 1 + Expect(k8sClient.Status().Update(ctx, coreSet)).Should(Succeed()) + Expect(k8sClient.Status().Update(ctx, update)).Should(Succeed()) + 
Expect(k8sClient.Status().Update(ctx, current)).Should(Succeed()) + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) + }) + + BeforeEach(func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: crd.EMQXReplicantTemplateSpec{ + Replicas: ptr.To(int32(1)), + }, + } + instance.Status = crd.EMQXStatus{ + CoreNodesStatus: crd.CoreNodesStatus{ + ReadyReplicas: 1, + }, + CoreNodes: []crd.EMQXNode{}, + ReplicantNodesStatus: crd.ReplicantNodesStatus{ + UpdateRevision: updateRevision, + UpdateReplicas: 1, + CurrentRevision: currentRevision, + CurrentReplicas: 1, + ReadyReplicas: 2, + }, + ReplicantNodes: []crd.EMQXNode{ + {Name: "emqx@10.0.0.1", PodName: currentReplicantPod.Name, Status: "running"}, + }, + } + s = &syncReplicantSets{emqxReconciler} + round = newReconcileRound() + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + }) + + It("should scale down current replicant set and annotate pod", func() { + Expect(s.reconcile(round, instance)).To(Equal(subResult{})) + // Pod should be annotated with deletion cost: + Expect(actualObject(currentReplicantPod)).To( + HaveField("Annotations", HaveKeyWithValue("controller.kubernetes.io/pod-deletion-cost", "-99999")), + ) + // Current RS should be scaled down: + Expect(actualObject(current)).To( + HaveField("Spec.Replicas", HaveValue(BeEquivalentTo(0))), + ) + }) +}) + +var _ = Describe("Reconciler syncReplicantSets admission", Ordered, func() { + var ns *corev1.Namespace = &corev1.Namespace{} + var instance *crd.EMQX + + var s *syncReplicantSets + var round *reconcileRound + var current *appsv1.ReplicaSet + var update *appsv1.ReplicaSet + var currentPod *corev1.Pod + var updatePod *corev1.Pod + + const ( + currentRevision string = "current" + updateRevision string = "update" + ) + + BeforeAll(func() { + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
"controller-sync-replicant-sets-admission-test", + Labels: map[string]string{ + "test": "e2e", + }, + }, + } + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) + + // Create "current" (old) RS with a known hash label. + currentLabels := emqx.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: currentRevision}, + ) + current = &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: emqx.Name + "-", + Namespace: ns.Name, + Labels: currentLabels, + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: ptr.To(int32(1)), + Selector: &metav1.LabelSelector{ + MatchLabels: currentLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: currentLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, current)).Should(Succeed()) + + // Create pod owned by "current" RS. + currentPod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: current.Name + "-", + Namespace: ns.Name, + Labels: current.Spec.Selector.MatchLabels, + OwnerReferences: ownerReferences(current), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx"}, + }, + }, + } + Expect(k8sClient.Create(ctx, currentPod)).Should(Succeed()) + + // Create "update" (new) RS with a different hash label. 
+ updateLabels := emqx.DefaultLabelsWith( + crd.ReplicantLabels(), + map[string]string{crd.LabelPodTemplateHash: updateRevision}, + ) + update = &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: emqx.Name + "-", + Namespace: ns.Name, + Labels: updateLabels, + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: ptr.To(int32(1)), + Selector: &metav1.LabelSelector{ + MatchLabels: updateLabels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: updateLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx:new"}, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, update)).Should(Succeed()) + + // Create a Ready pod owned by "update" RS so that areReplicantsAvailable passes. + updatePod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: update.Name + "-", + Namespace: ns.Name, + Labels: update.Spec.Selector.MatchLabels, + OwnerReferences: ownerReferences(update), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "emqx", Image: "emqx:new"}, + }, + }, + } + Expect(k8sClient.Create(ctx, updatePod)).Should(Succeed()) + updatePod.Status.Conditions = []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()}, + } + Expect(k8sClient.Status().Update(ctx, updatePod)).Should(Succeed()) + + update.Status.Replicas = 1 + update.Status.ReadyReplicas = 1 + update.Status.AvailableReplicas = 1 + Expect(k8sClient.Status().Update(ctx, update)).Should(Succeed()) + }) + + AfterAll(func() { + Expect(k8sClient.Delete(ctx, ns)).Should(Succeed()) + }) + + BeforeEach(func() { + instance = emqx.DeepCopy() + instance.Namespace = ns.Name + instance.Spec.ReplicantTemplate = &crd.EMQXReplicantTemplate{ + Spec: crd.EMQXReplicantTemplateSpec{ + Replicas: ptr.To(int32(1)), + }, + } + instance.Status.ReplicantNodesStatus.CurrentRevision = currentRevision + instance.Status.ReplicantNodesStatus.UpdateRevision = updateRevision 
+ instance.Status.ReplicantNodes = []crd.EMQXNode{ + {Name: "emqx@10.0.0.1", PodName: currentPod.Name, Status: "running"}, + } + s = &syncReplicantSets{emqxReconciler} + round = newReconcileRound() + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(update), update)).Should(Succeed()) + update.Status.AvailableReplicas = 1 + Expect(k8sClient.Status().Update(ctx, update)).Should(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + }) + + It("replicants not available (update pod not yet ready)", func() { + update.Status.AvailableReplicas = 0 + Expect(k8sClient.Status().Update(ctx, update)).Should(Succeed()) + Expect(reloadReconcileState(round, k8sClient, instance)).To(Succeed()) + admission, err := s.chooseScaleDownReplicant(round, instance, current) + Expect(err).ShouldNot(HaveOccurred()) + Expect(admission).Should(And( + HaveField("Reason", ContainSubstring("not available")), + HaveField("Pod", BeNil()), + )) + }) + + It("node evacuation in progress", func() { + instance.Status.NodeEvacuations = []crd.NodeEvacuationStatus{ + {State: "fake"}, + } + admission, err := s.chooseScaleDownReplicant(round, instance, current) + Expect(err).ShouldNot(HaveOccurred()) + Expect(admission).Should(And( + HaveField("Reason", ContainSubstring("evacuation")), + HaveField("Pod", BeNil()), + )) + }) + + It("node session > 0", func() { + instance.Status.ReplicantNodes[0].Sessions = 99999 + admission, err := s.chooseScaleDownReplicant(round, instance, current) + Expect(err).ShouldNot(HaveOccurred()) + Expect(admission).Should(And( + HaveField("Reason", Not(BeEmpty())), + HaveField("Pod", BeNil()), + )) + }) + + It("node session is 0", func() { + instance.Status.ReplicantNodes[0].Sessions = 0 + admission, err := s.chooseScaleDownReplicant(round, instance, current) + Expect(err).ShouldNot(HaveOccurred()) + Expect(admission).Should(And( + HaveField("Reason", BeEmpty()), + HaveField("Pod", Not(BeNil())), + )) + }) +}) diff --git 
a/internal/controller/update_emqx_status.go b/internal/controller/update_emqx_status.go index 431644ba1..a42365e66 100644 --- a/internal/controller/update_emqx_status.go +++ b/internal/controller/update_emqx_status.go @@ -2,11 +2,12 @@ package controller import ( "cmp" + "fmt" "slices" "strings" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/emqx/emqx-operator/internal/emqx/api" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -17,25 +18,25 @@ type updateStatus struct { *EMQXReconciler } -func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { +func (u *updateStatus) reconcile(r *reconcileRound, instance *crd.EMQX) subResult { status := &instance.Status - status.CoreNodesStatus.Replicas = *instance.Spec.CoreTemplate.Spec.Replicas - if instance.Spec.ReplicantTemplate != nil { - status.ReplicantNodesStatus.Replicas = *instance.Spec.ReplicantTemplate.Spec.Replicas + // Core: count pods on each revision for rolling update progress. + coreSet := r.state.coreSet() + status.CoreNodesStatus.UpdatedReplicas = 0 + status.CoreNodesStatus.CurrentReplicas = 0 + for _, pod := range r.state.podsManagedBy(r.state.coreSet()) { + if r.state.partOfCoreSetRevision(pod, coreSet.Status.UpdateRevision) { + status.CoreNodesStatus.UpdatedReplicas++ + } + if r.state.partOfCoreSetRevision(pod, coreSet.Status.CurrentRevision) { + status.CoreNodesStatus.CurrentReplicas++ + } } - currentCoreSet, updateCoreSet := switchCoreSet(r, instance) + // Replicant: multi-ReplicaSet pattern retained. 
currentReplicantSet, updateReplicantSet := switchReplicantSet(r, instance) - status.CoreNodesStatus.ReadyReplicas = 0 - if currentCoreSet != nil { - status.CoreNodesStatus.CurrentReplicas = currentCoreSet.Status.Replicas - } - if updateCoreSet != nil { - status.CoreNodesStatus.UpdateReplicas = updateCoreSet.Status.Replicas - } - status.ReplicantNodesStatus.ReadyReplicas = 0 if currentReplicantSet != nil { status.ReplicantNodesStatus.CurrentReplicas = currentReplicantSet.Status.Replicas @@ -54,6 +55,8 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes } u.updateEMQXNodesStatus(r, instance, nodes) } + + status.CoreNodesStatus.ReadyReplicas = 0 for _, node := range status.CoreNodes { if node.Status == "running" { status.CoreNodesStatus.ReadyReplicas++ @@ -68,9 +71,9 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes if req != nil { clusterEvacuationsStatus, err := api.ClusterEvacuationStatus(req) if err == nil { - status.NodeEvacuationsStatus = []crdv2.NodeEvacuationStatus{} + status.NodeEvacuations = []crd.NodeEvacuationStatus{} for _, ns := range clusterEvacuationsStatus { - status.NodeEvacuationsStatus = append(status.NodeEvacuationsStatus, crdv2.NodeEvacuationStatus{ + status.NodeEvacuations = append(status.NodeEvacuations, crd.NodeEvacuationStatus{ NodeName: ns.Node, State: ns.State, SessionRecipients: ns.SessionRecipients, @@ -96,7 +99,7 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes } } if len(dsReplicationStatus.DBs) > 0 { - status.DSReplication.DBs = make([]crdv2.DSDBReplicationStatus, len(dsReplicationStatus.DBs)) + status.DSReplication.DBs = make([]crd.DSDBReplicationStatus, len(dsReplicationStatus.DBs)) } for i, db := range dsReplicationStatus.DBs { minReplicas := 0 @@ -119,7 +122,7 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes } } } - status.DSReplication.DBs[i] = crdv2.DSDBReplicationStatus{ + 
status.DSReplication.DBs[i] = crd.DSDBReplicationStatus{ Name: db.Name, NumShards: int32(len(db.Shards)), NumShardReplicas: int32(numShardReplicas), @@ -131,7 +134,7 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes } // update status condition - u.updateStatusCondition(r, instance) + evaluateStatusConditions(r.state, instance) if err := u.Client.Status().Update(r.ctx, instance); err != nil { return subResult{err: emperror.Wrap(err, "failed to update status")} @@ -139,151 +142,200 @@ func (u *updateStatus) reconcile(r *reconcileRound, instance *crdv2.EMQX) subRes return subResult{} } -func (u *updateStatus) updateStatusCondition(r *reconcileRound, instance *crdv2.EMQX) { +// evaluateStatusConditions evaluates all conditions independently from current state. +func evaluateStatusConditions(s *reconcileState, instance *crd.EMQX) { + evaluateCoreNodesProgressing(s, instance) + evaluateReplicantNodesProgressing(s, instance) + evaluateAvailable(s, instance) + evaluateReady(s, instance) +} + +func evaluateCoreNodesProgressing(s *reconcileState, instance *crd.EMQX) { + cond := crd.CoreNodesProgressing status := &instance.Status - condition := status.GetLastTrueCondition() - if condition == nil { - instance.Status.SetTrueCondition(crdv2.Initialized) - u.updateStatusCondition(r, instance) + coreSet := s.coreSet() + if coreSet == nil { + status.SetCondition(cond, metav1.ConditionTrue, "Create", "spinning up core set") return } - switch condition.Type { + desired := instance.Spec.NumCoreReplicas() + updated := coreSet.Status.UpdatedReplicas + total := coreSet.Status.Replicas + + switch { + case total < desired: + status.SetCondition(cond, metav1.ConditionTrue, "ScalingUp", + fmt.Sprintf("%d/%d core pods", total, desired)) + case total > desired: + status.SetCondition(cond, metav1.ConditionTrue, "ScalingDown", + fmt.Sprintf("%d/%d core pods", total, desired)) + case updated < desired: + status.SetCondition(cond, metav1.ConditionTrue, 
"RollingUpdate", + fmt.Sprintf("%d/%d core pods updated", updated, desired)) + default: + status.SetCondition(cond, metav1.ConditionFalse, "Converged", + fmt.Sprintf("%d core pods up to date", desired)) + } +} - case crdv2.Initialized: - updateSts := r.state.updateCoreSet(instance) - if updateSts != nil { - u.statusTransition(r, instance, crdv2.CoreNodesProgressing) - } +func forceCoreNodesProgressing(instance *crd.EMQX) { + instance.Status.SetCondition(crd.CoreNodesProgressing, metav1.ConditionTrue, "RollingUpdate", + "0 core pods updated") + instance.Status.SetCondition(crd.Ready, metav1.ConditionFalse, "CoreNodesProgressing", + "Core nodes are progressing") +} - case crdv2.CoreNodesProgressing: - updateSts := r.state.updateCoreSet(instance) - if updateSts != nil && - updateSts.Status.ReadyReplicas > 0 && - updateSts.Status.ReadyReplicas == status.CoreNodesStatus.UpdateReplicas { - u.statusTransition(r, instance, crdv2.CoreNodesReady) - } +func evaluateReplicantNodesProgressing(s *reconcileState, instance *crd.EMQX) { + cond := crd.ReplicantNodesProgressing + status := &instance.Status - case crdv2.CoreNodesReady: - if instance.Spec.HasReplicants() { - u.statusTransition(r, instance, crdv2.ReplicantNodesProgressing) - } else { - u.statusTransition(r, instance, crdv2.Available) + if !instance.Spec.HasReplicants() { + status.RemoveCondition(crd.ReplicantNodesProgressing) + return + } + + updateSet := s.updateReplicantSet(instance) + if updateSet == nil { + status.SetCondition(cond, metav1.ConditionTrue, "Create", "spinning up replicant set") + return + } + + desired := instance.Spec.NumReplicantReplicas() + currentRevision := status.ReplicantNodesStatus.CurrentRevision + updateRevision := status.ReplicantNodesStatus.UpdateRevision + total := updateSet.Status.Replicas + + switch { + case currentRevision != updateRevision: + currentSet := s.currentReplicantSet(instance) + currentReplicas := total + if currentSet != nil && currentSet.Spec.Replicas != nil { + 
currentReplicas = *currentSet.Spec.Replicas } + status.SetCondition(cond, metav1.ConditionTrue, "RollingUpdate", + fmt.Sprintf("%d/%d replicant pods updated", total-currentReplicas, total)) + case total > desired: + status.SetCondition(cond, metav1.ConditionTrue, "ScalingDown", + fmt.Sprintf("%d/%d replicant pods", total, desired)) + case total < desired: + status.SetCondition(cond, metav1.ConditionTrue, "ScalingUp", + fmt.Sprintf("%d/%d replicant pods", total, desired)) + default: + status.SetCondition(cond, metav1.ConditionFalse, "Converged", + fmt.Sprintf("%d replicant pods up to date", desired)) + } +} - case crdv2.ReplicantNodesProgressing: - if instance.Spec.HasReplicants() { - updateRs := r.state.updateReplicantSet(instance) - if updateRs != nil && - updateRs.Status.ReadyReplicas > 0 && - updateRs.Status.ReadyReplicas == status.ReplicantNodesStatus.UpdateReplicas { - u.statusTransition(r, instance, crdv2.ReplicantNodesReady) - } +func forceReplicantNodesProgressing(instance *crd.EMQX) { + instance.Status.SetCondition(crd.ReplicantNodesProgressing, metav1.ConditionTrue, "RollingUpdate", + "0 replicant pods updated") + instance.Status.SetCondition(crd.Ready, metav1.ConditionFalse, "ReplicantNodesProgressing", + "Replicant nodes are progressing") +} + +func evaluateAvailable(s *reconcileState, instance *crd.EMQX) { + cond := crd.Available + status := &instance.Status + if instance.Spec.HasReplicants() { + replicantSet := s.updateReplicantSet(instance) + desired := instance.Spec.NumReplicantReplicas() + available := int32(0) + if replicantSet != nil { + available = replicantSet.Status.AvailableReplicas + } + if available >= desired { + status.SetCondition(cond, metav1.ConditionTrue, "ReplicantPodsAvailable", + fmt.Sprintf("%d/%d replicant pods available", available, desired)) } else { - u.resetConditions(r, instance, "NoReplicants") + status.SetCondition(cond, metav1.ConditionFalse, "ReplicantPodsUnavailable", + fmt.Sprintf("%d/%d replicant pods available", 
available, desired)) } - - case crdv2.ReplicantNodesReady: - if instance.Spec.HasReplicants() { - u.statusTransition(r, instance, crdv2.Available) + } else { + coreSet := s.coreSet() + desired := instance.Spec.NumCoreReplicas() + available := int32(0) + if coreSet != nil { + available = coreSet.Status.AvailableReplicas + } + if available >= desired { + status.SetCondition(cond, metav1.ConditionTrue, "CorePodsAvailable", + fmt.Sprintf("%d/%d core pods available", available, desired)) } else { - u.resetConditions(r, instance, "NoReplicants") + status.SetCondition(cond, metav1.ConditionFalse, "CorePodsUnavailable", + fmt.Sprintf("%d/%d core pods available", available, desired)) } + } +} - case crdv2.Available: - if status.CoreNodesStatus.UpdateReplicas != status.CoreNodesStatus.Replicas || - status.CoreNodesStatus.ReadyReplicas != status.CoreNodesStatus.Replicas || - status.CoreNodesStatus.UpdateRevision != status.CoreNodesStatus.CurrentRevision { - break - } +func evaluateReady(s *reconcileState, instance *crd.EMQX) { + status := &instance.Status - if instance.Spec.HasReplicants() { - if status.ReplicantNodesStatus.UpdateReplicas != status.ReplicantNodesStatus.Replicas || - status.ReplicantNodesStatus.ReadyReplicas != status.ReplicantNodesStatus.Replicas || - status.ReplicantNodesStatus.UpdateRevision != status.ReplicantNodesStatus.CurrentRevision { - break - } - } + if !evaluateCoresReady(s, instance) { + status.SetCondition(crd.Ready, metav1.ConditionFalse, + "CoreNodesProgressing", + "Core nodes are progressing", + ) + return + } - status.SetCondition(metav1.Condition{ - Type: crdv2.Ready, - Status: metav1.ConditionTrue, - Reason: crdv2.Ready, - Message: "Cluster is ready", - }) - - case crdv2.Ready: - updateSts := r.state.updateCoreSet(instance) - if updateSts != nil && - updateSts.Status.ReadyReplicas != status.CoreNodesStatus.Replicas { - u.resetConditions(r, instance, "CoreNodesNotReady") + if instance.Spec.HasReplicants() { + if !evaluateReplicantsReady(s, 
instance) { + status.SetCondition(crd.Ready, metav1.ConditionFalse, + "ReplicantNodesProgressing", + "Replicant nodes are progressing", + ) return } - - if instance.Spec.HasReplicants() { - updateRs := r.state.updateReplicantSet(instance) - if updateRs != nil && - updateRs.Status.ReadyReplicas != status.ReplicantNodesStatus.Replicas { - u.resetConditions(r, instance, "ReplicantNodesNotReady") - return - } - } } -} -func (u *updateStatus) resetConditions( - r *reconcileRound, - instance *crdv2.EMQX, - reason string, -) { - if !instance.Spec.HasReplicants() { - instance.Status.RemoveCondition(crdv2.ReplicantNodesProgressing) - instance.Status.RemoveCondition(crdv2.ReplicantNodesReady) + if !instance.Status.DSReplication.IsStable() { + status.SetCondition(crd.Ready, metav1.ConditionFalse, + "DSReplicationProgressing", + "Durable storage membership transitions are in progress", + ) + return } - instance.Status.ResetConditions(reason) - u.updateStatusCondition(r, instance) -} -func (u *updateStatus) statusTransition( - r *reconcileRound, - instance *crdv2.EMQX, - conditionType string, -) { - instance.Status.SetTrueCondition(conditionType) - u.updateStatusCondition(r, instance) + status.SetCondition(crd.Ready, metav1.ConditionTrue, "Ready", "Cluster is ready") } -func switchCoreSet( - r *reconcileRound, - instance *crdv2.EMQX, -) (*appsv1.StatefulSet, *appsv1.StatefulSet) { - current := r.state.currentCoreSet(instance) - update := r.state.updateCoreSet(instance) - if (current == nil || current.Status.Replicas == 0) && update != nil { - current = nil - for _, coreSet := range r.state.coreSets { - // Adopt oldest non-empty coreSet if there are more than 2 (current and update) coreSets: - if coreSet.UID != update.UID && coreSet.Status.Replicas > 0 { - r.log.V(1).Info("adopting non-empty current coreSet", "statefulSet", klog.KObj(coreSet)) - current = coreSet - break - } - } - if current == nil { - r.log.V(1).Info("switching update -> current coreSet", "statefulSet", 
klog.KObj(update)) - current = update - } +func evaluateCoresReady(r *reconcileState, instance *crd.EMQX) bool { + desired := instance.Spec.NumCoreReplicas() + coreSet := r.coreSet() + coresReady := int32(0) + coresUpdated := int32(0) + coresTotal := int32(0) + nodesTotal := int32(len(instance.Status.CoreNodes)) + nodesReady := instance.Status.CoreNodesStatus.ReadyReplicas + if coreSet != nil { + coresTotal = coreSet.Status.Replicas + coresReady = coreSet.Status.ReadyReplicas + coresUpdated = coreSet.Status.UpdatedReplicas } - if current != nil { - instance.Status.CoreNodesStatus.CurrentRevision = current.Labels[crdv2.LabelPodTemplateHash] + return coresTotal == desired && coresReady == desired && coresUpdated == desired && + nodesTotal == desired && nodesReady == desired +} + +func evaluateReplicantsReady(s *reconcileState, instance *crd.EMQX) bool { + desired := instance.Spec.NumReplicantReplicas() + replicantSet := s.updateReplicantSet(instance) + replicantsTotal := int32(0) + replicantsReady := int32(0) + nodesTotal := int32(len(instance.Status.ReplicantNodes)) + nodesReady := instance.Status.ReplicantNodesStatus.ReadyReplicas + if replicantSet != nil { + replicantsTotal = replicantSet.Status.Replicas + replicantsReady = replicantSet.Status.ReadyReplicas } - return current, update + return replicantsTotal == desired && replicantsReady == desired && + nodesTotal == desired && nodesReady == desired } func switchReplicantSet( r *reconcileRound, - instance *crdv2.EMQX, + instance *crd.EMQX, ) (*appsv1.ReplicaSet, *appsv1.ReplicaSet) { current := r.state.currentReplicantSet(instance) update := r.state.updateReplicantSet(instance) @@ -303,15 +355,15 @@ func switchReplicantSet( } } if current != nil { - instance.Status.ReplicantNodesStatus.CurrentRevision = current.Labels[crdv2.LabelPodTemplateHash] + instance.Status.ReplicantNodesStatus.CurrentRevision = current.Labels[crd.LabelPodTemplateHash] } return current, update } -func (u *updateStatus) 
updateEMQXNodesStatus(r *reconcileRound, instance *crdv2.EMQX, nodes []api.EMQXNode) { +func (u *updateStatus) updateEMQXNodesStatus(r *reconcileRound, instance *crd.EMQX, nodes []api.EMQXNode) { status := &instance.Status - status.CoreNodes = []crdv2.EMQXNode{} - status.ReplicantNodes = []crdv2.EMQXNode{} + status.CoreNodes = []crd.EMQXNode{} + status.ReplicantNodes = []crd.EMQXNode{} slices.SortFunc(nodes, func(a, b api.EMQXNode) int { // Use seconds granularity to avoid jitter in ordering asec := a.Uptime / 1000 @@ -322,7 +374,7 @@ func (u *updateStatus) updateEMQXNodesStatus(r *reconcileRound, instance *crdv2. return cmp.Compare(asec, bsec) }) for _, n := range nodes { - node := crdv2.EMQXNode{ + node := crd.EMQXNode{ Name: n.Node, Status: n.NodeStatus, OTPRelease: n.OTPRelease, @@ -332,7 +384,7 @@ func (u *updateStatus) updateEMQXNodesStatus(r *reconcileRound, instance *crdv2. Connections: n.LiveConnections, } list := &status.CoreNodes - host := extractHostname(n.Node) + host := parseNodeName(n.Node, instance).hostName if node.Role == "replicant" { list = &status.ReplicantNodes } @@ -349,9 +401,3 @@ func (u *updateStatus) updateEMQXNodesStatus(r *reconcileRound, instance *crdv2. 
*list = append(*list, node) } } - -func extractHostname(node string) string { - // Example: emqx@emqx-core-557c8b7684-0.emqx-headless.default.svc.cluster.local - // Example: emqx@10.244.0.23 - return strings.Split(node[strings.Index(node, "@")+1:], ":")[0] -} diff --git a/internal/controller/update_pod_conditions.go b/internal/controller/update_pod_conditions.go deleted file mode 100644 index 8b8e201a3..000000000 --- a/internal/controller/update_pod_conditions.go +++ /dev/null @@ -1,47 +0,0 @@ -package controller - -import ( - emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" - util "github.com/emqx/emqx-operator/internal/controller/util" - "github.com/emqx/emqx-operator/internal/emqx/api" - corev1 "k8s.io/api/core/v1" -) - -type updatePodConditions struct { - *EMQXReconciler -} - -func (u *updatePodConditions) reconcile(r *reconcileRound, instance *crdv2.EMQX) subResult { - for _, pod := range r.state.pods { - onServingCondition := util.FindPodCondition(pod, crdv2.PodOnServing) - if onServingCondition == nil { - onServingCondition = &corev1.PodCondition{ - Type: crdv2.PodOnServing, - Status: corev1.ConditionUnknown, - } - } - - if r.state.partOfUpdateSet(pod, instance) { - if util.IsPodConditionTrue(pod, corev1.ContainersReady) { - status := api.AvailabilityCheck(r.requester.forPod(pod)) - util.SwitchPodConditionStatus(onServingCondition, status) - } - } else { - if r.state.partOfCurrentSet(pod, instance) { - // When available condition is true, need clean currentSts / currentRs pod - if instance.Status.IsConditionTrue(crdv2.Available) { - if util.IsPodConditionTrue(pod, corev1.ContainersReady) { - util.SwitchPodConditionStatus(onServingCondition, corev1.ConditionFalse) - } - } - } - } - - err := util.UpdatePodCondition(r.ctx, u.Client, pod, *onServingCondition) - if err != nil { - return subResult{err: emperror.Wrapf(err, "failed to update pod %s status", pod.Name)} - } - } - return subResult{} -} diff --git 
a/internal/controller/util.go b/internal/controller/util.go index d480fbb35..c775f8666 100644 --- a/internal/controller/util.go +++ b/internal/controller/util.go @@ -8,30 +8,21 @@ import ( "hash" "hash/fnv" "slices" - "time" + "strings" emperror "emperror.dev/errors" "github.com/cisco-open/k8s-objectmatcher/patch" "github.com/davecgh/go-spew/spew" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" + util "github.com/emqx/emqx-operator/internal/controller/util" "github.com/tidwall/gjson" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" ) -func checkInitialDelaySecondsReady(instance *crdv2.EMQX) bool { - _, condition := instance.Status.GetCondition(crdv2.Available) - if condition == nil || condition.Status != metav1.ConditionTrue { - return false - } - delay := time.Since(condition.LastTransitionTime.Time).Seconds() - return delay > float64(instance.Spec.UpdateStrategy.InitialDelaySeconds) -} - // JustCheckPodTemplate will check only the differences between the podTemplate of the two statefulSets func justCheckPodTemplate() patch.CalculateOption { getPodTemplate := func(obj []byte) ([]byte, error) { @@ -40,7 +31,7 @@ func justCheckPodTemplate() patch.CalculateOption { _ = json.Unmarshal([]byte(podTemplateSpecJson.String()), podTemplateSpec) // Remove the podTemplateHashLabelKey from the podTemplateSpec - delete(podTemplateSpec.Labels, crdv2.LabelPodTemplateHash) + delete(podTemplateSpec.Labels, crd.LabelPodTemplateHash) emptyRs := &appsv1.ReplicaSet{} emptyRs.Spec.Template = *podTemplateSpec @@ -128,6 +119,14 @@ func sortByName[T client.Object](list []T) { }) } +// sortByOrdinal sorts pods by their StatefulSet ordinal (numeric suffix) ascending. +// Pods whose names do not end in a number get ordinal -1 and sort first. 
+func sortByOrdinal(list []*corev1.Pod) { + slices.SortFunc(list, func(a, b *corev1.Pod) int { + return cmp.Compare(util.PodOrdinal(a.Name), util.PodOrdinal(b.Name)) + }) +} + // ComputeHash returns a hash value calculated from pod template and // a collisionCount to avoid hash collision. The hash will be safe encoded to // avoid bad words. @@ -158,3 +157,26 @@ func deepHashObject(hasher hash.Hash, objectToWrite interface{}) { } _, _ = printer.Fprintf(hasher, "%#v", objectToWrite) } + +type nodeName struct { + name string + hostName string + podName string +} + +func parseNodeName(s string, instance *crd.EMQX) *nodeName { + // Example: emqx@emqx-core-557c8b7684-0.emqx-headless.default.svc.cluster.local + // Example: emqx@10.244.0.23 + var parsed nodeName + nameParts := strings.Split(s, "@") + if len(nameParts) != 2 { + return nil + } + parsed.name = nameParts[0] + parsed.hostName = nameParts[1] + hostParts := strings.Split(nameParts[1], instance.HeadlessServiceNamespacedName().Name) + if len(hostParts) > 1 { + parsed.podName = strings.TrimRight(hostParts[0], ".") + } + return &parsed +} diff --git a/internal/controller/util/selector.go b/internal/controller/util/meta.go similarity index 87% rename from internal/controller/util/selector.go rename to internal/controller/util/meta.go index d4dc2f66e..10b60ecb3 100644 --- a/internal/controller/util/selector.go +++ b/internal/controller/util/meta.go @@ -36,3 +36,11 @@ func CloneSelectorAndAddLabel(selector *metav1.LabelSelector, labelKey, labelVal return newSelector } + +func CloneAnnotations(annotations map[string]string) map[string]string { + clone := make(map[string]string) + for k, v := range annotations { + clone[k] = v + } + return clone +} diff --git a/internal/controller/util/pod.go b/internal/controller/util/pod.go index 8ed28a396..0d17cc648 100644 --- a/internal/controller/util/pod.go +++ b/internal/controller/util/pod.go @@ -3,6 +3,9 @@ package controller import ( "context" "encoding/json" + "strconv" + 
"strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -45,3 +48,38 @@ func UpdatePodCondition( patch := client.RawPatch(types.StrategicMergePatchType, patchBytes) return k8sClient.Status().Patch(ctx, pod, patch) } + +func PodReadyDuration(pod *corev1.Pod) time.Duration { + cond := FindPodCondition(pod, corev1.PodReady) + if cond == nil || cond.Status != corev1.ConditionTrue { + return 0 + } + return time.Since(cond.LastTransitionTime.Time) +} + +// IsPodAvailable tells if the pod is Ready and has been so for longer than MinReadySeconds +// on the owning StatefulSet or ReplicaSet. +// Mirrors apps/v1 availability criterion. +func IsPodAvailable(pod *corev1.Pod, minReadySeconds int32) bool { + return PodReadyDuration(pod) > time.Duration(minReadySeconds)*time.Second +} + +func IsPodManagedBy(pod *corev1.Pod, object metav1.Object) bool { + if metav1.GetControllerOf(pod) != nil && metav1.GetControllerOf(pod).UID == object.GetUID() { + return true + } + return false +} + +func PodOrdinal(podName string) int { + parts := strings.Split(podName, "-") + if len(parts) < 2 { + return -1 + } + indexPart := parts[len(parts)-1] + index, err := strconv.Atoi(indexPart) + if err != nil { + return -1 + } + return index +} diff --git a/internal/controller/util/statefulset.go b/internal/controller/util/statefulset.go new file mode 100644 index 000000000..58c89ec65 --- /dev/null +++ b/internal/controller/util/statefulset.go @@ -0,0 +1,12 @@ +package controller + +import ( + appsv1 "k8s.io/api/apps/v1" +) + +func NumReplicas(sts *appsv1.StatefulSet) int32 { + if sts.Spec.Replicas != nil { + return *sts.Spec.Replicas + } + return 1 +} diff --git a/internal/controller/util_test.go b/internal/controller/util_test.go deleted file mode 100644 index ede7134a9..000000000 --- a/internal/controller/util_test.go +++ /dev/null @@ -1,48 +0,0 @@ -package controller - -import ( - "testing" - "time" - - crdv2 "github.com/emqx/emqx-operator/api/v2" - 
"github.com/stretchr/testify/assert" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestCheckInitialDelaySecondsReady(t *testing.T) { - assert.False(t, checkInitialDelaySecondsReady(&crdv2.EMQX{})) - - assert.False(t, checkInitialDelaySecondsReady(&crdv2.EMQX{ - Spec: crdv2.EMQXSpec{ - UpdateStrategy: crdv2.UpdateStrategy{ - InitialDelaySeconds: 999999999, - }, - }, - Status: crdv2.EMQXStatus{ - Conditions: []metav1.Condition{ - { - Type: crdv2.Available, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now()}, - }, - }, - }, - })) - - assert.True(t, checkInitialDelaySecondsReady(&crdv2.EMQX{ - Spec: crdv2.EMQXSpec{ - UpdateStrategy: crdv2.UpdateStrategy{ - InitialDelaySeconds: 0, - }, - }, - Status: crdv2.EMQXStatus{ - Conditions: []metav1.Condition{ - { - Type: crdv2.Available, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Time{Time: time.Now().AddDate(0, 0, -1)}, - }, - }, - }, - })) -} diff --git a/internal/emqx/api/api.go b/internal/emqx/api/api.go index 66979f83c..a9dfc5aa4 100644 --- a/internal/emqx/api/api.go +++ b/internal/emqx/api/api.go @@ -52,6 +52,10 @@ func post(req req.RequesterInterface, path string, body []byte) ([]byte, error) return request(req, "POST", path, body, nil) } +func delete(req req.RequesterInterface, path string) ([]byte, error) { + return request(req, "DELETE", path, nil, nil) +} + func request(req req.RequesterInterface, method string, path string, body []byte, header http.Header) ([]byte, error) { if req == nil { return nil, emperror.New("no requester") diff --git a/internal/emqx/api/ds.go b/internal/emqx/api/ds.go index 90759296d..014002227 100644 --- a/internal/emqx/api/ds.go +++ b/internal/emqx/api/ds.go @@ -90,17 +90,6 @@ func (s *DSReplicationStatus) TargetSites() (sites []string) { return sites } -func IsDSAvailable(r req.RequesterInterface) (bool, error) { - _, err := get(r, "api/v5/ds/sites") - if err == nil { - return true, nil - } - if emperror.Is(err, ErrorNotFound) 
{ - return false, nil - } - return false, err -} - func GetDSReplicationStatus(requester req.RequesterInterface) (DSReplicationStatus, error) { status := DSReplicationStatus{DBs: []DSDBReplicationStatus{}} diff --git a/internal/emqx/api/evacuation.go b/internal/emqx/api/evacuation.go index 4716185b4..121f5cbc7 100644 --- a/internal/emqx/api/evacuation.go +++ b/internal/emqx/api/evacuation.go @@ -6,11 +6,15 @@ import ( "strings" emperror "emperror.dev/errors" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" req "github.com/emqx/emqx-operator/internal/requester" - corev1 "k8s.io/api/core/v1" ) +// EMQX node evacuation state values reported by the API. +const EvacuationStateProhibiting = "prohibiting" + +const URLAvailabilityCheck = "api/v5/load_rebalance/availability_check" + type nodeEvacuationStatusResponse struct { Evacuations []NodeEvacuationStatus `json:"evacuations"` } @@ -48,7 +52,7 @@ func ClusterEvacuationStatus(req req.RequesterInterface) ([]NodeEvacuationStatus func StartEvacuation( r req.RequesterInterface, - strategy crdv2.EvacuationStrategy, + strategy crd.EvacuationStrategy, migrateTo []string, nodeName string, ) error { @@ -83,13 +87,20 @@ func StartEvacuation( return nil } -func AvailabilityCheck(req req.RequesterInterface) corev1.ConditionStatus { - _, err := get(req, "api/v5/load_rebalance/availability_check") - if err != nil && emperror.Is(err, ErrorServiceUnavailable) { - return corev1.ConditionFalse +func StopEvacuation( + r req.RequesterInterface, + nodeName string, +) error { + path := fmt.Sprintf("api/v5/load_rebalance/%s/evacuation/stop", nodeName) + _, err := post(r, path, []byte{}) + var apiErr apiError + if ok := emperror.As(err, &apiErr); ok { + if apiErr.StatusCode == 400 && strings.Contains(apiErr.Message, "not_started") { + return nil + } } if err != nil { - return corev1.ConditionUnknown + return err } - return corev1.ConditionTrue + return nil } diff --git a/internal/emqx/api/nodes.go 
b/internal/emqx/api/nodes.go index 9726d291a..3212d5dfb 100644 --- a/internal/emqx/api/nodes.go +++ b/internal/emqx/api/nodes.go @@ -8,6 +8,9 @@ import ( req "github.com/emqx/emqx-operator/internal/requester" ) +// EMQX node status values reported by the API. +const NodeStatusStopped = "stopped" + type EMQXNode struct { // EMQX node name, example: emqx@127.0.0.1 Node string `json:"node,omitempty"` @@ -46,6 +49,17 @@ func NodeInfo(req req.RequesterInterface, nodeName string) (*EMQXNode, error) { return nodeInfo, nil } +// ForceLeave removes a node from the EMQX cluster. +// DELETE /api/v5/cluster/{node}/force_leave +func ForceLeave(req req.RequesterInterface, nodeName string) error { + path := fmt.Sprintf("api/v5/cluster/%s/force_leave", nodeName) + _, err := delete(req, path) + if emperror.Is(err, ErrorNotFound) { + return nil + } + return err +} + func Nodes(req req.RequesterInterface) ([]EMQXNode, error) { body, err := get(req, "api/v5/nodes") if err != nil { diff --git a/internal/emqx/api/rebalance.go b/internal/emqx/api/rebalance.go index 6f35a6eb5..3ad7342a9 100644 --- a/internal/emqx/api/rebalance.go +++ b/internal/emqx/api/rebalance.go @@ -6,17 +6,17 @@ import ( "strconv" emperror "emperror.dev/errors" - appsv2beta1 "github.com/emqx/emqx-operator/api/v2beta1" + crd "github.com/emqx/emqx-operator/api/v2beta1" req "github.com/emqx/emqx-operator/internal/requester" ) const ApiRebalanceV5 = "api/v5/load_rebalance" type rebalanceStatus struct { - Rebalances []appsv2beta1.RebalanceState `json:"rebalances"` + Rebalances []crd.RebalanceState `json:"rebalances"` } -func StartRebalance(req req.RequesterInterface, strategy appsv2beta1.RebalanceStrategy, nodes []string) error { +func StartRebalance(req req.RequesterInterface, strategy crd.RebalanceStrategy, nodes []string) error { path := fmt.Sprintf("api/v5/load_rebalance/%s/start", nodes[0]) request := map[string]interface{}{ "conn_evict_rate": strategy.ConnEvictRate, @@ -40,7 +40,7 @@ func StartRebalance(req 
req.RequesterInterface, strategy appsv2beta1.RebalanceSt return err } -func GetRebalanceStatus(req req.RequesterInterface) ([]appsv2beta1.RebalanceState, error) { +func GetRebalanceStatus(req req.RequesterInterface) ([]crd.RebalanceState, error) { body, err := get(req, "api/v5/load_rebalance/global_status") if err != nil { return nil, err diff --git a/internal/handler/handler.go b/internal/handler/handler.go index 5957037c9..969d50b0a 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -2,15 +2,12 @@ package handler import ( "context" - "strings" "github.com/go-logr/logr" - json "github.com/json-iterator/go" emperror "emperror.dev/errors" "github.com/cisco-open/k8s-objectmatcher/patch" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -22,8 +19,7 @@ import ( ) const ( - ManageContainersAnnotation = "apps.emqx.io/manage-containers" - LastAppliedAnnotation = "apps.emqx.io/last-applied" + LastAppliedAnnotation = "apps.emqx.io/last-applied" ) type Patcher struct { @@ -98,17 +94,6 @@ func (handler *Handler) CreateOrUpdate(ctx context.Context, scheme *runtime.Sche patch.IgnoreStatusFields(), } switch resource := obj.(type) { - case *appsv1.StatefulSet: - opts = append( - opts, - patch.IgnoreVolumeClaimTemplateTypeMetaAndStatus(), - IgnoreOtherContainers(), - ) - case *appsv1.Deployment: - opts = append( - opts, - IgnoreOtherContainers(), - ) case *corev1.Service: storageResource := &corev1.Service{} err := runtime.DefaultUnstructuredConverter.FromUnstructured(u.UnstructuredContent(), storageResource) @@ -170,72 +155,3 @@ func (handler *Handler) Update(ctx context.Context, obj client.Object) error { } return nil } - -func IgnoreOtherContainers() patch.CalculateOption { - return func(current, modified []byte) ([]byte, []byte, error) { - current, err := selectManagerContainer(current) - if err != nil { - return []byte{}, []byte{}, 
emperror.Wrap(err, "could not delete the field from current byte sequence") - } - - modified, err = selectManagerContainer(modified) - if err != nil { - return []byte{}, []byte{}, emperror.Wrap(err, "could not delete the field from modified byte sequence") - } - - return current, modified, nil - } -} - -func selectManagerContainer(obj []byte) ([]byte, error) { - var podTemplate corev1.PodTemplateSpec - var objMap map[string]interface{} - err := json.Unmarshal(obj, &objMap) - if err != nil { - return nil, emperror.Wrap(err, "could not unmarshal json") - } - - kind := objMap["kind"].(string) - switch kind { - case "Deployment": - deploy := &appsv1.Deployment{} - err := json.Unmarshal(obj, deploy) - if err != nil { - return nil, emperror.Wrap(err, "could not unmarshal json") - } - podTemplate = deploy.Spec.Template - case "StatefulSet": - sts := &appsv1.StatefulSet{} - err := json.Unmarshal(obj, sts) - if err != nil { - return nil, emperror.Wrap(err, "could not unmarshal json") - } - podTemplate = sts.Spec.Template - default: - return nil, emperror.Wrapf(err, "unsupported kind: %s", kind) - } - - containerNames := podTemplate.Annotations[ManageContainersAnnotation] - containers := []corev1.Container{} - for _, container := range podTemplate.Spec.Containers { - if strings.Contains(containerNames, container.Name) { - containers = append(containers, container) - } - } - podTemplate.Spec.Containers = containers - objMap["spec"].(map[string]interface{})["template"] = podTemplate - return json.ConfigCompatibleWithStandardLibrary.Marshal(objMap) -} - -func SetManagerContainerAnnotation(annotations map[string]string, containers []corev1.Container) map[string]string { - containersName := []string{} - for _, container := range containers { - containersName = append(containersName, container.Name) - } - - if annotations == nil { - annotations = make(map[string]string) - } - annotations[ManageContainersAnnotation] = strings.Join(containersName, ",") - return annotations -} diff 
--git a/internal/handler/handler_test.go b/internal/handler/handler_test.go deleted file mode 100644 index c53b0d1c5..000000000 --- a/internal/handler/handler_test.go +++ /dev/null @@ -1,178 +0,0 @@ -/* -Copyright 2021. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package handler_test - -import ( - "testing" - - "github.com/cisco-open/k8s-objectmatcher/patch" - "github.com/emqx/emqx-operator/internal/handler" - "github.com/stretchr/testify/assert" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestIgnoreOtherContainerForSts(t *testing.T) { - current := &appsv1.StatefulSet{ - TypeMeta: metav1.TypeMeta{ - Kind: "StatefulSet", - APIVersion: "apps/v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "emqx", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ - handler.ManageContainersAnnotation: "emqx,reloader", - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "emqx", - }, - { - Name: "reloader", - }, - }, - }, - }, - }, - } - assert.Nil(t, patch.DefaultAnnotator.SetLastAppliedAnnotation(current)) - - modified := current.DeepCopy() - modified.Spec.Template.Spec.Containers = append(modified.Spec.Template.Spec.Containers, corev1.Container{Name: "fake"}) - - patchResult, err := patch.DefaultPatchMaker.Calculate(current, modified, handler.IgnoreOtherContainers()) - 
assert.Nil(t, err) - assert.True(t, patchResult.IsEmpty()) - - modified.Spec.Template.Spec.Containers = []corev1.Container{ - { - Name: "emqx", - Args: []string{"--fake"}, - }, - { - Name: "reloader", - }, - } - - patchResult, err = patch.DefaultPatchMaker.Calculate(current, modified, handler.IgnoreOtherContainers()) - assert.Nil(t, err) - assert.False(t, patchResult.IsEmpty()) - - modified.Spec.Template.Spec.Containers = []corev1.Container{ - { - Name: "emqx", - }, - { - Name: "reloader", - Args: []string{"--fake"}, - }, - } - - patchResult, err = patch.DefaultPatchMaker.Calculate(current, modified, handler.IgnoreOtherContainers()) - assert.Nil(t, err) - assert.False(t, patchResult.IsEmpty()) -} - -func TestIgnoreOtherContainerForDeploy(t *testing.T) { - current := &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - Kind: "Deployment", - APIVersion: "apps/v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "emqx", - Namespace: "default", - }, - Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ - handler.ManageContainersAnnotation: "emqx,reloader", - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "emqx", - }, - { - Name: "reloader", - }, - }, - }, - }, - }, - } - assert.Nil(t, patch.DefaultAnnotator.SetLastAppliedAnnotation(current)) - - modified := current.DeepCopy() - modified.Spec.Template.Spec.Containers = append(modified.Spec.Template.Spec.Containers, corev1.Container{Name: "fake"}) - - patchResult, err := patch.DefaultPatchMaker.Calculate(current, modified, []patch.CalculateOption{ - handler.IgnoreOtherContainers(), - }...) 
- assert.Nil(t, err) - assert.True(t, patchResult.IsEmpty()) - - modified.Spec.Template.Spec.Containers = []corev1.Container{ - { - Name: "emqx", - Args: []string{"--fake"}, - }, - { - Name: "reloader", - }, - } - - patchResult, err = patch.DefaultPatchMaker.Calculate(current, modified, handler.IgnoreOtherContainers()) - assert.Nil(t, err) - assert.False(t, patchResult.IsEmpty()) - - modified.Spec.Template.Spec.Containers = []corev1.Container{ - { - Name: "emqx", - }, - { - Name: "reloader", - Args: []string{"--fake"}, - }, - } - - patchResult, err = patch.DefaultPatchMaker.Calculate(current, modified, handler.IgnoreOtherContainers()) - assert.Nil(t, err) - assert.False(t, patchResult.IsEmpty()) -} - -func TestSetManagerContainerAnnotation(t *testing.T) { - var a map[string]string - - got := handler.SetManagerContainerAnnotation(a, []corev1.Container{ - {Name: "emqx"}, - {Name: "reloader"}, - }) - assert.Equal(t, map[string]string{ - handler.ManageContainersAnnotation: "emqx,reloader", - }, got) -} diff --git a/test/e2e-helm/helm_install_test.go b/test/e2e-helm/helm_install_test.go index 8570c6c39..25c341e44 100644 --- a/test/e2e-helm/helm_install_test.go +++ b/test/e2e-helm/helm_install_test.go @@ -22,7 +22,7 @@ import ( . "github.com/onsi/gomega" ) -// helmInstall installs the local 2.3.x chart into the given namespace with --wait. +// helmInstall installs the local 3.x chart into the given namespace with --wait. func helmInstall(namespace string, extraArgs ...string) error { args := []string{ "install", @@ -39,31 +39,31 @@ func helmInstall(namespace string, extraArgs ...string) error { return Run("helm", args...) } -// helmUpgrade upgrades to the local 2.3.x chart in the given namespace with --wait. 
-func helmUpgrade(namespace string) error { - return Run("helm", "upgrade", - helmReleaseName, - localChartPath, - "--namespace", namespace, - "--set", "image.repository="+operatorImageRepo, - "--set", "image.tag="+operatorImageTag, - "--set", "image.pullPolicy=Never", - "--wait", - "--timeout", "2m", - ) -} - -// helmCleanup removes all resources that 2.3.x chart may have left behind in +// helmUpgrade upgrades to the local 3.x chart in the given namespace with --wait. +// func helmUpgrade(namespace string) error { +// return Run("helm", "upgrade", +// helmReleaseName, +// localChartPath, +// "--namespace", namespace, +// "--set", "image.repository="+operatorImageRepo, +// "--set", "image.tag="+operatorImageTag, +// "--set", "image.pullPolicy=Never", +// "--wait", +// "--timeout", "2m", +// ) +// } + +// helmCleanup removes all resources that 3.x chart may have left behind in // the given namespace, including the Helm release itself. func helmCleanup(namespace string) { _ = Run("helm", "uninstall", helmReleaseName, "--namespace", namespace) - _ = Kubectl("delete", "clusterrole", "emqx-operator-manager-role", "--ignore-not-found") - _ = Kubectl("delete", "clusterrolebinding", "emqx-operator-manager-rolebinding", "--ignore-not-found") - _ = Kubectl("delete", "clusterrole", "emqx-operator-pre-upgrade", "--ignore-not-found") - _ = Kubectl("delete", "clusterrolebinding", "emqx-operator-pre-upgrade", "--ignore-not-found") - _ = Kubectl("delete", "crd", "emqxes.apps.emqx.io", "--ignore-not-found") - _ = Kubectl("delete", "crd", "rebalances.apps.emqx.io", "--ignore-not-found") - _ = Kubectl("delete", "ns", namespace, "--ignore-not-found") + _ = Kubectl("delete", "clusterrole", "emqx-operator-manager-role") + _ = Kubectl("delete", "clusterrolebinding", "emqx-operator-manager-rolebinding") + _ = Kubectl("delete", "clusterrole", "emqx-operator-pre-upgrade") + _ = Kubectl("delete", "clusterrolebinding", "emqx-operator-pre-upgrade") + _ = Kubectl("delete", "crd", 
"emqxes.apps.emqx.io") + _ = Kubectl("delete", "crd", "rebalances.apps.emqx.io") + _ = Kubectl("delete", "ns", namespace) } //nolint:errcheck @@ -89,13 +89,14 @@ var _ = Describe("Helm Install", Ordered, func() { } }) - It("should install cleanly / cleanup no-op", func() { - By("install 2.3.x chart") + It("should install cleanly", func() { + By("install 3.x chart") Expect(helmInstall(namespace)).To(Succeed()) By("verify CRDs are installed") Expect(crdExists("emqxes.apps.emqx.io")).To(BeTrue()) - Expect(crdExists("rebalances.apps.emqx.io")).To(BeTrue()) + // NOTE: Rebalance controller is disabled in this release. See api/v3alpha1/rebalance_types.go. + // Expect(crdExists("rebalances.apps.emqx.io")).To(BeTrue()) By("verify operator deployment is available") Expect(Kubectl("wait", "deployment", @@ -105,22 +106,4 @@ var _ = Describe("Helm Install", Ordered, func() { "--timeout", "1m", )).To(Succeed()) }) - - It("should install cleanly / pre-upgrade check disabled", func() { - By("install 2.3.x with pre-upgrade check disabled") - Expect(helmInstall(namespace, "--set", "upgrade.preUpgradeCheck=false")). - To(Succeed()) - - By("verify operator is running") - Expect(Kubectl("wait", "deployment", - "emqx-operator-controller-manager", - "--for", "condition=Available", - "--namespace", namespace, - "--timeout", "1m", - )).To(Succeed()) - - By("verify CRDs are installed") - Expect(crdExists("emqxes.apps.emqx.io")).To(BeTrue()) - Expect(crdExists("rebalances.apps.emqx.io")).To(BeTrue()) - }) }) diff --git a/test/e2e-helm/helm_suite_test.go b/test/e2e-helm/helm_suite_test.go index a123b41c6..bd0a1810e 100644 --- a/test/e2e-helm/helm_suite_test.go +++ b/test/e2e-helm/helm_suite_test.go @@ -31,7 +31,7 @@ const ( // helmReleaseName is the Helm release name used across all Helm upgrade tests. helmReleaseName = "emqx-operator" - // localChartPath is the path to the local 2.3.x chart being tested. + // localChartPath is the path to the local 3.x chart being tested. 
localChartPath = "deploy/charts/emqx-operator" // operatorImage is the operator image which should already be built and available. @@ -71,11 +71,6 @@ func crdExists(name string) bool { return Kubectl("get", "crd", name) == nil } -// resourceExists checks whether a cluster-scoped resource exists. -func resourceExists(kind, name string) bool { - return Kubectl("get", kind, name) == nil -} - // dumpHelmDiagnostics writes debug info for the given namespace to GinkgoWriter. func dumpHelmDiagnostics(namespace string) { out, _ := Output("helm", "list", "--namespace", namespace) diff --git a/test/e2e-helm/helm_upgrade_test.go b/test/e2e-helm/helm_upgrade_test.go deleted file mode 100644 index c89d545bf..000000000 --- a/test/e2e-helm/helm_upgrade_test.go +++ /dev/null @@ -1,246 +0,0 @@ -/* -Copyright 2026. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package helm - -import ( - . "github.com/emqx/emqx-operator/test/util" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" - batchv1 "k8s.io/api/batch/v1" - apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" -) - -const ( - // v22xChartVersion is the 2.2.x chart version from the emqx Helm repo. - v22xChartVersion = "2.2.29" -) - -// helmInstall22x installs the 2.2.x operator chart into the given namespace. -// Does not use --wait because the deployment won't become ready without cert-manager. 
-func helmInstall22x(namespace string) error { - return Run("helm", "install", - helmReleaseName, - "emqx/emqx-operator", - "--version", v22xChartVersion, - "--namespace", namespace, - "--set", "cert-manager.enable=false", - ) -} - -// cleanup22x removes all resources that 2.2.x chart may have left behind. -func cleanup22x() { - _ = Kubectl("delete", "mutatingwebhookconfiguration", - "emqx-operator-mutating-webhook-configuration", "--ignore-not-found") - _ = Kubectl("delete", "validatingwebhookconfiguration", - "emqx-operator-validating-webhook-configuration", "--ignore-not-found") - _ = Kubectl("delete", "crd", "emqxbrokers.apps.emqx.io", "--ignore-not-found") - _ = Kubectl("delete", "crd", "emqxenterprises.apps.emqx.io", "--ignore-not-found") - _ = Kubectl("delete", "crd", "emqxplugins.apps.emqx.io", "--ignore-not-found") - _ = Kubectl("delete", "ns", "emqx-operator-system", "--ignore-not-found") -} - -//nolint:errcheck -var _ = Describe("Helm Upgrade / 2.2.x", Ordered, func() { - - const namespace = "emqx-operator-helm" - - BeforeAll(func() { - By("clean up any leftover resources from previous test runs") - helmCleanup(namespace) - cleanup22x() - - By("create test namespace") - Expect(Kubectl("create", "ns", namespace)).To(Succeed()) - }) - - AfterAll(func() { - helmCleanup(namespace) - cleanup22x() - }) - - AfterEach(func() { - if CurrentSpecReport().Failed() { - dumpHelmDiagnostics(namespace) - } - }) - - It("should install 2.2.x and upgrade to 2.3.x", func() { - By("install emqx-operator 2.2.x from Helm repo") - Expect(helmInstall22x(namespace)).To(Succeed()) - - By("verify 2.2.x deployment and all 5 CRDs exist") - Expect(Kubectl("get", "deployment", "emqx-operator-controller-manager", - "--namespace", namespace, - )).To(Succeed()) - for _, crd := range []string{ - "emqxes.apps.emqx.io", - "rebalances.apps.emqx.io", - "emqxbrokers.apps.emqx.io", - "emqxenterprises.apps.emqx.io", - "emqxplugins.apps.emqx.io", - } { - Expect(crdExists(crd)).To( - BeTrue(), - 
"%s CRD should exist after 2.2.x install", crd, - ) - } - - By("upgrade to 2.3.x chart with default values (cleanup enabled)") - Expect(helmUpgrade(namespace)).To(Succeed()) - - By("verify pre-upgrade completed successfully") - var jobList batchv1.JobList - Eventually(KubectlOut). - WithArguments("get", "jobs", "--namespace", namespace, "-o", "json"). - Should( - // Hook jobs may be cleaned up already (hook-succeeded policy), - // which is fine: it means they completed successfully. - BeUnmarshalledAs(&jobList, HaveField("Items", Or( - BeEmpty(), - ContainElement(And( - HaveField("Name", Equal("pre-upgrade")), - HaveField("Status.Succeeded", BeNumerically(">=", 1)), - )), - ))), - "pre-upgrade job should have completed successfully", - ) - - By("verify CRDs no longer have conversion webhooks") - for _, crdName := range []string{"emqxes.apps.emqx.io", "rebalances.apps.emqx.io"} { - var crd apiextv1.CustomResourceDefinition - Expect(KubectlOut("get", "crd", crdName, "-o", "json")).To( - BeUnmarshalledAs(&crd, Or( - HaveField("Spec.Conversion", BeNil()), - HaveField("Spec.Conversion.Strategy", Equal(apiextv1.NoneConverter)), - )), - "%s should have None or no conversion strategy after upgrade", crdName, - ) - } - - By("verify webhook configurations no longer exist") - Expect(resourceExists("mutatingwebhookconfiguration", - "emqx-operator-mutating-webhook-configuration")).To(BeFalse()) - Expect(resourceExists("validatingwebhookconfiguration", - "emqx-operator-validating-webhook-configuration")).To(BeFalse()) - - By("verify 2.3.x operator deployment is running") - var deployment appsv1.Deployment - Eventually(KubectlOut). - WithArguments("get", "deployment", - "emqx-operator-controller-manager", - "--namespace", namespace, - "-o", "json", - ). 
- Should(BeUnmarshalledAs(&deployment, - HaveField("Status.AvailableReplicas", BeNumerically(">=", 1)), - )) - - By("verify legacy CRDs were removed by Helm") - Expect(crdExists("emqxbrokers.apps.emqx.io")).To(BeFalse(), - "emqxbrokers CRD should be removed after upgrade") - Expect(crdExists("emqxenterprises.apps.emqx.io")).To(BeFalse(), - "emqxenterprises CRD should be removed after upgrade") - Expect(crdExists("emqxplugins.apps.emqx.io")).To(BeFalse(), - "emqxplugins CRD should be removed after upgrade") - }) -}) - -//nolint:errcheck -var _ = Describe("Helm Upgrade / 2.2.x + legacy CRs", Ordered, func() { - - const namespace = "emqx-operator-legacy" - - BeforeAll(func() { - By("clean up any leftover resources") - helmCleanup(namespace) - cleanup22x() - - By("create test namespace") - Expect(Kubectl("create", "ns", namespace)).To(Succeed()) - }) - - AfterAll(func() { - helmCleanup(namespace) - cleanup22x() - }) - - AfterEach(func() { - if CurrentSpecReport().Failed() { - dumpHelmDiagnostics(namespace) - out, _ := KubectlOut("logs", "--namespace", namespace, - "-l", "app.kubernetes.io/name=emqx-operator", - "--tail", "-1") - GinkgoWriter.Print("Pre-upgrade job logs:\n", out) - } - }) - - It("should block upgrade when legacy CRs exist", func() { - By("install 2.2.x operator") - Expect(helmInstall22x(namespace)).To(Succeed()) - - By("verify legacy CRD exists") - Expect(crdExists("emqxbrokers.apps.emqx.io")).To(BeTrue()) - - By("delete webhook configurations so we can create a CR without admission") - Expect(Kubectl("delete", "mutatingwebhookconfiguration", - "emqx-operator-mutating-webhook-configuration", "--ignore-not-found")).To(Succeed()) - Expect(Kubectl("delete", "validatingwebhookconfiguration", - "emqx-operator-validating-webhook-configuration", "--ignore-not-found")).To(Succeed()) - - By("patch emqxbrokers CRD to remove conversion webhook") - Expect(Kubectl("patch", "crd", "emqxbrokers.apps.emqx.io", - "--type", "json", - "--patch", 
`[{"op":"replace","path":"/spec/conversion","value":{"strategy":"None"}}]`, - )).To(Succeed()) - - By("create a minimal EmqxBroker CR") - brokerCR := []byte(`{ - "apiVersion": "apps.emqx.io/v1beta4", - "kind": "EmqxBroker", - "metadata": {"name": "test-broker", "namespace": "` + namespace + `"}, - "spec": {} - }`) - Expect(KubectlStdin(brokerCR, "apply", "-f", "-", "--namespace", namespace)). - To(Succeed()) - - By("attempt upgrade to 2.3.x") - Expect(helmUpgrade(namespace)).To(HaveOccurred(), - "upgrade should fail when legacy CRs exist") - - By("verify the Helm release is in failed state") - out, err := Output("helm", "list", "--namespace", namespace) - Expect(err).NotTo(HaveOccurred()) - Expect(out).To(ContainSubstring("failed"), - "release should be in failed state after blocked upgrade") - - By("delete the legacy CR so cleanup can proceed") - Expect(Kubectl("delete", "emqxbrokers", "test-broker", - "--namespace", namespace, "--ignore-not-found")).To(Succeed()) - - By("retry upgrade") - Expect(helmUpgrade(namespace)).To(Succeed()) - - By("verify 2.3.x operator is running") - Expect(Kubectl("wait", "deployment", - "emqx-operator-controller-manager", - "--for", "condition=Available", - "--namespace", namespace, - "--timeout", "1m", - )).To(Succeed()) - }) -}) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 63027812b..e0fdac12f 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -167,7 +167,7 @@ func DumpDiagnosticReport(namespace string, name string, time time.Time) { return } - GinkgoWriter.Printf("Dumping diagnostic report: %s", path) + GinkgoWriter.Println("Dumping diagnostic report: ", path) controllerLogs, err := util.KubectlOut("logs", "--selector", "control-plane=controller-manager", @@ -176,7 +176,7 @@ func DumpDiagnosticReport(namespace string, name string, time time.Time) { if err == nil { _ = dumpString(path, "controller.log", controllerLogs) } else { - GinkgoWriter.Printf("Failed to get Controller 
logs: %s", err) + GinkgoWriter.Println("Failed to get Controller logs:", err) } operatorManagedLabel := "apps.emqx.io/managed-by=emqx-operator" @@ -184,42 +184,61 @@ func DumpDiagnosticReport(namespace string, name string, time time.Time) { if err == nil { _ = dumpString(path, "resources", resources) } else { - GinkgoWriter.Printf("Failed to list managed resources: %s", err) + GinkgoWriter.Println("Failed to list managed resources:", err) } emqxCRs, err := util.KubectlOut("get", "emqx", "--output", "yaml") if err == nil { _ = dumpString(path, "emqx-crs.yaml", emqxCRs) } else { - GinkgoWriter.Printf("Failed to list EMQX CRs: %s", err) + GinkgoWriter.Println("Failed to list EMQX CRs:", err) } emqxPods, err := util.KubectlOut("get", "pod", "--selector", operatorManagedLabel, "-o", "yaml") if err == nil { _ = dumpString(path, "emqx-pods.yaml", emqxPods) } else { - GinkgoWriter.Printf("Failed to list EMQX pods: %s", err) + GinkgoWriter.Println("Failed to list EMQX pods:", err) + } + + emqxStatefulSets, err := util.KubectlOut("get", "statefulset", "--selector", operatorManagedLabel, "-o", "yaml") + if err == nil { + _ = dumpString(path, "emqx-statefulsets.yaml", emqxStatefulSets) + } else { + GinkgoWriter.Println("Failed to list EMQX StatefulSets:", err) + } + + emqxReplicaSets, err := util.KubectlOut("get", "replicaset", "--selector", operatorManagedLabel, "-o", "yaml") + if err == nil { + _ = dumpString(path, "emqx-replicasets.yaml", emqxReplicaSets) + } else { + GinkgoWriter.Println("Failed to list EMQX ReplicaSets:", err) } var podList corev1.PodList err = json.Unmarshal(util.FromYAML([]byte(emqxPods)), &podList) if err != nil { - GinkgoWriter.Printf("Failed to unmarshal EMQX pods: %s", err) + GinkgoWriter.Println("Failed to unmarshal EMQX pods:", err) } for _, pod := range podList.Items { logs, err := util.KubectlOut("logs", pod.Name, "--tail", "-1") if err == nil { _ = dumpString(path, pod.Name+".log", logs) } else { - GinkgoWriter.Printf("Failed to get logs for 
pod %s: %s", pod.Name, err) + GinkgoWriter.Println("Failed to get logs for pod:", pod.Name, err) } } + dsInfo, _ := util.KubectlOut("exec", "service/emqx-listeners", "--", "emqx", "ctl", "ds", "info") + if dsInfo != "" { + _ = dumpString(path, "emqx.ctl.ds-info", dsInfo) + } + events, err := util.KubectlOut("get", "events", "--sort-by=.lastTimestamp") if err == nil { _ = dumpString(path, "events", events) } else { - GinkgoWriter.Printf("Failed to get Kubernetes events: %s", err) + GinkgoWriter.Println("Failed to get Kubernetes events:", err) } } diff --git a/test/e2e/emqx.go b/test/e2e/emqx.go index 4b4426eef..a88f7e330 100644 --- a/test/e2e/emqx.go +++ b/test/e2e/emqx.go @@ -1,7 +1,7 @@ package e2e import ( - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" . "github.com/emqx/emqx-operator/test/util" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -25,8 +25,10 @@ func checkEMQXReady(g Gomega, afterTime ...metav1.Time) { } func checkEMQXStatus(g Gomega, coreReplicas int) { + var status crd.CoreNodesStatus + var nodes []crd.EMQXNode var podList corev1.PodList - var status crdv2.EMQXNodesStatus + var pvcList corev1.PersistentVolumeClaimList g.Expect(KubectlOut("get", "pod", "--selector", "apps.emqx.io/instance=emqx,apps.emqx.io/managed-by=emqx-operator", "-o", "json", @@ -44,65 +46,85 @@ func checkEMQXStatus(g Gomega, coreReplicas int) { To(UnmarshalInto(&status), "Failed to get EMQX status") g.Expect(status).To( And( - HaveField("Replicas", BeEquivalentTo(coreReplicas)), HaveField("ReadyReplicas", BeEquivalentTo(coreReplicas)), - HaveField("CurrentReplicas", BeEquivalentTo(coreReplicas)), - HaveField("UpdateReplicas", BeEquivalentTo(coreReplicas)), + HaveField("UpdatedReplicas", BeEquivalentTo(coreReplicas)), ), "EMQX status does not have expected number of core nodes", ) - checkNodesStatusRevision(g, status, "core", coreReplicas) + g.Expect(KubectlOut("get", "emqx", "emqx", "-o", 
"jsonpath={.status.coreNodes}")). + To(UnmarshalInto(&nodes), "Failed to get EMQX cluster nodes") + g.Expect(nodes).To( + HaveEach(HaveField("Status", Equal("running"))), + "EMQX cluster contains stopped nodes", + ) + g.Expect(nodes).To( + HaveEach(HaveField("PodName", Not(BeEmpty()))), + "EMQX cluster contains nodes without pods", + ) + g.Expect(KubectlOut("get", "pvc", + "--selector", crd.LabelDBRole+"=core,"+crd.LabelManagedBy+"=emqx-operator", + "-o", "json", + )).To(UnmarshalInto(&pvcList), "Failed to list core PVCs") + g.Expect(pvcList.Items).To( + HaveLen(coreReplicas), + "Expected %d core PVCs", coreReplicas, + ) + g.Expect(pvcList.Items).To( + HaveEach(HaveField("Status.Phase", Equal(corev1.ClaimBound))), + "Not all core PVCs are bound", + ) } func checkNoReplicants(g Gomega) { - g.Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.replicantNodesStatus}")). - To(Equal("{}"), "EMQX cluster status has replicant nodes status") - g.Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.replicantNodes}")). - To(BeEmpty(), "EMQX cluster status lists replicant nodes") + g.Expect(KubectlOut("get", "emqx", "emqx", + "-o", "jsonpath={.status.replicantNodesStatus.currentReplicas}", + )).To(Equal("0"), "EMQX cluster status has replicant replicas") + g.Expect(KubectlOut("get", "emqx", "emqx", + "-o", "jsonpath={.status.replicantNodes}", + )).To(BeEmpty(), "EMQX cluster status lists replicant nodes") } func checkReplicantStatus(g Gomega, replicantReplicas int) { - var status crdv2.EMQXNodesStatus + var status crd.ReplicantNodesStatus g.Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.replicantNodesStatus}")). 
To(UnmarshalInto(&status), "Failed to get EMQX replicant nodes status") g.Expect(status).To( And( - HaveField("Replicas", BeEquivalentTo(replicantReplicas)), HaveField("ReadyReplicas", BeEquivalentTo(replicantReplicas)), HaveField("CurrentReplicas", BeEquivalentTo(replicantReplicas)), HaveField("UpdateReplicas", BeEquivalentTo(replicantReplicas)), ), "EMQX status does not have expected number of replicant nodes", ) - checkNodesStatusRevision(g, status, "replicant", replicantReplicas) + checkReplicantNodesStatusRevision(g, status, replicantReplicas) } -func checkNodesStatusRevision(g Gomega, status crdv2.EMQXNodesStatus, role string, replicas int) { +func checkReplicantNodesStatusRevision(g Gomega, status crd.ReplicantNodesStatus, replicas int) { var podList corev1.PodList g.Expect(status).To( And( HaveField("CurrentRevision", Not(BeEmpty())), HaveField("UpdateRevision", Not(BeEmpty())), ), - "EMQX %s nodes status does not have expected revision", role, + "EMQX replicant nodes status does not have expected revision", ) g.Expect(status.CurrentRevision).To( Equal(status.UpdateRevision), - "EMQX %s nodes current and update revisions are different", role, + "EMQX replicant nodes current and update revisions are different", ) g.Expect(KubectlOut("get", "pods", - "--selector", crdv2.LabelPodTemplateHash+"="+status.CurrentRevision, + "--selector", crd.LabelPodTemplateHash+"="+status.CurrentRevision, "--field-selector", "status.phase==Running", "-o", "json", - )).To(UnmarshalInto(&podList), "Failed to list %s pods", role) + )).To(UnmarshalInto(&podList), "Failed to list replicant pods") g.Expect(podList.Items).To( HaveLen(replicas), - "EMQX cluster does not have %d current revision %s pods", replicas, role, + "EMQX cluster does not have %d current revision replicant pods", replicas, ) } func checkDSReplicationStatus(g Gomega, coreReplicas int) { - status := &crdv2.DSReplicationStatus{} + status := &crd.DSReplicationStatus{} replicationFactor := min(3, coreReplicas) 
g.Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.dsReplication}")). To(UnmarshalInto(&status), "Failed to get emqx status") @@ -112,7 +134,7 @@ func checkDSReplicationStatus(g Gomega, coreReplicas int) { HaveField("Name", Not(BeEmpty())), HaveField("NumShards", Not(BeZero())), HaveField("NumShardReplicas", Not(BeZero())), - Satisfy(func(db crdv2.DSDBReplicationStatus) bool { + Satisfy(func(db crd.DSDBReplicationStatus) bool { return db.NumShardReplicas == db.NumShards*int32(replicationFactor) }), HaveField("LostShardReplicas", BeEquivalentTo(0)), diff --git a/test/e2e/emqx_test.go b/test/e2e/emqx_test.go index 566860879..12beb3c48 100644 --- a/test/e2e/emqx_test.go +++ b/test/e2e/emqx_test.go @@ -6,7 +6,7 @@ import ( "slices" "strings" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" . "github.com/emqx/emqx-operator/test/util" "github.com/lithammer/dedent" . "github.com/onsi/ginkgo/v2" @@ -25,7 +25,7 @@ func withCores(numReplicas int) []byte { func withReplicants(numReplicas int) []byte { return fmt.Appendf(nil, - `{"spec": {"replicantTemplate": {"spec": {"replicas": %d}}}}`, + `{"spec": {"replicantTemplate": {"spec": {"minReadySeconds": 3, "replicas": %d}}}}`, numReplicas, ) } @@ -145,18 +145,36 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { }) It("scale cluster up", func() { - coreReplicas = 3 + coreReplicas = 4 scaleupStartedAt := metav1.Now() Expect(Kubectl("patch", "emqx", "emqx", "--type", "json", - "--patch", `[{"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": 3}]`, + "--patch", fmt.Sprintf( + `[{"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": %d}]`, + coreReplicas, + ), )).To(Succeed(), "Failed to scale up EMQX cluster") Eventually(checkEMQXReady).WithArguments(scaleupStartedAt).Should(Succeed()) Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) checkNoReplicants(Default) }) - It("change image to trigger 
blue-green update", func() { + It("scale cluster down", func() { + coreReplicas = 3 + scaledownStartedAt := metav1.Now() + Expect(Kubectl("patch", "emqx", "emqx", + "--type", "json", + "--patch", fmt.Sprintf( + `[{"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": %d}]`, + coreReplicas, + ), + )).To(Succeed(), "Failed to scale down EMQX cluster") + Eventually(checkEMQXReady).WithArguments(scaledownStartedAt).Should(Succeed()) + Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) + checkNoReplicants(Default) + }) + + It("change image to trigger rolling update", func() { By("create MQTTX client") Expect(Kubectl("apply", "-f", "test/e2e/files/resources/mqttx.yaml")).To(Succeed()) defer Kubectl("delete", "-f", "test/e2e/files/resources/mqttx.yaml") @@ -166,15 +184,14 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { "--timeout=1m", )).To(Succeed(), "Timed out waiting MQTTX to be ready") - By("fetch current core StatefulSet") - var stsList appsv1.StatefulSetList - coreRev, err := KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus.currentRevision}") - Expect(err).NotTo(HaveOccurred(), "Failed to get EMQX status") + By("fetch core StatefulSet") + var stsListBefore appsv1.StatefulSetList Expect(KubectlOut("get", "statefulset", - "--selector", crdv2.LabelPodTemplateHash+"="+coreRev, + "--selector", crd.LabelDBRole+"=core", "-o", "json", - )).To(UnmarshalInto(&stsList), "Failed to list statefulsets") - Expect(stsList.Items).To(HaveLen(1)) + )).To(UnmarshalInto(&stsListBefore)) + Expect(stsListBefore.Items).To(HaveLen(1), "More than one core StatefulSet") + stsBefore := stsListBefore.Items[0].DeepCopy() By("change EMQX image") changedAt := metav1.Now() @@ -185,17 +202,27 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { By("check EMQX cluster node evacuations status") Eventually(KubectlOut). - WithArguments("get", "emqx", "emqx", "-o", "jsonpath={.status.nodeEvacuationsStatus}"). 
- ShouldNot(ContainSubstring("connection_eviction_rate")) + WithArguments("get", "emqx", "emqx", "-o", "jsonpath={.status.nodeEvacuations}"). + Should(BeEmpty()) Eventually(checkEMQXReady).WithArguments(changedAt).Should(Succeed()) Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) checkNoReplicants(Default) - By("check previous core StatefulSet has been scaled down to 0") - out, err := KubectlOut("get", "statefulset", stsList.Items[0].Name, "-o", "jsonpath={.status.replicas}") - Expect(err).NotTo(HaveOccurred(), "Failed to get core StatefulSet replicas") - Expect(out).To(Equal("0")) + By("verify exactly one core StatefulSet was updated") + var stsList appsv1.StatefulSetList + Expect(KubectlOut("get", "statefulset", + "--selector", crd.LabelDBRole+"=core", + "-o", "json", + )).To(UnmarshalInto(&stsList)) + + Expect(stsList.Items).To( + ConsistOf(And( + HaveField("ObjectMeta.Name", Equal(stsBefore.ObjectMeta.Name)), + HaveField("Status.UpdateRevision", Not(Equal(stsBefore.Status.UpdateRevision))), + )), + "Unexpected set of core StatefulSets", + ) }) It("change config", func() { @@ -243,7 +270,7 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { }) }) - Context("EMQX Cluster / Botched Blue-Green Updates", func() { + Context("EMQX Cluster / Botched Rolling Updates", func() { // Initial number of core replicas: var coreReplicas int = 2 @@ -261,7 +288,7 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) }) - It("trigger botched blue-green updates", func() { + It("trigger botched rolling updates", func() { By("create MQTT workload") Expect(Kubectl("apply", "-f", "test/e2e/files/resources/mqttx.yaml")).To(Succeed()) defer Kubectl("delete", "-f", "test/e2e/files/resources/mqttx.yaml") @@ -271,34 +298,21 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { "--timeout=1m", )).To(Succeed(), "Timed out waiting MQTTX to be ready") - 
By("decrease revision history limit") - Expect(Kubectl("patch", "emqx", "emqx", - "--type", "json", - "--patch", `[ - {"op": "replace", "path": "/spec/revisionHistoryLimit", "value": 1} - ]`)). - To(Succeed()) - By("lookup initial EMQX status") - var statusInitial crdv2.EMQXNodesStatus + var statusInitial crd.CoreNodesStatus Eventually(checkEMQXReady).Should(Succeed()) Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus}")). To(UnmarshalInto(&statusInitial)) By("specify incorrect EMQX image") - coreReplicas = 1 changedAt1 := metav1.Now() Expect(Kubectl("patch", "emqx", "emqx", "--type", "json", "--patch", `[ - {"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": 1}, {"op": "replace", "path": "/spec/image", "value": "emqx/emqx:5.Y.ZZZ"} ]`)). To(Succeed()) Consistently(checkEMQXReady, "30s", "3s").WithArguments(changedAt1).Should(Not(Succeed())) - var status1 crdv2.EMQXNodesStatus - Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus}")). - To(UnmarshalInto(&status1)) By("specify broken EMQX config") changedAt2 := metav1.Now() @@ -311,18 +325,15 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { ]`)). To(Succeed()) Consistently(checkEMQXReady, "30s", "3s").WithArguments(changedAt2).Should(Not(Succeed())) - var status2 crdv2.EMQXNodesStatus - Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus}")). - To(UnmarshalInto(&status2)) - By("verify current sets have not changed") - var status crdv2.EMQXNodesStatus + var status crd.CoreNodesStatus Expect(KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus}")). 
- To(BeUnmarshalledAs(&status, And( - HaveField("CurrentRevision", Equal(statusInitial.CurrentRevision)), - HaveField("CurrentReplicas", Equal(statusInitial.CurrentReplicas)), - HaveField("ReadyReplicas", Equal(statusInitial.CurrentReplicas)), - ))) + To( + BeUnmarshalledAs(&status, + HaveField("ReadyReplicas", Equal(statusInitial.ReadyReplicas-1)), + ), + "no more than 1 replica went unavailable", + ) By("specify correct EMQX config") changedAt3 := metav1.Now() @@ -336,21 +347,6 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { To(Succeed()) Eventually(checkEMQXReady).WithArguments(changedAt3).Should(Succeed()) Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) - - var stsList appsv1.StatefulSetList - Eventually(KubectlOut).WithArguments("get", "statefulset", - "--selector", crdv2.LabelInstance+"=emqx", - "-o", "json", - ).Should(BeUnmarshalledAs(&stsList, HaveField("Items", - // Current (same as update) + 1 outdated - HaveLen(2), - ))) - Expect(stsList.Items).To(And( - // First botched coreSet should be cleaned - Not(ContainElement(HaveLabel(crdv2.LabelPodTemplateHash, Equal(status1.UpdateRevision)))), - // Second botched coreSet should be preserved as part of revision history - ContainElement(HaveLabel(crdv2.LabelPodTemplateHash, Equal(status2.UpdateRevision))), - )) }) It("delete cluster", func() { @@ -360,7 +356,7 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { Context("EMQX Core-Replicant Cluster", func() { // Initial number of core and replicant replicas: - var coreReplicas int = 1 + var coreReplicas int = 2 var replicantReplicas int = 2 It("deploy cluster", func() { @@ -380,13 +376,13 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { }) It("scale cluster up", func() { - coreReplicas = 2 + coreReplicas = 3 replicantReplicas = 3 scaleupStartedAt := metav1.Now() By("change number of core replicas") Expect(Kubectl("patch", "emqx", "emqx", "--type", "json", - "--patch", `[{"op": "replace", 
"path": "/spec/coreTemplate/spec/replicas", "value": 2}]`)). + "--patch", `[{"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": 3}]`)). To(Succeed(), "Failed to scale emqx cluster") By("change number of replicant replicas") Expect(Kubectl("patch", "emqx", "emqx", @@ -399,7 +395,27 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { Eventually(checkReplicantStatus).WithArguments(replicantReplicas).Should(Succeed()) }) - It("change image for target blue-green update", func() { + It("scale cluster down", func() { + coreReplicas = 2 + replicantReplicas = 2 + scaledownStartedAt := metav1.Now() + By("change number of core replicas") + Expect(Kubectl("patch", "emqx", "emqx", + "--type", "json", + "--patch", `[{"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": 2}]`)). + To(Succeed(), "Failed to scale emqx cluster") + By("change number of replicant replicas") + Expect(Kubectl("patch", "emqx", "emqx", + "--type", "json", + "--patch", `[{"op": "replace", "path": "/spec/replicantTemplate/spec/replicas", "value": 2}]`)). 
+ To(Succeed(), "Failed to scale emqx cluster") + By("wait for EMQX cluster to be ready after scaling") + Eventually(checkEMQXReady).WithArguments(scaledownStartedAt).Should(Succeed()) + Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) + Eventually(checkReplicantStatus).WithArguments(replicantReplicas).Should(Succeed()) + }) + + It("change image to trigger rolling update", func() { By("create MQTTX client") Expect(Kubectl("apply", "-f", "test/e2e/files/resources/mqttx.yaml")).To(Succeed()) defer Kubectl("delete", "-f", "test/e2e/files/resources/mqttx.yaml") @@ -409,22 +425,20 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { "--timeout=1m", )).To(Succeed(), "Timed out waiting for MQTTX to be ready") - By("fetch current core StatefulSet") - var stsList appsv1.StatefulSetList - coreRev, err := KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus.currentRevision}") - Expect(err).NotTo(HaveOccurred(), "Failed to get EMQX status") + By("fetch core StatefulSet") + var stsListBefore appsv1.StatefulSetList Expect(KubectlOut("get", "statefulset", - "--selector", crdv2.LabelPodTemplateHash+"="+coreRev, + "--selector", crd.LabelDBRole+"=core", "-o", "json", - )).To(UnmarshalInto(&stsList), "Failed to list statefulsets") - Expect(stsList.Items).To(HaveLen(1)) + )).To(UnmarshalInto(&stsListBefore)) + Expect(stsListBefore.Items).To(HaveLen(1), "More than one core StatefulSet") By("fetch current replicant ReplicaSet") var rsList appsv1.ReplicaSetList replRev, err := KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.replicantNodesStatus.currentRevision}") Expect(err).NotTo(HaveOccurred(), "Failed to get EMQX status") Expect(KubectlOut("get", "replicaset", - "--selector", crdv2.LabelPodTemplateHash+"="+replRev, + "--selector", crd.LabelPodTemplateHash+"="+replRev, "-o", "json", )).To(UnmarshalInto(&rsList), "Failed to list replicasets") Expect(rsList.Items).To(HaveLen(1)) @@ -438,7 +452,7 @@ var _ = 
Describe("EMQX Test", Label("emqx"), Ordered, func() { By("check EMQX cluster node evacuations status") Eventually(KubectlOut). - WithArguments("get", "emqx", "emqx", "-o", "jsonpath={.status.nodeEvacuationsStatus}"). + WithArguments("get", "emqx", "emqx", "-o", "jsonpath={.status.nodeEvacuations}"). ShouldNot(ContainSubstring("connection_eviction_rate")) By("wait for EMQX cluster to be ready again") @@ -446,15 +460,26 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) Eventually(checkReplicantStatus).WithArguments(replicantReplicas).Should(Succeed()) - By("check previous coreSet has been scaled down to 0") - out, err := KubectlOut("get", "statefulset", stsList.Items[0].Name, "-o", "jsonpath={.status.replicas}") - Expect(err).NotTo(HaveOccurred(), "Failed to get core StatefulSet") - Expect(out).To(Equal("0")) + By("verify exactly one core StatefulSet was updated") + var stsList appsv1.StatefulSetList + Expect(KubectlOut("get", "statefulset", + "--selector", crd.LabelDBRole+"=core", + "-o", "json", + )).To(UnmarshalInto(&stsList)) + + stsBefore := stsListBefore.Items[0] + Expect(stsList.Items).To( + ConsistOf(And( + HaveField("ObjectMeta.Name", Equal(stsBefore.ObjectMeta.Name)), + HaveField("Status.UpdateRevision", Not(Equal(stsBefore.Status.UpdateRevision))), + )), + "Unexpected set of core StatefulSets", + ) By("check previous replicantSet has been scaled down to 0") - out, err = KubectlOut("get", "replicaset", rsList.Items[0].Name, "-o", "jsonpath={.status.replicas}") - Expect(err).NotTo(HaveOccurred(), "Failed to get replicant ReplicaSet") - Expect(out).To(Equal("0")) + Expect(KubectlOut("get", "replicaset", rsList.Items[0].Name, + "-o", "jsonpath={.status.replicas}", + )).To(Equal("0")) }) It("delete cluster", func() { @@ -489,20 +514,20 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { By("verify EMQX pods have relevant conditions") var pods corev1.PodList 
Expect(KubectlOut("get", "pods", - "--selector", crdv2.LabelManagedBy+"=emqx-operator", + "--selector", crd.LabelManagedBy+"=emqx-operator", "-o", "json", )).To(UnmarshalInto(&pods), "Failed to list EMQX pods") Expect(pods.Items).To(HaveLen(4), "EMQX cluster does not have 4 pods") for _, pod := range pods.Items { - if pod.Labels[crdv2.LabelDBRole] == "core" { + if pod.Labels[crd.LabelDBRole] == "core" { Expect(pod.Status.Conditions).To(ContainElement(And( - HaveField("Type", Equal(crdv2.DSReplicationSite)), + HaveField("Type", Equal(crd.DSReplicationSite)), HaveField("Status", Equal(corev1.ConditionTrue)), ))) } - if pod.Labels[crdv2.LabelDBRole] == "replicant" { + if pod.Labels[crd.LabelDBRole] == "replicant" { Expect(pod.Status.Conditions).To(ContainElement(And( - HaveField("Type", Equal(crdv2.DSReplicationSite)), + HaveField("Type", Equal(crd.DSReplicationSite)), HaveField("Status", Equal(corev1.ConditionFalse)), ))) } @@ -542,15 +567,14 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { // Eventually(checkDSReplicationHealthy).Should(Succeed()) }) - It("perform a blue-green update", func() { - By("fetch current core StatefulSet") - var stsList appsv1.StatefulSetList - coreRev, err := KubectlOut("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus.currentRevision}") - Expect(err).NotTo(HaveOccurred(), "Failed to get EMQX status") + It("perform a rolling update", func() { + By("fetch core StatefulSet") + var stsListBefore appsv1.StatefulSetList Expect(KubectlOut("get", "statefulset", - "--selector", crdv2.LabelPodTemplateHash+"="+coreRev, + "--selector", crd.LabelDBRole+"=core", "-o", "json", - )).To(UnmarshalInto(&stsList), "Failed to list statefulSets") + )).To(UnmarshalInto(&stsListBefore)) + Expect(stsListBefore.Items).To(HaveLen(1), "More than one core StatefulSet") By("change EMQX image + number of replicas") coreReplicas = 2 @@ -563,19 +587,26 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { ]`, )).To(Succeed()) - 
By("check new core StatefulSet is spinning up") - Eventually(KubectlOut). - WithArguments("get", "emqx", "emqx", "-o", "jsonpath={.status.coreNodesStatus.updateRevision}"). - ShouldNot(Equal(coreRev), "New StatefulSet has not been spun up") - By("wait for EMQX cluster to be ready again") Eventually(checkEMQXReady).WithArguments(changedAt).Should(Succeed()) Eventually(checkEMQXStatus).WithArguments(coreReplicas).Should(Succeed()) Eventually(checkReplicantStatus).WithArguments(replicantReplicas).Should(Succeed()) - By("check previous coreSet has been scaled down to 0") - Expect(KubectlOut("get", "statefulset", stsList.Items[0].Name, "-o", "jsonpath={.status.replicas}")). - To(Equal("0")) + By("verify exactly one core StatefulSet was updated") + var stsList appsv1.StatefulSetList + Expect(KubectlOut("get", "statefulset", + "--selector", crd.LabelDBRole+"=core", + "-o", "json", + )).To(UnmarshalInto(&stsList)) + + stsBefore := stsListBefore.Items[0] + Expect(stsList.Items).To( + ConsistOf(And( + HaveField("ObjectMeta.Name", Equal(stsBefore.ObjectMeta.Name)), + HaveField("Status.UpdateRevision", Not(Equal(stsBefore.Status.UpdateRevision))), + )), + "Unexpected set of core StatefulSets", + ) By("wait for DS replication status to be stable") Eventually(checkDSReplicationStatus).WithArguments(coreReplicas).Should(Succeed()) @@ -592,9 +623,11 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { Context("EMQX Core-Replicant Cluster / Runtime-enabled DS Replication", func() { // Initial number of core and replicant replicas: - var coreReplicas int = 1 + var coreReplicas int = 2 var replicantReplicas int = 2 + const emqxImage = "emqx/emqx:5.10.2" + It("deploy core-replicant EMQX cluster", func() { By("create EMQX cluster") emqxCR := PatchDocument( @@ -613,16 +646,14 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { }) It("enable DS replication", func() { - By("change config + add label to trigger new deployment") + By("change config + add label to 
trigger rolling update") configDs := string(intoJsonString(configDS())) - coreReplicas = 2 changedAt := metav1.Now() Expect(Kubectl("patch", "emqx", "emqx", "--type", "json", "--patch", `[ {"op": "replace", "path": "/spec/config/data", "value": `+configDs+`}, - {"op": "add", "path": "/spec/coreTemplate/metadata/labels", "value": {"e2e/ds-replication": "true"}}, - {"op": "replace", "path": "/spec/coreTemplate/spec/replicas", "value": 2} + {"op": "add", "path": "/spec/coreTemplate/metadata/labels", "value": {"e2e/ds-replication": "true"}} ]`, )).To(Succeed()) @@ -637,20 +668,20 @@ var _ = Describe("EMQX Test", Label("emqx"), Ordered, func() { By("verify EMQX pods have relevant conditions") var pods corev1.PodList Expect(KubectlOut("get", "pods", - "--selector", crdv2.LabelManagedBy+"=emqx-operator", + "--selector", crd.LabelManagedBy+"=emqx-operator", "-o", "json", )).To(UnmarshalInto(&pods), "Failed to list EMQX pods") Expect(pods.Items).To(HaveLen(4)) for _, pod := range pods.Items { - if pod.Labels[crdv2.LabelDBRole] == "core" { + if pod.Labels[crd.LabelDBRole] == "core" { Expect(pod.Status.Conditions).To(ContainElement(And( - HaveField("Type", Equal(crdv2.DSReplicationSite)), + HaveField("Type", Equal(crd.DSReplicationSite)), HaveField("Status", Equal(corev1.ConditionTrue)), ))) } - if pod.Labels[crdv2.LabelDBRole] == "replicant" { + if pod.Labels[crd.LabelDBRole] == "replicant" { Expect(pod.Status.Conditions).To(ContainElement(And( - HaveField("Type", Equal(crdv2.DSReplicationSite)), + HaveField("Type", Equal(crd.DSReplicationSite)), HaveField("Status", Equal(corev1.ConditionFalse)), ))) } diff --git a/test/e2e/files/resources/emqx.yaml b/test/e2e/files/resources/emqx.yaml index 09548c81a..8d5321726 100644 --- a/test/e2e/files/resources/emqx.yaml +++ b/test/e2e/files/resources/emqx.yaml @@ -1,4 +1,4 @@ -apiVersion: apps.emqx.io/v2 +apiVersion: apps.emqx.io/v3alpha1 kind: EMQX metadata: name: emqx @@ -8,7 +8,10 @@ spec: config: data: | license { key = 
"evaluation" } + coreTemplate: + spec: + minReadySeconds: 3 updateStrategy: evacuationStrategy: waitTakeover: 5 # seconds - waitHealthCheck: 10 # seconds + waitHealthCheck: 5 # seconds diff --git a/test/e2e/rebalance_test.go b/test/e2e/rebalance_test.go index 28ea6d045..0c67364b3 100644 --- a/test/e2e/rebalance_test.go +++ b/test/e2e/rebalance_test.go @@ -3,7 +3,7 @@ package e2e import ( "fmt" - appsv2beta1 "github.com/emqx/emqx-operator/api/v2beta1" + crd "github.com/emqx/emqx-operator/api/v2beta1" . "github.com/emqx/emqx-operator/test/util" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -11,8 +11,11 @@ import ( corev1 "k8s.io/api/core/v1" ) +// NOTE: Rebalance controller is disabled in this release. See api/v3alpha1/rebalance_types.go. + //nolint:errcheck -var _ = Describe("Rebalance Test", Label("rebalance"), Ordered, func() { +var _ = Describe("Rebalance Test", Label("rebalance"), Ordered, Pending, func() { + BeforeAll(func() { By("create manager namespace") Expect(Kubectl("create", "ns", namespace)).To(Succeed()) @@ -56,10 +59,10 @@ var _ = Describe("Rebalance Test", Label("rebalance"), Ordered, func() { By("wait for Rebalance to be failed") Eventually(RebalanceStatus).Should( And( - HaveField("Phase", Equal(appsv2beta1.RebalancePhaseFailed)), + HaveField("Phase", Equal(crd.RebalancePhaseFailed)), HaveField("RebalanceStates", BeEmpty()), HaveRebalanceCondition( - appsv2beta1.RebalanceConditionFailed, + crd.RebalanceConditionFailed, HaveField("Status", Equal(corev1.ConditionTrue)), ), ), @@ -80,10 +83,10 @@ var _ = Describe("Rebalance Test", Label("rebalance"), Ordered, func() { By("wait for Rebalance to become failed") Eventually(RebalanceStatus).Should(And( - HaveField("Phase", Equal(appsv2beta1.RebalancePhaseFailed)), + HaveField("Phase", Equal(crd.RebalancePhaseFailed)), HaveField("RebalanceStates", BeEmpty()), HaveRebalanceCondition( - appsv2beta1.RebalanceConditionFailed, + crd.RebalanceConditionFailed, HaveField("Status", 
Equal(corev1.ConditionTrue)), ), )) @@ -120,21 +123,21 @@ var _ = Describe("Rebalance Test", Label("rebalance"), Ordered, func() { By("check Rebalance CR state") Eventually(RebalanceStatus).Should(And( - HaveField("Phase", Equal(appsv2beta1.RebalancePhaseProcessing)), + HaveField("Phase", Equal(crd.RebalancePhaseProcessing)), HaveField("RebalanceStates", Not(BeEmpty())), )) Eventually(RebalanceStatus).Should(And( - HaveField("Phase", Equal(appsv2beta1.RebalancePhaseCompleted)), + HaveField("Phase", Equal(crd.RebalancePhaseCompleted)), HaveRebalanceCondition( - appsv2beta1.RebalanceConditionCompleted, + crd.RebalanceConditionCompleted, HaveField("Status", Equal(corev1.ConditionTrue)), ), )) }) }) -func RebalanceStatus(g Gomega) appsv2beta1.RebalanceStatus { - var status appsv2beta1.RebalanceStatus +func RebalanceStatus(g Gomega) crd.RebalanceStatus { + var status crd.RebalanceStatus out, err := KubectlOut("get", "rebalance", "rebalance", "-o", "jsonpath={.status}") g.Expect(err).NotTo(HaveOccurred(), "Failed to get rebalance status") g.Expect(out).To(UnmarshalInto(&status)) @@ -142,11 +145,11 @@ func RebalanceStatus(g Gomega) appsv2beta1.RebalanceStatus { } func HaveRebalanceCondition( - conditionType appsv2beta1.RebalanceConditionType, + conditionType crd.RebalanceConditionType, matcher types.GomegaMatcher, ) types.GomegaMatcher { return WithTransform( - func(s appsv2beta1.RebalanceStatus) *appsv2beta1.RebalanceCondition { + func(s crd.RebalanceStatus) *crd.RebalanceCondition { for _, c := range s.Conditions { if c.Type == conditionType { return &c diff --git a/test/util/gomega.go b/test/util/gomega.go index df663fe93..afb5f9e34 100644 --- a/test/util/gomega.go +++ b/test/util/gomega.go @@ -3,7 +3,7 @@ package util import ( "encoding/json" - crdv2 "github.com/emqx/emqx-operator/api/v2" + crd "github.com/emqx/emqx-operator/api/v3alpha1" "github.com/onsi/gomega" "github.com/onsi/gomega/types" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -11,7 +11,7 @@ import ( 
func HaveCondition(conditionType string, matcher types.GomegaMatcher) types.GomegaMatcher { return gomega.WithTransform( - func(instance *crdv2.EMQX) *metav1.Condition { + func(instance *crd.EMQX) *metav1.Condition { _, condition := instance.Status.GetCondition(conditionType) return condition },