Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
"github.com/openshift/hypershift/control-plane-operator/hostedclusterconfigoperator/controllers/resources/manifests"
"github.com/openshift/hypershift/support/globalconfig"
"github.com/openshift/hypershift/support/k8sutil"

Check failure on line 12 in control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go

View check run for this annotation

Red Hat Konflux / Red Hat Konflux / control-plane-operator-4-22-on-pull-request

control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go#L12

cannot find module providing package github.com/openshift/hypershift/support/k8sutil: import lookup disabled by -mod=vendor
"github.com/openshift/hypershift/support/releaseinfo"
"github.com/openshift/hypershift/support/upsert"

Expand Down Expand Up @@ -58,6 +59,11 @@
TokenSecretPayloadKey = "payload"
TokenSecretReleaseKey = "release"
TokenSecretReleaseVersionKey = "release-version"

// upgradeRequeueInterval is how often the controller rechecks while an
// upgrade is in progress, closing the gap when a force-deleted pod's
// deletion event is missed.
upgradeRequeueInterval = 30 * time.Second
)

type Reconciler struct {
Expand Down Expand Up @@ -147,7 +153,14 @@
return ctrl.Result{}, fmt.Errorf("token secret %s/%s is missing %q key", tokenSecret.Namespace, tokenSecret.Name, TokenSecretReleaseVersionKey)
}

return ctrl.Result{}, r.reconcileInPlaceUpgrade(ctx, nodePoolUpgradeAPI, tokenSecret, mcoImage, releaseVersion)
if err := r.reconcileInPlaceUpgrade(ctx, nodePoolUpgradeAPI, tokenSecret, mcoImage, releaseVersion); err != nil {
return ctrl.Result{}, err
}
// Requeue periodically while an upgrade is in progress. The controller only
// watches Nodes and MachineSets, so if an upgrade pod is force-deleted the
// deletion event is missed and the replacement pod is never created. A
// periodic recheck closes that gap.
return ctrl.Result{RequeueAfter: upgradeRequeueInterval}, nil
}

type nodePoolUpgradeAPI struct {
Expand Down Expand Up @@ -277,7 +290,7 @@

err = r.reconcileUpgradePods(ctx, r.guestClusterClient, nodes, nodePoolUpgradeAPI.spec.poolRef.GetName(), mcoImage, nodePoolUpgradeAPI.proxy)
if err != nil {
return fmt.Errorf("failed to delete idle upgrade pods: %w", err)
return fmt.Errorf("failed to reconcile upgrade pods: %w", err)
}
return nil
}
Expand Down Expand Up @@ -312,24 +325,14 @@
pod := inPlaceUpgradePod(namespace.Name, node.Name)

if node.Annotations[CurrentMachineConfigAnnotationKey] == node.Annotations[DesiredMachineConfigAnnotationKey] &&
node.Annotations[DesiredDrainerAnnotationKey] == node.Annotations[LastAppliedDrainerAnnotationKey] {
node.Annotations[DesiredDrainerAnnotationKey] == node.Annotations[LastAppliedDrainerAnnotationKey] &&
node.Annotations[MachineConfigDaemonStateAnnotationKey] == MachineConfigDaemonStateDone {
// the node is fully updated and no longer requires a running MCD pod
if err := hostedClusterClient.Get(ctx, client.ObjectKeyFromObject(pod), pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error getting upgrade MCD pod: %w", err)
}
if pod.DeletionTimestamp != nil {
continue
}
if err := hostedClusterClient.Delete(ctx, pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error deleting upgrade MCD pod: %w", err)
if existed, err := k8sutil.DeleteIfNeeded(ctx, hostedClusterClient, pod); err != nil {
return err
} else if existed {
log.Info("Deleted idle upgrade pod")
}
log.Info("Deleted idle upgrade pod")
} else {
if err := hostedClusterClient.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, pod); err != nil {
if !apierrors.IsNotFound(err) {
Expand All @@ -349,6 +352,19 @@
} else {
log.Info("create upgrade pod", "result", result)
}
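// With RestartPolicy=OnFailure the kubelet keeps restarting failed containers in place (with back-off) and never gives up,
// so the pod only reaches a terminal phase when its containers exit successfully or the pod itself is terminated
// (e.g. eviction or node loss). Deleting it here therefore does not interrupt an active retry.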
} else if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
if pod.DeletionTimestamp != nil {
continue
}
log.Info("Detected terminated upgrade pod on node that still needs upgrade, deleting for retry",
"node", node.Name, "podPhase", pod.Status.Phase)
if err := hostedClusterClient.Delete(ctx, pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error deleting terminated upgrade MCD pod for node %s: %w", node.Name, err)
}
}
}
}
Expand Down Expand Up @@ -484,20 +500,8 @@
namespace := inPlaceUpgradeNamespace(poolName)
for _, node := range nodes {
pod := inPlaceUpgradePod(namespace.Name, node.Name)
if err := hostedClusterClient.Get(ctx, client.ObjectKeyFromObject(pod), pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error getting upgrade MCD pod: %w", err)
}
if pod.DeletionTimestamp != nil {
continue
}
if err := hostedClusterClient.Delete(ctx, pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error deleting upgrade MCD pod: %w", err)
if _, err := k8sutil.DeleteIfNeeded(ctx, hostedClusterClient, pod); err != nil {
return err
}
}
return nil
Expand Down
Loading
Loading