diff --git a/controllers/actions.github.com/autoscalingrunnerset_controller.go b/controllers/actions.github.com/autoscalingrunnerset_controller.go index 3509de913a..1f9fdb0bd7 100644 --- a/controllers/actions.github.com/autoscalingrunnerset_controller.go +++ b/controllers/actions.github.com/autoscalingrunnerset_controller.go @@ -315,7 +315,14 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl // Make sure the AutoscalingListener is up and running in the controller namespace if !listenerFound { - if r.drainingJobs(&latestRunnerSet.Status) { + // Only block listener creation if the runner spec has changed and old-spec + // runners still need to drain. When only listener values changed (e.g., + // minRunners, maxRunners), the existing runners are valid and the listener + // should be recreated immediately. Without this guard, minRunners idle + // runners would block listener recreation indefinitely (deadlock). + // See: https://github.com/actions/actions-runner-controller/issues/4432 + runnerSpecOutdated := latestRunnerSet.Annotations[annotationKeyRunnerSpecHash] != autoscalingRunnerSet.RunnerSetSpecHash() + if runnerSpecOutdated && r.drainingJobs(&latestRunnerSet.Status) { log.Info("Creating a new AutoscalingListener is waiting for the running and pending runners to finish. Waiting for the running and pending runners to finish:", "running", latestRunnerSet.Status.RunningEphemeralRunners, "pending", latestRunnerSet.Status.PendingEphemeralRunners) return ctrl.Result{}, nil } diff --git a/controllers/actions.github.com/autoscalingrunnerset_controller_test.go b/controllers/actions.github.com/autoscalingrunnerset_controller_test.go index a2d9a6b259..3b45cbe472 100644 --- a/controllers/actions.github.com/autoscalingrunnerset_controller_test.go +++ b/controllers/actions.github.com/autoscalingrunnerset_controller_test.go @@ -618,6 +618,85 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() { autoscalingRunnerSetTestInterval, ).ShouldNot(Succeed(), "Listener should not be recreated") }) + + It("Should recreate the listener immediately when only values change (not runner spec) even with running runners. Regression test for #4432.", func() { + // Wait for initial setup to complete with default (immediate) strategy + listener := new(v1alpha1.AutoscalingListener) + Eventually( + func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener) + }, + autoscalingRunnerSetTestTimeout, + autoscalingRunnerSetTestInterval, + ).Should(Succeed(), "Listener should be created") + + Eventually( + func() (int, error) { + runnerSetList := new(v1alpha1.EphemeralRunnerSetList) + err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace)) + if err != nil { + return 0, err + } + return len(runnerSetList.Items), nil + }, + autoscalingRunnerSetTestTimeout, + autoscalingRunnerSetTestInterval, + ).Should(BeEquivalentTo(1), "Only one EphemeralRunnerSet should be created") + + // Now switch to eventual strategy (simulates real-world config) + controller.UpdateStrategy = UpdateStrategyEventual + + runnerSetList := new(v1alpha1.EphemeralRunnerSetList) + err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace)) + Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunnerSet") + + // Emulate minRunners idle runners (running but no active jobs) + runnerSet := runnerSetList.Items[0] + activeRunnerSet := runnerSet.DeepCopy() + activeRunnerSet.Status.CurrentReplicas = 2 + activeRunnerSet.Status.RunningEphemeralRunners = 2 + activeRunnerSet.Status.PendingEphemeralRunners = 0 + + err = k8sClient.Status().Patch(ctx, activeRunnerSet, client.MergeFrom(&runnerSet)) + Expect(err).NotTo(HaveOccurred(), "Failed to patch runner set status") + + // Patch ONLY the values hash (e.g., maxRunners changed) without changing the runner spec + patched := autoscalingRunnerSet.DeepCopy() + if patched.Annotations == nil { + patched.Annotations = make(map[string]string) + } + patched.Annotations[annotationKeyValuesHash] = "changed-values-hash" + err = k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet)) + Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet") + autoscalingRunnerSet = patched.DeepCopy() + + // The listener should be deleted (values hash changed) + Eventually( + func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener) + }, + autoscalingRunnerSetTestTimeout, + autoscalingRunnerSetTestInterval, + ).ShouldNot(Succeed(), "Old listener should be deleted") + + // The listener SHOULD be recreated even though there are running runners, + // because the runner spec hasn't changed — only values changed. + // Before the fix for #4432, this would deadlock. + Eventually( + func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener) + }, + autoscalingRunnerSetTestTimeout, + autoscalingRunnerSetTestInterval, + ).Should(Succeed(), "Listener should be recreated despite running minRunners idle runners") + + // The EphemeralRunnerSet should NOT be recreated (runner spec unchanged) + currentRunnerSetList := new(v1alpha1.EphemeralRunnerSetList) + err = k8sClient.List(ctx, currentRunnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace)) + Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunnerSet") + Expect(len(currentRunnerSetList.Items)).To(BeEquivalentTo(1), "EphemeralRunnerSet should not be recreated") + Expect(currentRunnerSetList.Items[0].Name).To(Equal(activeRunnerSet.Name), "Should be the same EphemeralRunnerSet") + }) }) It("Should update Status on EphemeralRunnerSet status Update", func() {