Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
227 changes: 227 additions & 0 deletions test/e2e/v2/tests/hosted_cluster_azure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

routev1 "github.com/openshift/api/route/v1"

hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
hcpmanifests "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/manifests"
"github.com/openshift/hypershift/support/azureutil"
Expand All @@ -32,6 +34,7 @@ import (
"github.com/openshift/hypershift/test/e2e/v2/internal"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/clientcmd"
crclient "sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -210,6 +213,176 @@ func AzurePrivateTopologyTest(getTestCtx internal.TestContextGetter) {
})
}

// AzureEndpointAccessTransitionTest validates transitioning a HostedCluster
// between Private and PublicAndPrivate topology on Azure.
// This test MUST be registered after AzurePrivateTopologyTest to ensure
// stateless private-topology assertions run on the pristine Private cluster first.
func AzureEndpointAccessTransitionTest(getTestCtx internal.TestContextGetter) {
Context("[Feature:AzureEndpointAccess] Azure Endpoint Access Transition", Label("Azure", "self-managed-azure-private"), Ordered, func() {
var testCtx *internal.TestContext
var hc *hyperv1.HostedCluster
var controlPlaneNamespace string

BeforeAll(func() {
testCtx = getTestCtx()
hc = testCtx.GetHostedCluster()
if hc == nil || hc.Spec.Platform.Type != hyperv1.AzurePlatform {
Skip("Azure endpoint access transition tests are only for Azure platform")
}
if hc.Spec.Platform.Azure == nil || hc.Spec.Platform.Azure.Topology != hyperv1.AzureTopologyPrivate {
Skip("Azure endpoint access transition tests require Private topology")
}
controlPlaneNamespace = testCtx.ControlPlaneNamespace
Expect(controlPlaneNamespace).NotTo(BeEmpty(), "control plane namespace must be set")

DeferCleanup(func() {
restoreErr := e2eutil.UpdateObject(GinkgoTB(), context.Background(), testCtx.MgmtClient, hc, func(obj *hyperv1.HostedCluster) {
obj.Spec.Platform.Azure.Topology = hyperv1.AzureTopologyPrivate
})
if restoreErr != nil {
GinkgoTB().Logf("WARNING: failed to restore Private topology: %v", restoreErr)
}
})
Comment on lines +238 to +245

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Fail cleanup if topology restore does not succeed.

The cleanup currently logs and continues on restore failure, which can leave shared cluster state mutated and cause cascading failures in later specs.

Proposed fix
 			DeferCleanup(func() {
-				restoreErr := e2eutil.UpdateObject(GinkgoTB(), context.Background(), testCtx.MgmtClient, hc, func(obj *hyperv1.HostedCluster) {
+				cleanupCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+				defer cancel()
+				restoreErr := e2eutil.UpdateObject(GinkgoTB(), cleanupCtx, testCtx.MgmtClient, hc, func(obj *hyperv1.HostedCluster) {
 					obj.Spec.Platform.Azure.Topology = hyperv1.AzureTopologyPrivate
 				})
-				if restoreErr != nil {
-					GinkgoTB().Logf("WARNING: failed to restore Private topology: %v", restoreErr)
-				}
+				Expect(restoreErr).NotTo(HaveOccurred(), "cleanup: failed to restore Private topology")
 			})

As per coding guidelines, when mutating cluster state, restore it via DeferCleanup in a fail-safe way on all exit paths.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/e2e/v2/tests/hosted_cluster_azure_test.go` around lines 236 - 243, The
DeferCleanup function currently logs a warning when the topology restore fails
(when restoreErr is not nil) but continues execution, which can leave the
cluster state mutated and cause cascading failures in subsequent tests. Modify
the error handling block where restoreErr is checked to fail the test using an
appropriate Ginkgo assertion or failure method (such as GinkgoTB().Fail) instead
of just logging a warning, ensuring that any failure to restore the Azure
topology causes the test cleanup to fail and prevent state corruption from
affecting other specs.

Source: Coding guidelines

})

It("should transition from Private to PublicAndPrivate", func() {
ctx := testCtx.Context

err := e2eutil.UpdateObject(GinkgoTB(), ctx, testCtx.MgmtClient, hc, func(obj *hyperv1.HostedCluster) {
obj.Spec.Platform.Azure.Topology = hyperv1.AzureTopologyPublicAndPrivate
})
Expect(err).NotTo(HaveOccurred(), "failed to update topology to PublicAndPrivate")

e2eutil.EventuallyObject(GinkgoTB(), ctx, "KAS external public route exists after transition to PublicAndPrivate",
func(ctx context.Context) (*routev1.Route, error) {
route := hcpmanifests.KubeAPIServerExternalPublicRoute(controlPlaneNamespace)
err := testCtx.MgmtClient.Get(ctx, crclient.ObjectKeyFromObject(route), route)
return route, err
},
[]e2eutil.Predicate[*routev1.Route]{
routeHasHostPredicate(),
},
e2eutil.WithTimeout(10*time.Minute),
)

Eventually(func() error {
route := hcpmanifests.KubeAPIServerExternalPrivateRoute(controlPlaneNamespace)
return expectDeleted(ctx, testCtx.MgmtClient, route)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(),
"KAS external private route should be deleted after transition to PublicAndPrivate")

e2eutil.EventuallyObject(GinkgoTB(), ctx, "OAuth external public route exists after transition to PublicAndPrivate",
func(ctx context.Context) (*routev1.Route, error) {
route := hcpmanifests.OauthServerExternalPublicRoute(controlPlaneNamespace)
err := testCtx.MgmtClient.Get(ctx, crclient.ObjectKeyFromObject(route), route)
return route, err
},
[]e2eutil.Predicate[*routev1.Route]{
routeHasHostPredicate(),
},
e2eutil.WithTimeout(10*time.Minute),
)

Eventually(func() error {
route := hcpmanifests.OauthServerExternalPrivateRoute(controlPlaneNamespace)
return expectDeleted(ctx, testCtx.MgmtClient, route)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(),
"OAuth external private route should be deleted after transition to PublicAndPrivate")

e2eutil.EventuallyObject(GinkgoTB(), ctx, "router-public Service is LoadBalancer after transition to PublicAndPrivate",
func(ctx context.Context) (*corev1.Service, error) {
svc := hcpmanifests.RouterPublicService(controlPlaneNamespace)
err := testCtx.MgmtClient.Get(ctx, crclient.ObjectKeyFromObject(svc), svc)
return svc, err
},
[]e2eutil.Predicate[*corev1.Service]{
serviceTypePredicate(corev1.ServiceTypeLoadBalancer),
},
e2eutil.WithTimeout(10*time.Minute),
)

e2eutil.EventuallyObjects(GinkgoTB(), ctx, "PLS CRs still exist after transition to PublicAndPrivate",
func(ctx context.Context) ([]*hyperv1.AzurePrivateLinkService, error) {
return listPLS(ctx, testCtx.MgmtClient, controlPlaneNamespace)
},
[]e2eutil.Predicate[[]*hyperv1.AzurePrivateLinkService]{
plsExistsPredicate(),
},
nil,
e2eutil.WithTimeout(30*time.Second),
)

verifyAPIReachable(testCtx, "after transition to PublicAndPrivate")
})

It("should transition from PublicAndPrivate back to Private", func() {
ctx := testCtx.Context

err := e2eutil.UpdateObject(GinkgoTB(), ctx, testCtx.MgmtClient, hc, func(obj *hyperv1.HostedCluster) {
obj.Spec.Platform.Azure.Topology = hyperv1.AzureTopologyPrivate
})
Expect(err).NotTo(HaveOccurred(), "failed to update topology to Private")

e2eutil.EventuallyObject(GinkgoTB(), ctx, "KAS external private route exists after restore to Private",
func(ctx context.Context) (*routev1.Route, error) {
route := hcpmanifests.KubeAPIServerExternalPrivateRoute(controlPlaneNamespace)
err := testCtx.MgmtClient.Get(ctx, crclient.ObjectKeyFromObject(route), route)
return route, err
},
[]e2eutil.Predicate[*routev1.Route]{
routeHasHostPredicate(),
},
e2eutil.WithTimeout(10*time.Minute),
)

Eventually(func() error {
route := hcpmanifests.KubeAPIServerExternalPublicRoute(controlPlaneNamespace)
return expectDeleted(ctx, testCtx.MgmtClient, route)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(),
"KAS external public route should be deleted after restore to Private")

e2eutil.EventuallyObject(GinkgoTB(), ctx, "OAuth external private route exists after restore to Private",
func(ctx context.Context) (*routev1.Route, error) {
route := hcpmanifests.OauthServerExternalPrivateRoute(controlPlaneNamespace)
err := testCtx.MgmtClient.Get(ctx, crclient.ObjectKeyFromObject(route), route)
return route, err
},
[]e2eutil.Predicate[*routev1.Route]{
routeHasHostPredicate(),
},
e2eutil.WithTimeout(10*time.Minute),
)

Eventually(func() error {
route := hcpmanifests.OauthServerExternalPublicRoute(controlPlaneNamespace)
return expectDeleted(ctx, testCtx.MgmtClient, route)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(),
"OAuth external public route should be deleted after restore to Private")

Eventually(func() error {
svc := hcpmanifests.RouterPublicService(controlPlaneNamespace)
return expectDeleted(ctx, testCtx.MgmtClient, svc)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(),
"router-public Service should be deleted after restore to Private")

e2eutil.EventuallyObjects(GinkgoTB(), ctx, "PLS CRs still exist after restore to Private",
func(ctx context.Context) ([]*hyperv1.AzurePrivateLinkService, error) {
return listPLS(ctx, testCtx.MgmtClient, controlPlaneNamespace)
},
[]e2eutil.Predicate[[]*hyperv1.AzurePrivateLinkService]{
plsExistsPredicate(),
},
nil,
e2eutil.WithTimeout(30*time.Second),
)
})

It("should reach the API server after restoring Private topology", func() {
verifyAPIReachable(testCtx, "after restore to Private")
})
})
}

// listPLS returns all AzurePrivateLinkService CRs in the given namespace.
func listPLS(ctx context.Context, client crclient.Client, namespace string) ([]*hyperv1.AzurePrivateLinkService, error) {
plsList := &hyperv1.AzurePrivateLinkServiceList{}
Expand All @@ -223,6 +396,59 @@ func listPLS(ctx context.Context, client crclient.Client, namespace string) ([]*
return items, nil
}

func routeHasHostPredicate() e2eutil.Predicate[*routev1.Route] {
return func(route *routev1.Route) (done bool, reasons string, err error) {
if route.Spec.Host != "" {
return true, fmt.Sprintf("route has host %q", route.Spec.Host), nil
}
return false, "route has no host yet", nil
}
}

func expectDeleted(ctx context.Context, client crclient.Client, obj crclient.Object) error {
err := client.Get(ctx, crclient.ObjectKeyFromObject(obj), obj)
if apierrors.IsNotFound(err) {
return nil
}
if err != nil {
return fmt.Errorf("failed to check %s %s: %w", obj.GetObjectKind().GroupVersionKind().Kind, obj.GetName(), err)
}
return fmt.Errorf("%s %s still exists", obj.GetObjectKind().GroupVersionKind().Kind, obj.GetName())
}

func serviceTypePredicate(expected corev1.ServiceType) e2eutil.Predicate[*corev1.Service] {
return func(svc *corev1.Service) (done bool, reasons string, err error) {
if svc.Spec.Type == expected {
return true, fmt.Sprintf("service type is %s", expected), nil
}
return false, fmt.Sprintf("service type is %s, want %s", svc.Spec.Type, expected), nil
}
}

func plsExistsPredicate() e2eutil.Predicate[[]*hyperv1.AzurePrivateLinkService] {
return func(items []*hyperv1.AzurePrivateLinkService) (done bool, reasons string, err error) {
if len(items) == 0 {
return false, "no AzurePrivateLinkService CRs found", nil
}
for _, pls := range items {
if pls.Status.PrivateLinkServiceAlias != "" {
return true, fmt.Sprintf("PLS exists with alias %q", pls.Status.PrivateLinkServiceAlias), nil
}
}
return false, "AzurePrivateLinkService CRs exist but none have a PLS alias", nil
}
}

func verifyAPIReachable(testCtx *internal.TestContext, phase string) {
Eventually(func(g Gomega) {
nsList := &corev1.NamespaceList{}
hostedClusterClient := testCtx.GetHostedClusterClient()
g.Expect(hostedClusterClient).NotTo(BeNil(), "hosted cluster client is nil %s", phase)
g.Expect(hostedClusterClient.List(testCtx.Context, nsList)).To(Succeed(), "failed to list namespaces %s", phase)
g.Expect(nsList.Items).NotTo(BeEmpty(), "namespace list is empty %s", phase)
}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed(), "API server not reachable %s", phase)
}

// AzureOAuthLoadBalancerTest registers tests for Azure OAuth LoadBalancer publishing validation.
// These tests verify that OAuth is properly exposed via a LoadBalancer Service and that the
// OAuth token flow works through that endpoint.
Expand Down Expand Up @@ -290,6 +516,7 @@ func AzureOAuthLoadBalancerTest(getTestCtx internal.TestContextGetter) {
func RegisterHostedClusterAzureTests(getTestCtx internal.TestContextGetter) {
AzurePublicClusterTest(getTestCtx)
AzurePrivateTopologyTest(getTestCtx)
AzureEndpointAccessTransitionTest(getTestCtx)
AzureOAuthLoadBalancerTest(getTestCtx)
}

Expand Down