From fb41987ee85865d23a9831089b9a67a8ed473e6b Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Mon, 15 Jun 2026 16:19:20 -0700 Subject: [PATCH 1/2] poc --- v1/image.go | 7 +- v1/instancetype.go | 6 +- v1/instancetype_test.go | 30 + v1/providers/testkube/README.md | 137 ++++ v1/providers/testkube/capabilities.go | 25 + v1/providers/testkube/client.go | 204 +++++ .../testkube/images/ubuntu-vm/Dockerfile | 42 + .../images/ubuntu-vm/testkube-bootstrap | 56 ++ .../ubuntu-vm/testkube-bootstrap.service | 14 + v1/providers/testkube/instance.go | 728 ++++++++++++++++++ v1/providers/testkube/instance_test.go | 251 ++++++ v1/providers/testkube/instancetype.go | 168 ++++ v1/providers/testkube/instancetype_test.go | 57 ++ v1/providers/testkube/location.go | 18 + v1/providers/testkube/validation_test.go | 278 +++++++ 15 files changed, 2015 insertions(+), 6 deletions(-) create mode 100644 v1/instancetype_test.go create mode 100644 v1/providers/testkube/README.md create mode 100644 v1/providers/testkube/capabilities.go create mode 100644 v1/providers/testkube/client.go create mode 100644 v1/providers/testkube/images/ubuntu-vm/Dockerfile create mode 100644 v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap create mode 100644 v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap.service create mode 100644 v1/providers/testkube/instance.go create mode 100644 v1/providers/testkube/instance_test.go create mode 100644 v1/providers/testkube/instancetype.go create mode 100644 v1/providers/testkube/instancetype_test.go create mode 100644 v1/providers/testkube/location.go create mode 100644 v1/providers/testkube/validation_test.go diff --git a/v1/image.go b/v1/image.go index c74e3453..f0f1b313 100644 --- a/v1/image.go +++ b/v1/image.go @@ -102,10 +102,11 @@ func validateArchitecture(ctx context.Context, sshClient *ssh.Client) (string, e return "", fmt.Errorf("failed to check architecture: %w, stdout: %s, stderr: %s", err, stdout, stderr) } arch := strings.TrimSpace(stdout) - if !strings.Contains(arch, "x86_64") { - return "", fmt.Errorf("expected x86_64 architecture, got: %s", arch) + normalizedArch := GetArchitecture(arch) + if normalizedArch == ArchitectureUnknown { + return "", fmt.Errorf("unsupported architecture: %s", arch) } - return "x86_64", nil + return string(normalizedArch), nil } func validateOSVersion(ctx context.Context, sshClient *ssh.Client) (string, error) { diff --git a/v1/instancetype.go b/v1/instancetype.go index 77d9083d..a58bc0bb 100644 --- a/v1/instancetype.go +++ b/v1/instancetype.go @@ -42,10 +42,10 @@ const ( ) func GetArchitecture(architecture string) Architecture { - switch strings.ToLower(architecture) { - case "x86_64": + switch strings.ToLower(strings.TrimSpace(architecture)) { + case "x86_64", "amd64": return ArchitectureX86_64 - case "arm64": + case "arm64", "aarch64": return ArchitectureARM64 default: return ArchitectureUnknown diff --git a/v1/instancetype_test.go b/v1/instancetype_test.go new file mode 100644 index 00000000..d76b3238 --- /dev/null +++ b/v1/instancetype_test.go @@ -0,0 +1,30 @@ +package v1 + +import "testing" + +func TestGetArchitectureAliases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + raw string + want Architecture + }{ + {name: "x86_64", raw: "x86_64", want: ArchitectureX86_64}, + {name: "amd64", raw: "amd64", want: ArchitectureX86_64}, + {name: "arm64", raw: "arm64", want: ArchitectureARM64}, + {name: "aarch64", raw: "aarch64", want: ArchitectureARM64}, + {name: "trim and case", raw: " AARCH64\n", want: ArchitectureARM64}, + {name: "unknown", raw: "riscv64", want: ArchitectureUnknown}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if got := GetArchitecture(tt.raw); got != tt.want { + t.Fatalf("GetArchitecture(%q) = %q, want %q", tt.raw, got, tt.want) + } + }) + } +} diff --git a/v1/providers/testkube/README.md b/v1/providers/testkube/README.md new file mode 100644 index 00000000..9f1933d6 --- /dev/null +++ b/v1/providers/testkube/README.md @@ -0,0 +1,137 @@ +# Test Kubernetes Provider + +`test-kubernetes` is a developer-only provider that backs cloud instance lifecycle calls with Kubernetes resources. + +## Credentials + +The credential intentionally starts with only a base64-encoded kubeconfig and an optional namespace: + +```go +type TestKubeCredential struct { + RefID string + KubeconfigBase64 string + Namespace string +} +``` + +## Validation + +The validation tests are opt-in and skipped unless `TESTKUBE_KUBECONFIG_BASE64` is set. The inventory and failure tests only need Kubernetes API access. The real lifecycle validation needs a runnable image tagged as `ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest` and a working Kubernetes `LoadBalancer` implementation. + +`test.ok.cpu` uses a `LoadBalancer` Service for SSH. The instance remains `pending` until the pod is ready and the Service has a load balancer ingress address. This more closely emulates real providers because arbitrary machines can use the returned `PublicIP`/`PublicDNS` and `SSHPort` without sharing the provider process. + +### Local: minikube + +Local validation should use minikube with `minikube tunnel`. The tunnel updates normal Kubernetes `LoadBalancer` Service status and makes the reported external IP reachable from the host, so the provider does not need local-cluster-specific endpoint translation. + +```bash +brew install minikube kubectl + +minikube start --driver=docker --profile testkube +kubectl config use-context testkube + +docker build -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest ./v1/providers/testkube/images/ubuntu-vm +minikube --profile testkube image load ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest +kubectl create namespace testkube + +# In another terminal, keep this running while validation runs. +sudo minikube --profile testkube tunnel + +# Polulate .env with the contents of: +kubectl config view --raw --minify | base64 | tr -d '\n' + +# .env +TESTKUBE_KUBECONFIG_BASE64= +TESTKUBE_NAMESPACE=testkube +``` + +Clean up: + +```bash +minikube --profile testkube delete +``` + +### CI: kind + +kind remains the preferred CI path because it runs a disposable Kubernetes cluster on the GitHub Linux runner's Docker runtime. The CI path still uses `cloud-provider-kind` to provide `LoadBalancer` Services, but the provider itself only reads standard Kubernetes Service status. + +Minimal GitHub Actions sketch: + +```yaml +jobs: + testkube-validation: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - name: Install cloud-provider-kind + run: go install sigs.k8s.io/cloud-provider-kind@latest + - name: Create kind cluster + run: | + kind create cluster --name testkube --wait 5m + kubectl label node testkube-control-plane node.kubernetes.io/exclude-from-external-load-balancers- || true + docker build -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest ./v1/providers/testkube/images/ubuntu-vm + kind load docker-image ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest --name testkube + kubectl create namespace testkube + echo "TESTKUBE_KUBECONFIG_BASE64=$(kind get kubeconfig --name testkube | base64 | tr -d '\n')" >> "$GITHUB_ENV" + echo "TESTKUBE_NAMESPACE=testkube" >> "$GITHUB_ENV" + echo "VALIDATION_TEST=1" >> "$GITHUB_ENV" + - name: Start cloud-provider-kind + run: | + sudo "$(go env GOPATH)/bin/cloud-provider-kind" > /tmp/cloud-provider-kind.log 2>&1 & + - name: Run provider validation + run: go test -v -run 'TestValidationFunctions|TestInstanceLifecycleValidation|TestFailureInstanceTypesValidation' ./v1/providers/testkube +``` + +These tests validate inventory, Kubernetes resource creation/listing, SSH access, stop/start/delete, and configured failure types. They do not validate dev-plane software setup. + +### Image-Backed SSH Validation + +Build the local image: + +```bash +docker build -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest ./v1/providers/testkube/images/ubuntu-vm +``` + +For local minikube validation, load the image into the minikube profile: + +```bash +minikube --profile testkube image load ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest +``` + +For CI kind validation, load the image into the kind cluster: + +```bash +kind load docker-image ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest --name testkube +``` + +Then run the focused SSH validation: + +```bash +go test -v -run TestImageBackedInstanceValidation ./v1/providers/testkube +``` + +`TestImageBackedInstanceValidation` creates a `test.ok.cpu` instance with the baked image tag, waits for the pod and load balancer to become ready, uses the provider-returned SSH endpoint, and verifies key-based SSH plus `sudo`, `apt-get`, and systemd basics. + +CI can add the same build/load step before running the image-backed test: + +```yaml + - name: Build local testkube image + run: docker build -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest ./v1/providers/testkube/images/ubuntu-vm + - name: Load local testkube image into kind + run: kind load docker-image ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest --name testkube + - name: Run image-backed validation + run: go test -v -run TestImageBackedInstanceValidation ./v1/providers/testkube +``` + +## Image Contract + +`test.ok.cpu` points at `ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest`. Before that image is published to GHCR, build the local image with that same tag. Once the image is published, local and CI validation can either pull it or keep building and loading the same tag for hermetic tests. + +The image at `images/ubuntu-vm` is expected to behave like a minimal Ubuntu VM for dev-plane: SSH access, `sudo`, `apt-get`, and systemd compatibility. + +The cloud provider does not lay down dev-plane software; dev-plane owns that. The image only needs to provide a base OS environment that dev-plane setup can use. + +Future exposure modes, such as fixed `NodePort` for one-off local debugging, should be separate instance type specs rather than credential fields. \ No newline at end of file diff --git a/v1/providers/testkube/capabilities.go b/v1/providers/testkube/capabilities.go new file mode 100644 index 00000000..f3f63b6d --- /dev/null +++ b/v1/providers/testkube/capabilities.go @@ -0,0 +1,25 @@ +package v1 + +import ( + "context" + + cloudv1 "github.com/brevdev/cloud/v1" +) + +func getTestKubeCapabilities() cloudv1.Capabilities { + return cloudv1.Capabilities{ + cloudv1.CapabilityCreateInstance, + cloudv1.CapabilityTerminateInstance, + cloudv1.CapabilityStopStartInstance, + cloudv1.CapabilityRebootInstance, + cloudv1.CapabilityTags, + } +} + +func (c *TestKubeCredential) GetCapabilities(_ context.Context) (cloudv1.Capabilities, error) { + return getTestKubeCapabilities(), nil +} + +func (c *TestKubeClient) GetCapabilities(_ context.Context) (cloudv1.Capabilities, error) { + return getTestKubeCapabilities(), nil +} diff --git a/v1/providers/testkube/client.go b/v1/providers/testkube/client.go new file mode 100644 index 00000000..d4fa3d06 --- /dev/null +++ b/v1/providers/testkube/client.go @@ -0,0 +1,204 @@ +package v1 + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "fmt" + "strings" + "time" + + cloudv1 "github.com/brevdev/cloud/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + CloudProviderID = "test-kubernetes" + + DefaultNamespace = "default" + DefaultLocation = "test-local" + + servicePortName = "ssh" + servicePort = int32(22) + containerSSHPort = int32(22) +) + +// TestKubeCredential authenticates a developer test provider backed by Kubernetes. +type TestKubeCredential struct { + RefID string + + KubeconfigBase64 string + Namespace string +} + +var _ cloudv1.CloudCredential = &TestKubeCredential{} + +func NewTestKubeCredential(refID, kubeconfigBase64, namespace string) *TestKubeCredential { + return &TestKubeCredential{ + RefID: refID, + KubeconfigBase64: kubeconfigBase64, + Namespace: namespace, + } +} + +func (c *TestKubeCredential) GetReferenceID() string { + return c.RefID +} + +func (c *TestKubeCredential) GetAPIType() cloudv1.APIType { + return cloudv1.APITypeGlobal +} + +func (c *TestKubeCredential) GetCloudProviderID() cloudv1.CloudProviderID { + return CloudProviderID +} + +func (c *TestKubeCredential) GetTenantID() (string, error) { + fingerprint := c.KubeconfigBase64 + c.Namespace + return fmt.Sprintf("%s-%x", CloudProviderID, sha256.Sum256([]byte(fingerprint))), nil +} + +func (c *TestKubeCredential) MakeClient(_ context.Context, location string) (cloudv1.CloudClient, error) { + restConfig, err := c.restConfig() + if err != nil { + return nil, err + } + + opts := []TestKubeClientOption{ + WithNamespace(c.Namespace), + WithLocation(firstNonEmpty(location, DefaultLocation)), + } + return NewTestKubeClient(c.RefID, restConfig, opts...) +} + +func (c *TestKubeCredential) restConfig() (*rest.Config, error) { + if c.KubeconfigBase64 == "" { + return nil, fmt.Errorf("kubeconfigBase64 is required") + } + kubeconfig, err := base64.StdEncoding.DecodeString(c.KubeconfigBase64) + if err != nil { + return nil, fmt.Errorf("decode kubeconfig: %w", err) + } + return clientcmd.RESTConfigFromKubeConfig(kubeconfig) +} + +// TestKubeClient implements the CloudClient interface with Kubernetes primitives. +type TestKubeClient struct { + cloudv1.NotImplCloudClient + + refID string + namespace string + location string + k8sClient kubernetes.Interface +} + +var _ cloudv1.CloudClient = &TestKubeClient{} + +type testKubeClientOptions struct { + namespace string + location string + k8sClient kubernetes.Interface +} + +type TestKubeClientOption func(*testKubeClientOptions) error + +func WithNamespace(namespace string) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.namespace = namespace + return nil + } +} + +func WithLocation(location string) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.location = location + return nil + } +} + +func WithKubernetesClient(k8sClient kubernetes.Interface) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.k8sClient = k8sClient + return nil + } +} + +func NewTestKubeClient(refID string, config *rest.Config, opts ...TestKubeClientOption) (*TestKubeClient, error) { + options := testKubeClientOptions{ + namespace: DefaultNamespace, + location: DefaultLocation, + } + for _, opt := range opts { + if err := opt(&options); err != nil { + return nil, err + } + } + if strings.TrimSpace(refID) == "" { + return nil, fmt.Errorf("refID is required") + } + if strings.TrimSpace(options.namespace) == "" { + options.namespace = DefaultNamespace + } + if strings.TrimSpace(options.location) == "" { + options.location = DefaultLocation + } + if options.k8sClient == nil { + if config == nil { + return nil, fmt.Errorf("kubernetes rest config is required") + } + k8sClient, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("create kubernetes client: %w", err) + } + options.k8sClient = k8sClient + } + + return &TestKubeClient{ + refID: refID, + namespace: options.namespace, + location: options.location, + k8sClient: options.k8sClient, + }, nil +} + +func (c *TestKubeClient) GetAPIType() cloudv1.APIType { + return cloudv1.APITypeGlobal +} + +func (c *TestKubeClient) GetCloudProviderID() cloudv1.CloudProviderID { + return CloudProviderID +} + +func (c *TestKubeClient) GetReferenceID() string { + return c.refID +} + +func (c *TestKubeClient) GetTenantID() (string, error) { + return fmt.Sprintf("%s-%x", CloudProviderID, sha256.Sum256([]byte(c.refID+c.namespace))), nil +} + +func (c *TestKubeClient) MakeClient(_ context.Context, location string) (cloudv1.CloudClient, error) { + if location != "" { + c.location = location + } + return c, nil +} + +func (c *TestKubeClient) GetInstancePollTime() time.Duration { + return time.Second +} + +func (c *TestKubeClient) GetInstanceTypePollTime() time.Duration { + return time.Minute +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} diff --git a/v1/providers/testkube/images/ubuntu-vm/Dockerfile b/v1/providers/testkube/images/ubuntu-vm/Dockerfile new file mode 100644 index 00000000..88b10ea7 --- /dev/null +++ b/v1/providers/testkube/images/ubuntu-vm/Dockerfile @@ -0,0 +1,42 @@ +FROM ubuntu:24.04 + +ENV container=docker +ENV USER_NAME=ubuntu +ENV SUDO_ACCESS=true + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + dbus \ + openssh-server \ + sudo \ + systemd \ + systemd-sysv \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && if ! id ubuntu >/dev/null 2>&1; then useradd --create-home --shell /bin/bash ubuntu; fi \ + && passwd -d ubuntu >/dev/null 2>&1 || true \ + && mkdir -p /run/sshd /etc/docker /etc/testkube \ + && printf '{\n "storage-driver": "vfs"\n}\n' > /etc/docker/daemon.json \ + && ssh-keygen -A \ + && systemctl enable ssh.service \ + && systemctl mask \ + console-getty.service \ + dev-hugepages.mount \ + getty.target \ + sys-fs-fuse-connections.mount \ + systemd-logind.service + +COPY testkube-bootstrap /usr/local/sbin/testkube-bootstrap +COPY testkube-bootstrap.service /etc/systemd/system/testkube-bootstrap.service + +RUN chmod 0755 /usr/local/sbin/testkube-bootstrap \ + && systemctl enable testkube-bootstrap.service + +EXPOSE 22 + +STOPSIGNAL SIGRTMIN+3 +CMD ["/sbin/init"] diff --git a/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap b/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap new file mode 100644 index 00000000..02263aa0 --- /dev/null +++ b/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ "${TESTKUBE_FAIL_BUILD:-false}" == "true" ]]; then + echo "TESTKUBE_FAIL_BUILD=true; exiting testkube image" + systemctl --no-block exit 42 || true + exit 42 +fi + +user_name="${USER_NAME:-ubuntu}" + +if ! id "$user_name" >/dev/null 2>&1; then + useradd --create-home --shell /bin/bash "$user_name" +fi +passwd -d "$user_name" >/dev/null 2>&1 || true + +home_dir="$(getent passwd "$user_name" | cut -d: -f6)" +install -d -m 0700 -o "$user_name" -g "$user_name" "$home_dir/.ssh" + +if [[ -n "${PUBLIC_KEY:-}" ]]; then + printf '%s\n' "$PUBLIC_KEY" > "$home_dir/.ssh/authorized_keys" +else + : > "$home_dir/.ssh/authorized_keys" +fi +chown "$user_name:$user_name" "$home_dir/.ssh/authorized_keys" +chmod 0600 "$home_dir/.ssh/authorized_keys" + +if [[ "${SUDO_ACCESS:-true}" == "true" ]]; then + printf '%s ALL=(ALL) NOPASSWD:ALL\n' "$user_name" > /etc/sudoers.d/testkube-user + chmod 0440 /etc/sudoers.d/testkube-user +else + rm -f /etc/sudoers.d/testkube-user +fi + +mkdir -p /run/sshd /etc/testkube /etc/ssh/sshd_config.d +ssh-keygen -A + +password_access="no" +if [[ "${PASSWORD_ACCESS:-false}" == "true" ]]; then + password_access="yes" +fi + +cat > /etc/ssh/sshd_config.d/90-testkube.conf < /etc/testkube/scenario.env < 0 { + instance.SSHPort = int(service.Spec.Ports[0].Port) + } + + switch service.Spec.Type { + case corev1.ServiceTypeLoadBalancer: + if len(service.Status.LoadBalancer.Ingress) == 0 { + return + } + ingress := service.Status.LoadBalancer.Ingress[0] + if ingress.IP != "" { + instance.PublicIP = ingress.IP + instance.PublicDNS = ingress.IP + } + if ingress.Hostname != "" { + instance.PublicDNS = ingress.Hostname + if instance.PublicIP == "" { + instance.PublicIP = ingress.Hostname + } + } + case corev1.ServiceTypeNodePort: + if len(service.Spec.Ports) == 0 { + return + } + instance.SSHPort = int(service.Spec.Ports[0].NodePort) + case corev1.ServiceTypeClusterIP: + } +} + +func statusFromResources(statefulSet *appsv1.StatefulSet, service *corev1.Service, pods []corev1.Pod) cloudv1.Status { + if statefulSet.DeletionTimestamp != nil { + return cloudv1.Status{LifecycleStatus: cloudv1.LifecycleStatusTerminating} + } + if statefulSet.Spec.Replicas != nil && *statefulSet.Spec.Replicas == 0 { + return cloudv1.Status{LifecycleStatus: cloudv1.LifecycleStatusStopped} + } + if podFailed(pods) { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusFailed, + Messages: podMessages(pods), + } + } + if podReady(pods) || statefulSet.Status.ReadyReplicas > 0 { + if service == nil { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: append(podMessages(pods), "waiting for service"), + } + } + if service.Spec.Type == corev1.ServiceTypeLoadBalancer && !loadBalancerReady(service) { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: append(podMessages(pods), fmt.Sprintf("service %s waiting for load balancer ingress", service.Name)), + } + } + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusRunning, + Messages: podMessages(pods), + } + } + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: podMessages(pods), + } +} + +func loadBalancerReady(service *corev1.Service) bool { + for _, ingress := range service.Status.LoadBalancer.Ingress { + if ingress.IP != "" || ingress.Hostname != "" { + return true + } + } + return false +} + +func podReady(pods []corev1.Pod) bool { + for _, pod := range pods { + if pod.Status.Phase != corev1.PodRunning { + continue + } + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true + } + } + } + return false +} + +func podFailed(pods []corev1.Pod) bool { + for _, pod := range pods { + if pod.Status.Phase == corev1.PodFailed { + return true + } + for _, status := range pod.Status.ContainerStatuses { + if status.State.Terminated != nil && status.State.Terminated.ExitCode != 0 { + return true + } + if status.State.Waiting != nil && isFailureWaitingReason(status.State.Waiting.Reason) { + return true + } + } + } + return false +} + +func isFailureWaitingReason(reason string) bool { + switch reason { + case "CrashLoopBackOff", "CreateContainerConfigError", "ErrImagePull", "ImagePullBackOff", "InvalidImageName": + return true + default: + return false + } +} + +func podMessages(pods []corev1.Pod) []string { + messages := []string{} + for _, pod := range pods { + if pod.Status.Phase != "" { + messages = append(messages, fmt.Sprintf("%s: phase=%s", pod.Name, pod.Status.Phase)) + } + for _, condition := range pod.Status.Conditions { + if condition.Message != "" { + messages = append(messages, fmt.Sprintf("%s: %s", pod.Name, condition.Message)) + } + } + for _, status := range pod.Status.ContainerStatuses { + if status.State.Waiting != nil { + message := status.State.Waiting.Reason + if status.State.Waiting.Message != "" { + message += ": " + status.State.Waiting.Message + } + messages = append(messages, fmt.Sprintf("%s/%s waiting: %s", pod.Name, status.Name, message)) + } + if status.State.Terminated != nil { + message := status.State.Terminated.Reason + if status.State.Terminated.Message != "" { + message += ": " + status.State.Terminated.Message + } + messages = append(messages, fmt.Sprintf("%s/%s terminated: %s", pod.Name, status.Name, message)) + } + } + } + return messages +} + +func podHostname(pods []corev1.Pod) string { + for _, pod := range pods { + if pod.Name != "" { + return pod.Name + } + } + return "" +} + +func createdAt(statefulSet *appsv1.StatefulSet) time.Time { + if statefulSet.Annotations != nil { + if createdAtRaw := statefulSet.Annotations[annotationCreatedAt]; createdAtRaw != "" { + if parsed, err := time.Parse(time.RFC3339Nano, createdAtRaw); err == nil { + return parsed + } + } + } + return statefulSet.CreationTimestamp.Time +} + +func matchesListArgs(instance cloudv1.Instance, args cloudv1.ListInstancesArgs) bool { + if len(args.InstanceIDs) > 0 && !containsInstanceID(args.InstanceIDs, instance.CloudID) { + return false + } + if len(args.Locations) > 0 && !args.Locations.IsAll() && !args.Locations.IsAllowed(instance.Location) { + return false + } + for tagKey, allowedValues := range args.TagFilters { + tagValue, ok := instance.Tags[tagKey] + if !ok { + return false + } + if len(allowedValues) > 0 && !containsString(allowedValues, tagValue) { + return false + } + } + return true +} + +func containsInstanceID(values []cloudv1.CloudProviderInstanceID, value cloudv1.CloudProviderInstanceID) bool { + for _, v := range values { + if v == value { + return true + } + } + return false +} + +func selectorLabels(cloudID string) map[string]string { + return map[string]string{ + labelCloudID: cloudID, + } +} + +func objectLabels(cloudID string, location string) map[string]string { + labels := selectorLabels(cloudID) + labels[labelName] = labelNameValue + labels[labelManagedBy] = labelManagedByValue + labels[labelLocation] = sanitizeLabelValue(location) + return labels +} + +func makeCloudID(credentialRefID string, refID string) cloudv1.CloudProviderInstanceID { + sum := sha256.Sum256([]byte(credentialRefID + ":" + refID)) + return cloudv1.CloudProviderInstanceID("tk-" + hex.EncodeToString(sum[:])[:20]) +} + +func scenarioForInstanceType(instanceType string) string { + return strings.TrimPrefix(instanceType, "test.") +} + +func marshalTags(tags cloudv1.Tags) (string, error) { + if tags == nil { + tags = cloudv1.Tags{} + } + tagsBytes, err := json.Marshal(tags) + if err != nil { + return "", fmt.Errorf("marshal testkube tags: %w", err) + } + return string(tagsBytes), nil +} + +func tagsFromAnnotations(annotations map[string]string) cloudv1.Tags { + tags := cloudv1.Tags{} + if annotations == nil || annotations[annotationTagsJSON] == "" { + return tags + } + if err := json.Unmarshal([]byte(annotations[annotationTagsJSON]), &tags); err != nil { + return cloudv1.Tags{} + } + return tags +} + +func sanitizeLabelValue(value string) string { + value = strings.ToLower(value) + var builder strings.Builder + for _, r := range value { + switch { + case r >= 'a' && r <= 'z': + builder.WriteRune(r) + case r >= '0' && r <= '9': + builder.WriteRune(r) + case r == '-' || r == '_' || r == '.': + builder.WriteRune(r) + default: + builder.WriteRune('-') + } + } + sanitized := strings.Trim(builder.String(), "-_.") + if len(sanitized) > 63 { + sanitized = sanitized[:63] + sanitized = strings.TrimRight(sanitized, "-_.") + } + if sanitized == "" { + return "unknown" + } + return sanitized +} + +func sshFirewallRules() cloudv1.FirewallRules { + rule := cloudv1.FirewallRule{ + FromPort: servicePort, + ToPort: servicePort, + IPRanges: []string{"0.0.0.0/0"}, + } + return cloudv1.FirewallRules{ + IngressRules: []cloudv1.FirewallRule{rule}, + EgressRules: []cloudv1.FirewallRule{rule}, + } +} + +func int32Ptr(value int32) *int32 { + return &value +} + +func int64Ptr(value int64) *int64 { + return &value +} + +func boolPtr(value bool) *bool { + return &value +} diff --git a/v1/providers/testkube/instance_test.go b/v1/providers/testkube/instance_test.go new file mode 100644 index 00000000..fae389e3 --- /dev/null +++ b/v1/providers/testkube/instance_test.go @@ -0,0 +1,251 @@ +package v1 + +import ( + "context" + "errors" + "testing" + + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestCreateInstanceProvisionFailures(t *testing.T) { + ctx := context.Background() + + for _, tc := range []struct { + name string + instanceType string + expectedErr error + }{ + { + name: "capacity", + instanceType: InstanceTypeFailCapacity, + expectedErr: cloudv1.ErrInsufficientResources, + }, + { + name: "quota", + instanceType: InstanceTypeFailQuota, + expectedErr: cloudv1.ErrOutOfQuota, + }, + } { + t.Run(tc.name, func(t *testing.T) { + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: tc.name, + Name: tc.name, + InstanceType: tc.instanceType, + }) + require.Nil(t, instance) + require.ErrorIs(t, err, tc.expectedErr) + + statefulSets, err := client.k8sClient.AppsV1().StatefulSets(client.namespace).List(ctx, metav1.ListOptions{}) + require.NoError(t, err) + require.Empty(t, statefulSets.Items) + + services, err := client.k8sClient.CoreV1().Services(client.namespace).List(ctx, metav1.ListOptions{}) + require.NoError(t, err) + require.Empty(t, services.Items) + }) + } +} + +func TestInstanceLifecycle(t *testing.T) { + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "env-1", + Name: "dev env", + InstanceType: InstanceTypeOKCPU, + PublicKey: "ssh-rsa test", + Tags: cloudv1.Tags{ + "purpose": "test", + }, + }) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusPending, instance.Status.LifecycleStatus) + require.Equal(t, "env-1", instance.RefID) + require.Equal(t, "test-credential", instance.CloudCredRefID) + require.Equal(t, InstanceTypeOKCPU, instance.InstanceType) + spec, ok := getInstanceTypeSpec(InstanceTypeOKCPU) + require.True(t, ok) + require.Equal(t, spec.imageID, instance.ImageID) + + listed, err := client.ListInstances(ctx, cloudv1.ListInstancesArgs{ + TagFilters: map[string][]string{ + "purpose": {"test"}, + }, + }) + require.NoError(t, err) + require.Len(t, listed, 1) + + require.NoError(t, client.StopInstance(ctx, instance.CloudID)) + stopped, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusStopped, stopped.Status.LifecycleStatus) + + require.NoError(t, client.StartInstance(ctx, instance.CloudID)) + createReadyPod(t, client, instance.CloudID) + + pendingLB, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusPending, pendingLB.Status.LifecycleStatus) + require.Contains(t, pendingLB.Status.Messages, "service "+string(instance.CloudID)+" waiting for load balancer ingress") + + setServiceLoadBalancerIngress(t, client, instance.CloudID, "203.0.113.10", "") + + running, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusRunning, running.Status.LifecycleStatus) + require.Equal(t, string(instance.CloudID)+"-0", running.Hostname) + require.Equal(t, "203.0.113.10", running.PublicIP) + require.Equal(t, "203.0.113.10", running.PublicDNS) + require.Equal(t, 22, running.SSHPort) + + require.NoError(t, client.UpdateInstanceTags(ctx, cloudv1.UpdateInstanceTagsArgs{ + InstanceID: instance.CloudID, + Tags: cloudv1.Tags{ + "purpose": "updated", + }, + })) + updated, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, "updated", updated.Tags["purpose"]) + + require.NoError(t, client.RebootInstance(ctx, instance.CloudID)) + pods, err := client.k8sClient.CoreV1().Pods(client.namespace).List(ctx, metav1.ListOptions{}) + require.NoError(t, err) + require.Empty(t, pods.Items) + + require.NoError(t, client.TerminateInstance(ctx, instance.CloudID)) + _, err = client.GetInstance(ctx, instance.CloudID) + require.True(t, errors.Is(err, cloudv1.ErrInstanceNotFound)) +} + +func TestScenarioEnvironment(t *testing.T) { + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "build", + Name: "build", + InstanceType: InstanceTypeFailBuild, + }) + require.NoError(t, err) + + statefulSet, err := client.k8sClient.AppsV1().StatefulSets(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + service, err := client.k8sClient.CoreV1().Services(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + spec, ok := getInstanceTypeSpec(InstanceTypeFailBuild) + require.True(t, ok) + require.Equal(t, spec.serviceType, service.Spec.Type) + require.Zero(t, service.Spec.Ports[0].NodePort) + container := statefulSet.Spec.Template.Spec.Containers[0] + require.Equal(t, spec.image, container.Image) + require.Zero(t, container.Ports[0].HostPort) + envByName := envMap(container.Env) + require.Equal(t, "fail.build", envByName[envScenario]) + require.Equal(t, "true", envByName[envFailBuild]) +} + +func TestInstanceUsesBakedImageSpec(t *testing.T) { + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "image-spec", + Name: "image spec", + InstanceType: InstanceTypeOKCPU, + }) + require.NoError(t, err) + spec, ok := getInstanceTypeSpec(InstanceTypeOKCPU) + require.True(t, ok) + require.Equal(t, spec.imageID, instance.ImageID) + + statefulSet, err := client.k8sClient.AppsV1().StatefulSets(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + container := statefulSet.Spec.Template.Spec.Containers[0] + require.Equal(t, spec.image, container.Image) + require.NotNil(t, container.ReadinessProbe) + require.NotNil(t, container.ReadinessProbe.TCPSocket) + require.Equal(t, containerSSHPort, container.ReadinessProbe.TCPSocket.Port.IntVal) + for _, mount := range container.VolumeMounts { + require.NotEqual(t, "/sys/fs/cgroup", mount.MountPath) + } +} + +func TestPopulateNetworkLoadBalancer(t *testing.T) { + instance := &cloudv1.Instance{} + populateNetwork(&corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ClusterIP: "10.96.119.41", + Ports: []corev1.ServicePort{ + { + Port: 22, + }, + }, + }, + Status: corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{ + {Hostname: "testkube.example.com"}, + }, + }, + }, + }, instance) + + require.Equal(t, "10.96.119.41", instance.PrivateIP) + require.Equal(t, "testkube.example.com", instance.PublicIP) + require.Equal(t, "testkube.example.com", instance.PublicDNS) + require.Equal(t, 22, instance.SSHPort) +} + +func createReadyPod(t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID) { + t.Helper() + + _, err := client.k8sClient.CoreV1().Pods(client.namespace).Create(context.Background(), &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(instanceID) + "-0", + Namespace: client.namespace, + Labels: selectorLabels(string(instanceID)), + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, metav1.CreateOptions{}) + require.NoError(t, err) +} + +func setServiceLoadBalancerIngress(t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID, ip, hostname string) { + t.Helper() + + service, err := client.k8sClient.CoreV1().Services(client.namespace).Get(context.Background(), string(instanceID), metav1.GetOptions{}) + require.NoError(t, err) + service.Status.LoadBalancer.Ingress = []corev1.LoadBalancerIngress{ + { + IP: ip, + Hostname: hostname, + }, + } + _, err = client.k8sClient.CoreV1().Services(client.namespace).UpdateStatus(context.Background(), service, metav1.UpdateOptions{}) + require.NoError(t, err) +} + +func envMap(envVars []corev1.EnvVar) map[string]string { + envByName := map[string]string{} + for _, envVar := range envVars { + envByName[envVar.Name] = envVar.Value + } + return envByName +} diff --git a/v1/providers/testkube/instancetype.go b/v1/providers/testkube/instancetype.go new file mode 100644 index 00000000..2bb7a039 --- /dev/null +++ b/v1/providers/testkube/instancetype.go @@ -0,0 +1,168 @@ +package v1 + +import ( + "context" + "time" + + "github.com/alecthomas/units" + "github.com/bojanz/currency" + cloudv1 "github.com/brevdev/cloud/v1" + corev1 "k8s.io/api/core/v1" +) + +const ( + DefaultImageID = "testkube-ubuntu-vm" + DefaultImage = "ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest" + + InstanceTypeOKCPU = "test.ok.cpu" + InstanceTypeFailCapacity = "test.fail.capacity" + InstanceTypeFailQuota = "test.fail.quota" + InstanceTypeFailBuild = "test.fail.build" +) + +type testInstanceTypeSpec struct { + instanceType cloudv1.InstanceType + imageID string + image string + serviceType corev1.ServiceType +} + +var testInstanceTypeSpecs = []testInstanceTypeSpec{ + makeTestInstanceTypeSpec(InstanceTypeOKCPU), + makeTestInstanceTypeSpec(InstanceTypeFailCapacity), + makeTestInstanceTypeSpec(InstanceTypeFailQuota), + makeTestInstanceTypeSpec(InstanceTypeFailBuild), +} + +func makeTestInstanceTypeSpec(instanceType string) testInstanceTypeSpec { + estimatedDeployTime := 20 * time.Second + return testInstanceTypeSpec{ + instanceType: makeCPUInstanceType(instanceType, true, &estimatedDeployTime), + imageID: DefaultImageID, + image: DefaultImage, + serviceType: corev1.ServiceTypeLoadBalancer, + } +} + +func (c *TestKubeClient) GetInstanceTypes(_ context.Context, args cloudv1.GetInstanceTypeArgs) ([]cloudv1.InstanceType, error) { + instanceTypes := c.testInstanceTypes() + instanceTypes = filterInstanceTypes(instanceTypes, args) + return instanceTypes, nil +} + +func (c *TestKubeClient) testInstanceTypes() []cloudv1.InstanceType { + instanceTypes := make([]cloudv1.InstanceType, 0, len(testInstanceTypeSpecs)) + for _, spec := range testInstanceTypeSpecs { + instanceTypes = append(instanceTypes, c.instanceTypeFromSpec(spec)) + } + return instanceTypes +} + +func (c *TestKubeClient) instanceTypeFromSpec(spec testInstanceTypeSpec) cloudv1.InstanceType { + instanceType := spec.instanceType + instanceType.Location = c.location + instanceType.ID = cloudv1.MakeGenericInstanceTypeID(instanceType) + return instanceType +} + +func makeCPUInstanceType(instanceType string, available bool, estimatedDeployTime *time.Duration) cloudv1.InstanceType { + basePrice, _ := currency.NewAmountFromInt64(0, "USD") + it := cloudv1.InstanceType{ + Type: instanceType, + SupportedStorage: []cloudv1.Storage{ + { + Type: "ephemeral", + Count: 1, + Size: units.GiB * 20, + SizeBytes: cloudv1.NewBytes(20, cloudv1.Gibibyte), + IsEphemeral: true, + }, + }, + ElasticRootVolume: false, + SupportedUsageClasses: []string{"on-demand"}, + Memory: units.GiB * 4, + MemoryBytes: cloudv1.NewBytes(4, cloudv1.Gibibyte), + SupportedNumCores: []int32{2}, + DefaultCores: 2, + VCPU: 2, + SupportedArchitectures: []cloudv1.Architecture{ + cloudv1.ArchitectureX86_64, + }, + Stoppable: true, + Rebootable: true, + IsAvailable: available, + BasePrice: &basePrice, + IsContainer: true, + EstimatedDeployTime: estimatedDeployTime, + Provider: CloudProviderID, + Cloud: CloudProviderID, + } + return it +} + +func filterInstanceTypes(instanceTypes []cloudv1.InstanceType, args cloudv1.GetInstanceTypeArgs) []cloudv1.InstanceType { + filtered := make([]cloudv1.InstanceType, 0, len(instanceTypes)) + for _, instanceType := range instanceTypes { + if len(args.Locations) > 0 && !args.Locations.IsAll() && !args.Locations.IsAllowed(instanceType.Location) { + continue + } + if len(args.InstanceTypes) > 0 && !containsString(args.InstanceTypes, instanceType.Type) { + continue + } + if args.CloudFilter != nil && !args.CloudFilter.IsAllowed(instanceType.Cloud) { + continue + } + if args.ArchitectureFilter != nil && !isArchitectureAllowed(args.ArchitectureFilter, instanceType.SupportedArchitectures) { + continue + } + if args.GPUManufactererFilter != nil && !isGPUManufacturerAllowed(args.GPUManufactererFilter, instanceType.SupportedGPUs) { + continue + } + filtered = append(filtered, instanceType) + } + return filtered +} + +func isArchitectureAllowed(filter *cloudv1.ArchitectureFilter, architectures []cloudv1.Architecture) bool { + for _, architecture := range architectures { + if filter.IsAllowed(architecture) { + return true + } + } + return false +} + +func isGPUManufacturerAllowed(filter *cloudv1.GPUManufacturerFilter, gpus []cloudv1.GPU) bool { + if len(gpus) == 0 { + return filter.IsAllowed(cloudv1.ManufacturerUnknown) + } + for _, gpu := range gpus { + if filter.IsAllowed(gpu.Manufacturer) { + return true + } + } + return false +} + +func isKnownInstanceType(instanceType string) bool { + _, ok := getInstanceTypeSpec(instanceType) + return ok +} + +func getInstanceTypeSpec(instanceType string) (testInstanceTypeSpec, bool) { + for _, spec := range testInstanceTypeSpecs { + if spec.instanceType.Type == instanceType { + return spec, true + } + } + return testInstanceTypeSpec{}, false +} + +func containsString(values []string, value string) bool { + for _, v := range values { + if v == value { + return true + } + } + return false +} diff --git a/v1/providers/testkube/instancetype_test.go b/v1/providers/testkube/instancetype_test.go new file mode 100644 index 00000000..218c9392 --- /dev/null +++ b/v1/providers/testkube/instancetype_test.go @@ -0,0 +1,57 @@ +package v1 + +import ( + "context" + "testing" + + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + "k8s.io/client-go/kubernetes/fake" +) + +func TestGetInstanceTypes(t *testing.T) { + client := newTestClient(t) + + instanceTypes, err := client.GetInstanceTypes(context.Background(), cloudv1.GetInstanceTypeArgs{}) + require.NoError(t, err) + require.Len(t, instanceTypes, 4) + + instanceTypeByName := map[string]cloudv1.InstanceType{} + for _, instanceType := range instanceTypes { + instanceTypeByName[instanceType.Type] = instanceType + } + + for _, expected := range []string{ + InstanceTypeOKCPU, + InstanceTypeFailCapacity, + InstanceTypeFailQuota, + InstanceTypeFailBuild, + } { + instanceType, ok := instanceTypeByName[expected] + require.True(t, ok, "missing instance type %s", expected) + require.True(t, instanceType.IsAvailable) + require.Equal(t, CloudProviderID, instanceType.Provider) + require.Equal(t, CloudProviderID, instanceType.Cloud) + } +} + +func TestCapabilitiesDoNotAdvertiseImages(t *testing.T) { + client := newTestClient(t) + + capabilities, err := client.GetCapabilities(context.Background()) + require.NoError(t, err) + require.True(t, capabilities.IsCapable(cloudv1.CapabilityCreateInstance)) + require.False(t, capabilities.IsCapable(cloudv1.CapabilityMachineImage)) +} + +func newTestClient(t *testing.T) *TestKubeClient { + t.Helper() + + client, err := NewTestKubeClient("test-credential", nil, + WithKubernetesClient(fake.NewSimpleClientset()), + WithNamespace("testkube"), + WithLocation("local"), + ) + require.NoError(t, err) + return client +} diff --git a/v1/providers/testkube/location.go b/v1/providers/testkube/location.go new file mode 100644 index 00000000..11c0605e --- /dev/null +++ b/v1/providers/testkube/location.go @@ -0,0 +1,18 @@ +package v1 + +import ( + "context" + + cloudv1 "github.com/brevdev/cloud/v1" +) + +func (c *TestKubeClient) GetLocations(_ context.Context, _ cloudv1.GetLocationsArgs) ([]cloudv1.Location, error) { + return []cloudv1.Location{ + { + Name: c.location, + Description: "Developer test Kubernetes cluster", + Available: true, + Country: "USA", + }, + }, nil +} diff --git a/v1/providers/testkube/validation_test.go b/v1/providers/testkube/validation_test.go new file mode 100644 index 00000000..c6f57efe --- /dev/null +++ b/v1/providers/testkube/validation_test.go @@ -0,0 +1,278 @@ +package v1 + +import ( + "context" + "encoding/base64" + "net" + "os" + "strconv" + "strings" + "sync" + "testing" + "time" + + internalssh "github.com/brevdev/cloud/internal/ssh" + "github.com/brevdev/cloud/internal/validation" + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + gossh "golang.org/x/crypto/ssh" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + testKubeKubeconfigBase64EnvVar = "TESTKUBE_KUBECONFIG_BASE64" + testKubeNamespaceEnvVar = "TESTKUBE_NAMESPACE" + testKubeLocationEnvVar = "TESTKUBE_LOCATION" +) + +var ( + validationSSHKeysOnce sync.Once + validationSSHKeysErr error +) + +func TestValidationFunctions(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + location := testKubeLocation() + config := validation.ProviderConfig{ + Credential: testKubeCredential(), + Location: location, + StableIDs: []cloudv1.InstanceTypeID{ + cloudv1.InstanceTypeID(location + "-noSub-" + InstanceTypeOKCPU), + }, + } + + validation.RunValidationSuite(t, config) +} + +func TestInstanceLifecycleValidation(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + location := testKubeLocation() + config := validation.ProviderConfig{ + Credential: testKubeCredential(), + Location: location, + StableIDs: []cloudv1.InstanceTypeID{ + cloudv1.InstanceTypeID(location + "-noSub-" + InstanceTypeOKCPU), + }, + } + + validation.RunInstanceLifecycleValidation(t, config) +} + +func TestFailureInstanceTypesValidation(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + t.Cleanup(cancel) + + client, err := testKubeCredential().MakeClient(ctx, testKubeLocation()) + require.NoError(t, err) + + _, err = client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "validation-capacity", + Name: "validation-capacity", + Location: testKubeLocation(), + InstanceType: InstanceTypeFailCapacity, + }) + require.ErrorIs(t, err, cloudv1.ErrInsufficientResources) + + _, err = client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "validation-quota", + Name: "validation-quota", + Location: testKubeLocation(), + InstanceType: InstanceTypeFailQuota, + }) + require.ErrorIs(t, err, cloudv1.ErrOutOfQuota) +} + +func TestImageBackedInstanceValidation(t *testing.T) { + checkValidationSkip(t) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + t.Cleanup(cancel) + + client := testKubeImageClient(t) + refID := "validation-image-ok-cpu-" + strconv.FormatInt(time.Now().UnixNano(), 36) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: refID, + Name: refID, + Location: testKubeLocation(), + InstanceType: InstanceTypeOKCPU, + PublicKey: testAuthorizedKey(t), + Tags: cloudv1.Tags{ + "test": "image-validation", + }, + }) + require.NoError(t, err) + require.NotNil(t, instance) + t.Cleanup(func() { + _ = client.TerminateInstance(context.Background(), instance.CloudID) + }) + + statefulSet, err := client.k8sClient.AppsV1().StatefulSets(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + require.Equal(t, DefaultImage, statefulSet.Spec.Template.Spec.Containers[0].Image) + + instance = waitForValidationInstanceStatus(t, ctx, client, instance.CloudID, cloudv1.LifecycleStatusRunning, 4*time.Minute) + require.NotEmpty(t, instance.Hostname) + require.NotEmpty(t, instance.PublicIP) + require.NotZero(t, instance.SSHPort) + + sshCtx, cancelSSH := context.WithTimeout(ctx, 2*time.Minute) + t.Cleanup(cancelSSH) + require.NoError(t, internalssh.WaitForSSH(sshCtx, internalssh.ConnectionConfig{ + User: instance.SSHUser, + HostPort: net.JoinHostPort(instance.PublicIP, strconv.Itoa(instance.SSHPort)), + PrivKey: internalssh.DoNotUseDummyPrivateKey, + }, internalssh.WaitForSSHOptions{ + Timeout: 90 * time.Second, + ConnectionTimeout: 10 * time.Second, + CheckCmd: "sudo -n true && command -v apt-get && command -v systemctl && systemctl list-units --type=service --no-pager >/dev/null", + WaitTime: 2 * time.Second, + })) +} + +func checkValidationSkip(t *testing.T) { + t.Helper() + + kubeconfigBase64 := os.Getenv(testKubeKubeconfigBase64EnvVar) + isValidationTest := os.Getenv("VALIDATION_TEST") + if kubeconfigBase64 == "" && isValidationTest != "" { + t.Fatalf("%s not set, but VALIDATION_TEST is set", testKubeKubeconfigBase64EnvVar) + } + if kubeconfigBase64 == "" { + t.Skipf("%s not set, skipping testkube validation tests", testKubeKubeconfigBase64EnvVar) + } + ensureValidationSSHKeys(t) +} + +func testKubeCredential() *TestKubeCredential { + return NewTestKubeCredential("validation-test", os.Getenv(testKubeKubeconfigBase64EnvVar), testKubeNamespace()) +} + +func testKubeImageClient(t *testing.T) *TestKubeClient { + t.Helper() + + credential := testKubeCredential() + restConfig, err := credential.restConfig() + require.NoError(t, err) + + client, err := NewTestKubeClient(credential.RefID, restConfig, + WithNamespace(testKubeNamespace()), + WithLocation(testKubeLocation()), + ) + require.NoError(t, err) + return client +} + +func testKubeNamespace() string { + if namespace := os.Getenv(testKubeNamespaceEnvVar); namespace != "" { + return namespace + } + return DefaultNamespace +} + +func testKubeLocation() string { + if location := os.Getenv(testKubeLocationEnvVar); location != "" { + return location + } + return DefaultLocation +} + +func testAuthorizedKey(t *testing.T) string { + t.Helper() + + authorizedKey, err := defaultAuthorizedKey() + require.NoError(t, err) + return authorizedKey +} + +func defaultAuthorizedKey() (string, error) { + signer, err := gossh.ParsePrivateKey([]byte(internalssh.DoNotUseDummyPrivateKey)) + if err != nil { + return "", err + } + return strings.TrimSpace(string(gossh.MarshalAuthorizedKey(signer.PublicKey()))), nil +} + +func ensureValidationSSHKeys(t *testing.T) { + t.Helper() + + validationSSHKeysOnce.Do(func() { + if os.Getenv("TEST_PRIVATE_KEY_BASE64") == "" { + validationSSHKeysErr = os.Setenv( + "TEST_PRIVATE_KEY_BASE64", + base64.StdEncoding.EncodeToString([]byte(internalssh.DoNotUseDummyPrivateKey)), + ) + if validationSSHKeysErr != nil { + return + } + } + + if os.Getenv("TEST_PUBLIC_KEY_BASE64") == "" { + authorizedKey, err := defaultAuthorizedKey() + if err != nil { + validationSSHKeysErr = err + return + } + validationSSHKeysErr = os.Setenv( + "TEST_PUBLIC_KEY_BASE64", + base64.StdEncoding.EncodeToString([]byte(authorizedKey)), + ) + } + }) + require.NoError(t, validationSSHKeysErr) +} + +func waitForValidationInstanceStatus( + t *testing.T, + ctx context.Context, + client *TestKubeClient, + instanceID cloudv1.CloudProviderInstanceID, + status cloudv1.LifecycleStatus, + timeout time.Duration, +) *cloudv1.Instance { + t.Helper() + + deadline := time.NewTimer(timeout) + defer deadline.Stop() + + tick := time.NewTicker(2 * time.Second) + defer tick.Stop() + + var lastInstance *cloudv1.Instance + var lastErr error + for { + instance, err := client.GetInstance(ctx, instanceID) + if err != nil { + lastErr = err + } else { + lastErr = nil + lastInstance = instance + if instance.Status.LifecycleStatus == status { + return instance + } + if instance.Status.LifecycleStatus == cloudv1.LifecycleStatusFailed && status != cloudv1.LifecycleStatusFailed { + t.Fatalf("instance %s failed while waiting for %s: %v", instanceID, status, instance.Status.Messages) + } + } + + select { + case <-ctx.Done(): + require.NoError(t, lastErr) + t.Fatalf("context ended waiting for instance %s to become %s: %v", instanceID, status, ctx.Err()) + case <-deadline.C: + require.NoError(t, lastErr) + if lastInstance != nil { + t.Fatalf("instance %s status is %s, waiting for %s: %v", instanceID, lastInstance.Status.LifecycleStatus, status, lastInstance.Status.Messages) + } + t.Fatalf("timed out waiting for instance %s to become %s", instanceID, status) + case <-tick.C: + } + } +} From 3f8638457f066f8195d82a955435f6a78e31b3c6 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Mon, 15 Jun 2026 16:41:45 -0700 Subject: [PATCH 2/2] lint --- v1/providers/testkube/instance.go | 2 +- v1/providers/testkube/instance_test.go | 2 +- v1/providers/testkube/instancetype.go | 5 ----- v1/providers/testkube/validation_test.go | 11 ++--------- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/v1/providers/testkube/instance.go b/v1/providers/testkube/instance.go index 198614e4..5e67363a 100644 --- a/v1/providers/testkube/instance.go +++ b/v1/providers/testkube/instance.go @@ -29,7 +29,7 @@ const ( labelManagedByValue = "brev-cloud-sdk" annotationRefID = "testkube.brev.dev/ref-id" - annotationCloudCredRefID = "testkube.brev.dev/cloud-cred-ref-id" + annotationCloudCredRefID = "testkube.brev.dev/cloud-cred-ref-id" //nolint:gosec // this is a valid annotation annotationName = "testkube.brev.dev/name" annotationLocation = "testkube.brev.dev/location" annotationSubLocation = "testkube.brev.dev/sub-location" diff --git a/v1/providers/testkube/instance_test.go b/v1/providers/testkube/instance_test.go index fae389e3..2b6cbd1d 100644 --- a/v1/providers/testkube/instance_test.go +++ b/v1/providers/testkube/instance_test.go @@ -52,7 +52,7 @@ func TestCreateInstanceProvisionFailures(t *testing.T) { } } -func TestInstanceLifecycle(t *testing.T) { +func TestInstanceLifecycle(t *testing.T) { //nolint:funlen // test ok ctx := context.Background() client := newTestClient(t) diff --git a/v1/providers/testkube/instancetype.go b/v1/providers/testkube/instancetype.go index 2bb7a039..bca69dfd 100644 --- a/v1/providers/testkube/instancetype.go +++ b/v1/providers/testkube/instancetype.go @@ -144,11 +144,6 @@ func isGPUManufacturerAllowed(filter *cloudv1.GPUManufacturerFilter, gpus []clou return false } -func isKnownInstanceType(instanceType string) bool { - _, ok := getInstanceTypeSpec(instanceType) - return ok -} - func getInstanceTypeSpec(instanceType string) (testInstanceTypeSpec, bool) { for _, spec := range testInstanceTypeSpecs { if spec.instanceType.Type == instanceType { diff --git a/v1/providers/testkube/validation_test.go b/v1/providers/testkube/validation_test.go index c6f57efe..48d2eccd 100644 --- a/v1/providers/testkube/validation_test.go +++ b/v1/providers/testkube/validation_test.go @@ -118,7 +118,7 @@ func TestImageBackedInstanceValidation(t *testing.T) { require.NoError(t, err) require.Equal(t, DefaultImage, statefulSet.Spec.Template.Spec.Containers[0].Image) - instance = waitForValidationInstanceStatus(t, ctx, client, instance.CloudID, cloudv1.LifecycleStatusRunning, 4*time.Minute) + instance = waitForValidationInstanceStatus(ctx, t, client, instance.CloudID, cloudv1.LifecycleStatusRunning, 4*time.Minute) require.NotEmpty(t, instance.Hostname) require.NotEmpty(t, instance.PublicIP) require.NotZero(t, instance.SSHPort) @@ -229,14 +229,7 @@ func ensureValidationSSHKeys(t *testing.T) { require.NoError(t, validationSSHKeysErr) } -func waitForValidationInstanceStatus( - t *testing.T, - ctx context.Context, - client *TestKubeClient, - instanceID cloudv1.CloudProviderInstanceID, - status cloudv1.LifecycleStatus, - timeout time.Duration, -) *cloudv1.Instance { +func waitForValidationInstanceStatus(ctx context.Context, t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID, status cloudv1.LifecycleStatus, timeout time.Duration) *cloudv1.Instance { t.Helper() deadline := time.NewTimer(timeout)