-
Notifications
You must be signed in to change notification settings - Fork 499
OCPBUGS-76530: Fix intermittent etcd peer communication failures #8479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| package pki | ||
|
|
||
| import ( | ||
| "crypto/x509" | ||
| "crypto/x509/pkix" | ||
| "testing" | ||
|
|
||
| . "github.com/onsi/gomega" | ||
|
|
||
| "github.com/openshift/hypershift/support/certs" | ||
| "github.com/openshift/hypershift/support/config" | ||
|
|
||
| corev1 "k8s.io/api/core/v1" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| ) | ||
|
|
||
| func TestReconcileEtcdPeerSecret(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| caCfg := certs.CertCfg{ | ||
| IsCA: true, | ||
| Subject: pkix.Name{CommonName: "etcd-signer", OrganizationalUnit: []string{"openshift"}}, | ||
| } | ||
| caKey, caCert, err := certs.GenerateSelfSignedCertificate(&caCfg) | ||
| if err != nil { | ||
| t.Fatalf("failed to generate CA: %v", err) | ||
| } | ||
| caSecret := &corev1.Secret{ | ||
| Data: map[string][]byte{ | ||
| certs.CASignerCertMapKey: certs.CertToPem(caCert), | ||
| certs.CASignerKeyMapKey: certs.PrivateKeyToPem(caKey), | ||
| }, | ||
| } | ||
|
|
||
| t.Run("When reconciling etcd peer secret it should include both etcd-discovery and etcd-client SANs", func(t *testing.T) { | ||
| g := NewWithT(t) | ||
| secret := &corev1.Secret{ | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Namespace: "clusters-test", | ||
| }, | ||
| } | ||
|
|
||
| err := ReconcileEtcdPeerSecret(secret, caSecret, config.OwnerRef{}) | ||
| g.Expect(err).ToNot(HaveOccurred()) | ||
|
|
||
| certData := secret.Data[EtcdPeerCrtKey] | ||
| g.Expect(certData).ToNot(BeEmpty()) | ||
|
|
||
| cert, err := certs.PemToCertificate(certData) | ||
| g.Expect(err).ToNot(HaveOccurred()) | ||
|
|
||
| g.Expect(cert.DNSNames).To(ContainElements( | ||
| "*.etcd-discovery.clusters-test.svc", | ||
| "*.etcd-discovery.clusters-test.svc.cluster.local", | ||
| "*.etcd-client.clusters-test.svc", | ||
| "*.etcd-client.clusters-test.svc.cluster.local", | ||
| // TODO(OCPBUGS-86648): assert on cert.IPAddresses instead once IPs are moved out of dnsNames. | ||
| "127.0.0.1", | ||
| "::1", | ||
|
Comment on lines
+58
to
+59
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
# Description: Check how ReconcileEtcdPeerSecret handles IP addresses in certificate generation
# Search for the implementation of ReconcileEtcdPeerSecret
ast-grep --pattern $'func ReconcileEtcdPeerSecret($$$) $$$'
# Search for certificate generation calls that might include IPs
rg -A 10 -B 5 "127\.0\.0\.1|::1" --type go --glob '**/pki/etcd.go'
# Look for DNSNames and IPAddresses field assignments
rg -A 5 "DNSNames|IPAddresses" --type go --glob '**/pki/etcd.go'
Repository: openshift/hypershift
Length of output: 2726
🏁 Script executed:
# Find the reconcileSignedCertWithKeysAndAddresses function implementation
ast-grep --pattern $'func reconcileSignedCertWithKeysAndAddresses($$$) $$$'
# Also search for where DNSNames is actually set in the certificate
rg -B 5 -A 15 "DNSNames\s*=" --type go --glob '**/pki/*.go' | head -100
# Look for IPAddresses assignment in certificate generation
rg -B 5 -A 5 "IPAddresses\s*=" --type go --glob '**/pki/*.go'
Repository: openshift/hypershift
Length of output: 1123
🏁 Script executed:
# Find where certs.ReconcileSignedCert is imported from
rg "import.*certs" --type go --glob '**/pki/cert.go' -A 20 | head -40
# Find the certs package and ReconcileSignedCert implementation
find . -name "*.go" -type f -exec grep -l "func ReconcileSignedCert" {} \;
# Search for how it populates certificate fields
rg -B 5 -A 20 "func ReconcileSignedCert" --type go
Repository: openshift/hypershift
Length of output: 1324
🏁 Script executed:
# Get the full ReconcileSignedCert implementation
sed -n '1,300p' support/certs/tls.go | tail -200
Repository: openshift/hypershift
Length of output: 6056
🏁 Script executed:
# Find where the CSR is generated with dnsNames and ips in ReconcileSignedCert
sed -n '/^func ReconcileSignedCert/,/^func [A-Z]/p' support/certs/tls.go | head -100
Repository: openshift/hypershift
Length of output: 2748
🏁 Script executed:
# Get the test file context around lines 57-58
sed -n '35,75p' control-plane-operator/controllers/hostedcontrolplane/pki/etcd_test.go
Repository: openshift/hypershift
Length of output: 1305

Fix IP address handling in etcd peer certificate generation.

Lines 50–51 in the etcd peer certificate code (apparently `pki/etcd.go`) pass the loopback addresses `127.0.0.1` and `::1` as DNS names, so they end up in the certificate's `DNSNames` field instead of `IPAddresses`.

Fix the implementation by passing IPs via the dedicated parameter:

return reconcileSignedCertWithKeysAndAddresses(secret, ca, ownerRef, "etcd-discovery", []string{"kubernetes"}, X509UsageClientServerAuth, EtcdPeerCrtKey, EtcdPeerKeyKey, "", dnsNames, []string{"127.0.0.1", "::1"}, "")

Then update the test assertion to check the correct certificate field:

g.Expect(cert.IPAddresses).To(ContainElements(
net.ParseIP("127.0.0.1"),
net.ParseIP("::1"),
))

🤖 Prompt for AI Agents
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is encoding the behavior that should be fixed. Once the JIRA card is created, would you mind referencing it here in a TODO comment?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @sdminonne , |
||
| )) | ||
| }) | ||
|
|
||
| t.Run("When reconciling etcd peer secret it should have client and server auth usage", func(t *testing.T) { | ||
| g := NewWithT(t) | ||
| secret := &corev1.Secret{ | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Namespace: "clusters-test", | ||
| }, | ||
| } | ||
|
|
||
| err := ReconcileEtcdPeerSecret(secret, caSecret, config.OwnerRef{}) | ||
| g.Expect(err).ToNot(HaveOccurred()) | ||
|
|
||
| cert, err := certs.PemToCertificate(secret.Data[EtcdPeerCrtKey]) | ||
| g.Expect(err).ToNot(HaveOccurred()) | ||
|
|
||
| g.Expect(cert.ExtKeyUsage).To(ContainElements( | ||
| x509.ExtKeyUsageClientAuth, | ||
| x509.ExtKeyUsageServerAuth, | ||
| )) | ||
| }) | ||
| } | ||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,6 @@ metadata: | |
| app: etcd | ||
| name: etcd-client | ||
| spec: | ||
| clusterIP: None | ||
| ports: | ||
| - name: etcd-client | ||
| port: 2379 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we please add a `//` comment explaining why this is needed, so it's clear to humans/agents landing here without needing to trace back to the PR description?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @enxebre, I have added the comment as suggested.