Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1245,13 +1245,18 @@ func (c *Cluster) Update(oldSpec, newSpec *cpov1.Postgresql) error {
}

if !updateFailed {
// Major version upgrade must only fire after success of earlier operations and should stay last
if err := c.majorVersionUpgrade(); err != nil {
c.logger.Errorf("major version upgrade failed: %v", err)
if upgradeErr := c.executeMajorVersionUpgrade(); upgradeErr != nil {
c.logger.Errorf("major version upgrade failed: %v", upgradeErr)
updateFailed = true
}
}

if updateFailed {
c.logger.Errorf("Update for cluster %s/%s finished with errors..", c.Namespace, c.Name)
} else {
c.logger.Infof("Update for cluster %s/%s completed successfully.", c.Namespace, c.Name)
}

return nil
}

Expand Down
74 changes: 57 additions & 17 deletions pkg/cluster/majorversionupgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package cluster
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"

"github.com/Masterminds/semver"
"github.com/cybertec-postgresql/cybertec-pg-operator/pkg/spec"
Expand All @@ -29,6 +31,8 @@ const (
majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure"
)

var errUpgradePrepNotReady = errors.New("cluster not ready for upgrade")

// IsBiggerPostgresVersion Compare two Postgres version numbers
func IsBiggerPostgresVersion(old string, new string) bool {
oldN := VersionMap[old]
Expand Down Expand Up @@ -232,12 +236,14 @@ func (c *Cluster) majorVersionUpgrade() error {
continue
}
if checkStreaming && member.State != "streaming" {
c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name)
return nil
// c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name)
// return nil
return fmt.Errorf("%w: replica %s is not streaming (state: %s)", errUpgradePrepNotReady, member.Name, member.State)
}
if member.Lag > 16*1024*1024 {
c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name)
return nil
// c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name)
// return nil
return fmt.Errorf("%w: replication lag on member %s is too high (%d bytes)", errUpgradePrepNotReady, member.Name, member.Lag)
}
}

Expand All @@ -246,11 +252,10 @@ func (c *Cluster) majorVersionUpgrade() error {
if allRunning {
c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
if c.currentMajorVersion < desiredVersion {
defer func() error {
if err = c.criticalOperationLabel(pods, nil); err != nil {
return fmt.Errorf("failed to remove critical-operation label: %s", err)
defer func() {
if err := c.criticalOperationLabel(pods, nil); err != nil {
c.logger.Errorf("failed to remove critical-operation label: %v", err)
}
return nil
}()
val := "true"
if err = c.criticalOperationLabel(pods, &val); err != nil {
Expand All @@ -260,37 +265,72 @@ func (c *Cluster) majorVersionUpgrade() error {
podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name}
c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
upgradeCommand := fmt.Sprintf("set -o pipefail && /usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods)

c.logger.Debug("checking if the spilo image runs with root or non-root (check for user id=0)")
upgradeCommand := fmt.Sprintf("/usr/local/bin/python3 /scripts/inplace_upgrade.py %d 2>&1", numberOfPods)
c.logger.Debug("checking if the container runs with root or non-root (check for user id=0)")
resultIdCheck, errIdCheck := c.ExecCommand(podName, "/bin/bash", "-c", "/usr/bin/id -u")
if errIdCheck != nil {
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck)
}

resultIdCheck = strings.TrimSuffix(resultIdCheck, "\n")
var result, scriptErrMsg string
var result string

if resultIdCheck != "0" {
c.logger.Infof("user id was identified as: %s, hence default user is non-root already", resultIdCheck)
result, err = c.ExecCommand(podName, "/bin/bash", "-c", upgradeCommand)
scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
} else {
c.logger.Infof("user id was identified as: %s, using su to reach the postgres user", resultIdCheck)
result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
}

if err != nil {
isUpgradeSuccess = false
c.annotatePostgresResource(isUpgradeSuccess)
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg)
return fmt.Errorf("%s", scriptErrMsg)

finalErrorMsg := strings.TrimSpace(result)
if finalErrorMsg == "" {
finalErrorMsg = err.Error()
}

lines := strings.Split(finalErrorMsg, "\n")
if len(lines) > 5 {
finalErrorMsg = strings.Join(lines[len(lines)-5:], " | ")
}

c.logger.Errorf("Major upgrade failed: %v", err)
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %s", c.currentMajorVersion, desiredVersion, finalErrorMsg)

return fmt.Errorf("upgrade script failed: %s", finalErrorMsg)
}

c.annotatePostgresResource(isUpgradeSuccess)
c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "major version upgrade from version %d to version %d was successfully completed.", c.currentMajorVersion, desiredVersion)
}
}

return nil
}

func (c *Cluster) executeMajorVersionUpgrade() error {
maxRetries := 6
var lastErr error

for i := 0; i < maxRetries; i++ {
lastErr = c.majorVersionUpgrade()
if lastErr == nil {
return nil
}

if errors.Is(lastErr, errUpgradePrepNotReady) {
c.logger.Warnf("Major version upgrade deferred (attempt %d/%d): %v. Retrying in 15s...", i+1, maxRetries, lastErr)

if i < maxRetries-1 {
time.Sleep(15 * time.Second)
continue
}
}
return lastErr
}
return lastErr
}
19 changes: 16 additions & 3 deletions pkg/cluster/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ func generateSerialNumber() (*big.Int, error) {
// Unlike the update, sync does not error out if some objects do not exist and takes care of creating them.
func (c *Cluster) Sync(newSpec *cpov1.Postgresql) error {
var err error
syncFailed := false
c.mu.Lock()
defer c.mu.Unlock()

Expand Down Expand Up @@ -326,9 +327,21 @@ func (c *Cluster) Sync(newSpec *cpov1.Postgresql) error {
}
}

// Major version upgrade must only run after success of all earlier operations, must remain last item in sync
if err := c.majorVersionUpgrade(); err != nil {
c.logger.Errorf("major version upgrade failed: %v", err)
if err != nil {
syncFailed = true
}
if !syncFailed {
err = c.executeMajorVersionUpgrade()
if err != nil {
c.logger.Errorf("major version upgrade failed after retries: %v", err)
syncFailed = true
}
}

if syncFailed {
c.logger.Errorf("Update for cluster %s/%s finished with errors..", c.Namespace, c.Name)
} else {
c.logger.Infof("Update for cluster %s/%s completed successfully.", c.Namespace, c.Name)
}

return err
Expand Down
2 changes: 1 addition & 1 deletion ui/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ ARG VERSION=dev
RUN sed -i "s/__version__ = .*/__version__ = '${VERSION}'/" /operator_ui/__init__.py

WORKDIR /
CMD ["/usr/bin/python3", "-m", "operator_ui"]
CMD ["/usr/local/bin/python3", "-m", "operator_ui"]
2 changes: 1 addition & 1 deletion ui/start_server.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/bash
/usr/bin/python3 -m operator_ui
/usr/local/bin/python3 -m operator_ui
Loading