diff --git a/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go b/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go index a76036776..2cb153458 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go @@ -62,6 +62,7 @@ type KubernetesMetaConfiguration struct { PodTerminateGracePeriod Duration `json:"pod_terminate_grace_period,omitempty"` SpiloPrivileged bool `json:"spilo_privileged,omitempty"` SpiloAllowPrivilegeEscalation *bool `json:"spilo_allow_privilege_escalation,omitempty"` + ReadOnlyRootFilesystem *bool `json:"container_readonly_root_filesystem,omitempty" default:"false"` SpiloRunAsUser *int64 `json:"spilo_runasuser,omitempty"` SpiloRunAsGroup *int64 `json:"spilo_runasgroup,omitempty"` SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"` @@ -102,6 +103,7 @@ type KubernetesMetaConfiguration struct { PodManagementPolicy string `json:"pod_management_policy,omitempty"` PersistentVolumeClaimRetentionPolicy map[string]string `json:"persistent_volume_claim_retention_policy,omitempty"` EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` + EnableLivenessProbe bool `json:"enable_liveness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` } diff --git a/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go b/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go index 4070e1e82..60eaa5b8b 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go @@ -216,11 +216,13 @@ type CloneDescription struct { // Sidecar defines a container to be run in the same pod as the Postgres container. 
type Sidecar struct { - *Resources `json:"resources,omitempty"` - Name string `json:"name,omitempty"` - DockerImage string `json:"image,omitempty"` - Ports []v1.ContainerPort `json:"ports,omitempty"` - Env []v1.EnvVar `json:"env,omitempty"` + *Resources `json:"resources,omitempty"` + Name string `json:"name,omitempty"` + DockerImage string `json:"image,omitempty"` + Ports []v1.ContainerPort `json:"ports,omitempty"` + Env []v1.EnvVar `json:"env,omitempty"` + SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"` + VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"` } // UserFlags defines flags (such as superuser, nologin) that could be assigned to individual users diff --git a/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go b/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go index 9daa74e86..7cb588816 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go @@ -236,6 +236,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = new(bool) **out = **in } + if in.ReadOnlyRootFilesystem != nil { + in, out := &in.ReadOnlyRootFilesystem, &out.ReadOnlyRootFilesystem + *out = new(bool) + **out = **in + } if in.SpiloRunAsUser != nil { in, out := &in.SpiloRunAsUser, &out.SpiloRunAsUser *out = new(int64) @@ -1497,6 +1502,18 @@ func (in *Sidecar) DeepCopyInto(out *Sidecar) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(corev1.SecurityContext) + (*in).DeepCopyInto(*out) + } + if in.VolumeMounts != nil { + in, out := &in.VolumeMounts, &out.VolumeMounts + *out = make([]corev1.VolumeMount, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index f97611a1d..d7483af89 100644 --- a/pkg/cluster/cluster.go +++ 
b/pkg/cluster/cluster.go @@ -676,6 +676,8 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe func(a, b v1.Container) bool { return a.Name != b.Name }), newCheck("new statefulset %s's %s (index %d) readiness probe does not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), + newCheck("new statefulset %s's %s (index %d) liveness probe does not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.LivenessProbe, b.LivenessProbe) }), newCheck("new statefulset %s's %s (index %d) ports do not match the current one", func(a, b v1.Container) bool { return !comparePorts(a.Ports, b.Ports) }), newCheck("new statefulset %s's %s (index %d) resources do not match the current ones", diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index a8878a724..bff42e3de 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -677,6 +677,7 @@ func generateContainer( volumeMounts []v1.VolumeMount, privilegedMode bool, privilegeEscalationMode *bool, + readOnlyRootFilesystem *bool, additionalPodCapabilities *v1.Capabilities, ) *v1.Container { return &v1.Container{ @@ -703,7 +704,7 @@ func generateContainer( SecurityContext: &v1.SecurityContext{ AllowPrivilegeEscalation: privilegeEscalationMode, Privileged: &privilegedMode, - ReadOnlyRootFilesystem: util.False(), + ReadOnlyRootFilesystem: readOnlyRootFilesystem, Capabilities: additionalPodCapabilities, }, } @@ -736,7 +737,7 @@ func (c *Cluster) generateSidecarContainers(sidecars []cpov1.Sidecar, } // adds common fields to sidecars -func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, superUserName string, credentialsSecretName string, logger *logrus.Entry) []v1.Container { +func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, superUserName string, credentialsSecretName string, logger *logrus.Entry, privilegedMode bool, privilegeEscalationMode 
*bool, additionalPodCapabilities *v1.Capabilities) []v1.Container { result := []v1.Container{} for _, container := range in { @@ -777,6 +778,7 @@ func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, su }, } container.Env = appendEnvVars(env, container.Env...) + result = append(result, container) } @@ -873,6 +875,15 @@ func (c *Cluster) generatePodTemplate( podSpec.PriorityClassName = priorityClassName } + if c.Postgresql.Spec.Monitoring != nil { + addEmptyDirVolume(&podSpec, "exporter-tmp", "postgres-exporter", "/tmp") + } + + if c.OpConfig.ReadOnlyRootFilesystem != nil && *c.OpConfig.ReadOnlyRootFilesystem { + addRunVolume(&podSpec, "postgres-run", "postgres", "/run") + addEmptyDirVolume(&podSpec, "postgres-tmp", "postgres", "/tmp") + } + if sharePgSocketWithSidecars != nil && *sharePgSocketWithSidecars { addVarRunVolume(&podSpec) } @@ -988,6 +999,19 @@ func (c *Cluster) generateSpiloPodEnvVars( Name: "HUMAN_ROLE", Value: c.OpConfig.PamRoleName, }, + // NSS WRAPPER + { + Name: "LD_PRELOAD", + Value: "/usr/lib64/libnss_wrapper.so", + }, + { + Name: "NSS_WRAPPER_PASSWD", + Value: "/tmp/nss_wrapper/passwd", + }, + { + Name: "NSS_WRAPPER_GROUP", + Value: "/tmp/nss_wrapper/group", + }, } if c.OpConfig.EnableSpiloWalPathCompat { @@ -1243,6 +1267,8 @@ func getSidecarContainer(sidecar cpov1.Sidecar, index int, resources *v1.Resourc Resources: *resources, Env: sidecar.Env, Ports: sidecar.Ports, + SecurityContext: sidecar.SecurityContext, + VolumeMounts: sidecar.VolumeMounts, } } @@ -1292,6 +1318,23 @@ func generateSpiloReadinessProbe() *v1.Probe { } } +func generatePatroniLivenessProbe() *v1.Probe { + return &v1.Probe{ + FailureThreshold: 6, + ProbeHandler: v1.ProbeHandler{ + HTTPGet: &v1.HTTPGetAction{ + Path: "/liveness", + Port: intstr.IntOrString{IntVal: patroni.ApiPort}, + Scheme: v1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 10, + TimeoutSeconds: 5, + SuccessThreshold: 1, + } +} + func (c *Cluster) 
generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.StatefulSet, error) { var ( @@ -1422,6 +1465,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu } additionalVolumes = append(additionalVolumes, tlsVolumes...) } + repo_host_mode := false // Add this envVar so that it is not added to the pgbackrest initcontainer if specHasPgbackrestPVCRepo(spec) { @@ -1444,6 +1488,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu volumeMounts, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + c.OpConfig.Resources.ReadOnlyRootFilesystem, generateCapabilities(c.OpConfig.AdditionalPodCapabilities), ) @@ -1451,6 +1496,10 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu if c.OpConfig.EnableReadinessProbe { spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() } + // add the Patroni liveness probe when enabled in the operator configuration + if c.OpConfig.EnableLivenessProbe { + spiloContainer.LivenessProbe = generatePatroniLivenessProbe() + } // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} @@ -1506,7 +1555,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu containerName, containerName) } - sidecarContainers = patchSidecarContainers(sidecarContainers, volumeMounts, c.OpConfig.SuperUsername, c.credentialSecretName(c.OpConfig.SuperUsername), c.logger) + sidecarContainers = patchSidecarContainers(sidecarContainers, volumeMounts, c.OpConfig.SuperUsername, c.credentialSecretName(c.OpConfig.SuperUsername), c.logger, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, generateCapabilities(c.OpConfig.AdditionalPodCapabilities)) tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) topologySpreadConstraintsSpec := topologySpreadConstraints(&spec.TopologySpreadConstraints) @@ -1515,7 +1564,7 @@ func (c *Cluster) generateStatefulSet(spec 
*cpov1.PostgresSpec) (*appsv1.Statefu podAnnotations := c.generatePodAnnotations(spec) if spec.GetBackup().Pgbackrest != nil { - initContainers = append(initContainers, c.generatePgbackrestRestoreContainer(spec, repo_host_mode, volumeMounts, resourceRequirements)) + initContainers = append(initContainers, c.generatePgbackrestRestoreContainer(spec, repo_host_mode, volumeMounts, resourceRequirements, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, generateCapabilities(c.OpConfig.AdditionalPodCapabilities))) additionalVolumes = append(additionalVolumes, c.generatePgbackrestConfigVolume(spec.Backup.Pgbackrest, false)) @@ -1618,7 +1667,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu return statefulSet, nil } -func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, repo_host_mode bool, volumeMounts []v1.VolumeMount, resourceRequirements *v1.ResourceRequirements) v1.Container { +func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, repo_host_mode bool, volumeMounts []v1.VolumeMount, resourceRequirements *v1.ResourceRequirements, privilegedMode bool, privilegeEscalationMode *bool, additionalPodCapabilities *v1.Capabilities) v1.Container { isOptional := true pgbackrestRestoreEnvVars := []v1.EnvVar{ { @@ -1698,6 +1747,12 @@ func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, r Env: pgbackrestRestoreEnvVars, VolumeMounts: volumeMounts, Resources: *resourceRequirements, + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: privilegeEscalationMode, + Privileged: &privilegedMode, + ReadOnlyRootFilesystem: util.True(), + Capabilities: additionalPodCapabilities, + }, } } @@ -1756,6 +1811,7 @@ func (c *Cluster) generateRepoHostStatefulSet(spec *cpov1.PostgresSpec) (*appsv1 volumeMounts, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + 
c.OpConfig.Resources.ReadOnlyRootFilesystem, generateCapabilities(c.OpConfig.AdditionalPodCapabilities), ) @@ -2159,6 +2215,48 @@ func addShmVolume(podSpec *v1.PodSpec) { podSpec.Volumes = volumes } +func addEmptyDirVolume(podSpec *v1.PodSpec, volumeName string, containerName string, path string) { + vol := v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } + podSpec.Volumes = append(podSpec.Volumes, vol) + + mount := v1.VolumeMount{ + Name: vol.Name, + MountPath: path, + } + + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == containerName { + podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts, mount) + } + } +} + +func addRunVolume(podSpec *v1.PodSpec, volumeName string, containerName string, path string) { + vol := v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } + podSpec.Volumes = append(podSpec.Volumes, vol) + + mount := v1.VolumeMount{ + Name: vol.Name, + MountPath: path, + } + + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == containerName { + podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts, mount) + } + } +} + func addVarRunVolume(podSpec *v1.PodSpec) { volumes := append(podSpec.Volumes, v1.Volume{ Name: "postgresql-run", @@ -2726,6 +2824,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { []v1.VolumeMount{}, c.OpConfig.SpiloPrivileged, // use same value as for normal DB pods c.OpConfig.SpiloAllowPrivilegeEscalation, + util.False(), nil, ) @@ -3252,9 +3351,13 @@ func (c *Cluster) generatePgbackrestJob(backup *cpov1.Pgbackrest, repo *cpov1.Re []v1.VolumeMount{}, c.OpConfig.SpiloPrivileged, // use same value as for normal DB pods c.OpConfig.SpiloAllowPrivilegeEscalation, + c.OpConfig.Resources.ReadOnlyRootFilesystem, nil, ) + // Patch securityContext - readOnlyRootFilesystem + 
pgbackrestContainer.SecurityContext.ReadOnlyRootFilesystem = util.True() + podAffinityTerm := v1.PodAffinityTerm{ LabelSelector: c.roleLabelsSelector(Master), TopologyKey: "kubernetes.io/hostname", diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 9a3361217..76d8608b8 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -92,6 +92,12 @@ func (c *Cluster) createStatefulSet() (*appsv1.StatefulSet, error) { }, }, Env: c.generateMonitoringEnvVars(), + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + Privileged: &c.OpConfig.Resources.SpiloPrivileged, + ReadOnlyRootFilesystem: util.True(), + Capabilities: generateCapabilities(c.OpConfig.AdditionalPodCapabilities), + }, } c.Spec.Sidecars = append(c.Spec.Sidecars, *sidecar) //populate the sidecar spec so that the sidecar is automatically created } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index a12117d97..33f3dd734 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -514,6 +514,12 @@ func (c *Cluster) syncStatefulSet() error { }, }, Env: c.generateMonitoringEnvVars(), + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + Privileged: &c.OpConfig.Resources.SpiloPrivileged, + ReadOnlyRootFilesystem: util.True(), + Capabilities: generateCapabilities(c.OpConfig.AdditionalPodCapabilities), + }, } c.Spec.Sidecars = append(c.Spec.Sidecars, *sidecar) //populate the sidecar spec so that the sidecar is automatically created } diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 8e55b4982..931eacb05 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -10,7 +10,6 @@ import ( "sync" "time" - "github.com/sirupsen/logrus" cpov1 "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/apis/cpo.opensource.cybertec.at/v1" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/apiserver" 
"github.com/cybertec-postgresql/cybertec-pg-operator/pkg/cluster" @@ -22,6 +21,7 @@ import ( "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/constants" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/k8sutil" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/ringlog" + "github.com/sirupsen/logrus" v1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 12f27a536..c49fd97f3 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -75,6 +75,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *cpov1.OperatorConfigura result.PodTerminateGracePeriod = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod), "5m") result.SpiloPrivileged = fromCRD.Kubernetes.SpiloPrivileged result.SpiloAllowPrivilegeEscalation = util.CoalesceBool(fromCRD.Kubernetes.SpiloAllowPrivilegeEscalation, util.True()) + result.ReadOnlyRootFilesystem = util.CoalesceBool(fromCRD.Kubernetes.ReadOnlyRootFilesystem, util.False()) result.SpiloRunAsUser = fromCRD.Kubernetes.SpiloRunAsUser result.SpiloRunAsGroup = fromCRD.Kubernetes.SpiloRunAsGroup result.SpiloFSGroup = fromCRD.Kubernetes.SpiloFSGroup @@ -121,6 +122,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *cpov1.OperatorConfigura result.PodManagementPolicy = util.Coalesce(fromCRD.Kubernetes.PodManagementPolicy, "ordered_ready") result.PersistentVolumeClaimRetentionPolicy = fromCRD.Kubernetes.PersistentVolumeClaimRetentionPolicy result.EnableReadinessProbe = fromCRD.Kubernetes.EnableReadinessProbe + result.EnableLivenessProbe = fromCRD.Kubernetes.EnableLivenessProbe result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m") result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity result.PodAntiAffinityTopologyKey = 
util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 2e37a0b65..8a5d3b63a 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { SpiloPrivileged bool `name:"spilo_privileged" default:"false"` SpiloAllowPrivilegeEscalation *bool `name:"spilo_allow_privilege_escalation" default:"true"` AdditionalPodCapabilities []string `name:"additional_pod_capabilities" default:""` + ReadOnlyRootFilesystem *bool `name:"container_readonly_root_filesystem" default:"false"` ClusterLabels map[string]string `name:"cluster_labels" default:"application:cpo"` InheritedLabels []string `name:"inherited_labels" default:""` InheritedAnnotations []string `name:"inherited_annotations" default:""` @@ -248,6 +249,7 @@ type Config struct { PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` PodManagementPolicy string `name:"pod_management_policy" default:"ordered_ready"` EnableReadinessProbe bool `name:"enable_readiness_probe" default:"false"` + EnableLivenessProbe bool `name:"enable_liveness_probe" default:"false"` ProtectedRoles []string `name:"protected_role_names" default:"admin,cron_admin"` PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""` SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`