Skip to content

Commit 0bef3b3

Browse files
authored
fix migration of single-node clusters (#2134)
1 parent 4d58525 commit 0bef3b3

File tree

1 file changed

+10
-13
lines changed

1 file changed

+10
-13
lines changed

pkg/cluster/pod.go

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -214,19 +214,16 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) {
214214
// MigrateMasterPod migrates master pod via failover to a replica
215215
func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
216216
var (
217-
masterCandidateName spec.NamespacedName
218-
err error
219-
eol bool
217+
err error
218+
eol bool
220219
)
221220

222221
oldMaster, err := c.KubeClient.Pods(podName.Namespace).Get(context.TODO(), podName.Name, metav1.GetOptions{})
223-
224222
if err != nil {
225-
return fmt.Errorf("could not get pod: %v", err)
223+
return fmt.Errorf("could not get master pod: %v", err)
226224
}
227225

228226
c.logger.Infof("starting process to migrate master pod %q", podName)
229-
230227
if eol, err = c.podIsEndOfLife(oldMaster); err != nil {
231228
return fmt.Errorf("could not get node %q: %v", oldMaster.Spec.NodeName, err)
232229
}
@@ -250,21 +247,21 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
250247
}
251248
c.Statefulset = sset
252249
}
253-
// We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
250+
// we may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case
251+
masterCandidateName := podName
252+
masterCandidatePod := oldMaster
254253
if *c.Statefulset.Spec.Replicas > 1 {
255254
if masterCandidateName, err = c.getSwitchoverCandidate(oldMaster); err != nil {
256255
return fmt.Errorf("could not find suitable replica pod as candidate for failover: %v", err)
257256
}
257+
masterCandidatePod, err = c.KubeClient.Pods(masterCandidateName.Namespace).Get(context.TODO(), masterCandidateName.Name, metav1.GetOptions{})
258+
if err != nil {
259+
return fmt.Errorf("could not get master candidate pod: %v", err)
260+
}
258261
} else {
259262
c.logger.Warningf("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back", c.clusterName())
260263
}
261264

262-
masterCandidatePod, err := c.KubeClient.Pods(masterCandidateName.Namespace).Get(context.TODO(), masterCandidateName.Name, metav1.GetOptions{})
263-
264-
if err != nil {
265-
return fmt.Errorf("could not get master candidate pod: %v", err)
266-
}
267-
268265
// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
269266
// - the cluster has some replicas - migrate one of those if necessary and failover to it
270267
// - there are no replicas - just terminate the master and wait until it respawns

0 commit comments

Comments
 (0)