Skip to content

Commit c420cd9

Browse files
authored
Merge pull request #20 from cybertec-postgresql/bugfix/stale-leader-observation
Fix race condition causing a spurious promote during a global DCS outage
2 parents b983e7f + a9459f5 commit c420cd9

2 files changed

Lines changed: 13 additions & 1 deletion

File tree

patroni/ha.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ def set_is_leader(self, value: bool) -> None:
300300
301301
:param value: is the current node the leader.
302302
"""
303+
self.patroni.multisite.set_is_local_leader(value)
303304
with self._leader_expiry_lock:
304305
self._leader_expiry = time.time() + self.dcs.ttl if value else 0
305306
if not value:

patroni/multisite.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def status(self) -> Dict[str, Any]:
6464
def should_failover(self) -> bool:
6565
return False
6666

67+
def set_is_local_leader(self, value: bool):
68+
"""Update multisite mechanisms view if this node is running as a site leader."""
69+
pass
70+
6771
def on_shutdown(self, checkpoint_location: int, prev_location: int):
6872
pass
6973

@@ -111,6 +115,8 @@ def __init__(self, config: 'Config', on_change: Optional[Callable[..., None]] =
111115

112116
self._dcs_error = None
113117

118+
self._is_local_leader = False
119+
114120
@staticmethod
115121
def get_dcs_config(config: 'Config') -> Tuple[Dict[str, Any], AbstractDCS]:
116122
msconfig = config['multisite']
@@ -421,6 +427,10 @@ def touch_member(self):
421427
logger.info(f"Registering site {self.name} in DCS with address {address}")
422428
self.dcs.touch_member(data)
423429

430+
def set_is_local_leader(self, value: bool):
431+
# Assumes setting a boolean flag from other threads without a lock is atomic
432+
self._is_local_leader = value
433+
424434
def run(self):
425435
self._observe_leader()
426436
while not self._heartbeat.wait(self.config['observe_interval']):
@@ -433,7 +443,8 @@ def run(self):
433443
if self._state_updater:
434444
self._state_updater.store_updates()
435445
while not self._heartbeat.wait(self.config['observe_interval']):
436-
self._observe_leader()
446+
if not self._is_local_leader:
447+
self._observe_leader()
437448

438449
def shutdown(self):
439450
self.stop_requested = True

0 commit comments

Comments
 (0)