Skip to content

Commit b19fdd6

Browse files
wangshao1 and wangshaoyi authored
fix: fix replication on pika node recovering (#3038)
* fix: tryFixReplicationRelationships error (#2991)
* fix invalid arguments error when dashboard sends slaveof force
Co-authored-by: wangshaoyi <[email protected]>
* fix replication relationship error while recovering from offline
* update proxy slot mappings after slave pika recovered
---------
Co-authored-by: wangshaoyi <[email protected]>
1 parent 5a761c2 commit b19fdd6

4 files changed

Lines changed: 28 additions & 24 deletions

File tree

codis/pkg/topom/topom_group.go

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ func (s *Topom) GroupPromoteServer(gid int, addr string) error {
330330
}
331331
}
332332

333-
func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState, masterOffGroupLen int) {
333+
func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState) {
334334
for _, state := range recoveredGroupServers {
335335
log.Infof("group-[%d] try to fix server[%v-%v] replication relationship", state.GroupID, state.Index, state.Addr)
336336
group, err := ctx.getGroup(state.GroupID)
@@ -346,7 +346,7 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe
346346
continue
347347
}
348348

349-
err = s.tryFixReplicationRelationship(group, state.Server, state, masterOffGroupLen)
349+
err = s.tryFixReplicationRelationship(group, state.Server, state)
350350
if err != nil {
351351
log.Warnf("group-[%d] fix server[%v] replication relationship failed, err: %v", group.Id, state.Addr, err)
352352
continue
@@ -371,29 +371,21 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe
371371
// only fix which the old state of GroupServer is GroupServerStateOffline.
372372
// It will only update the state of GroupServer to GroupServerStateNormal if the GroupServer has the right
373373
// master-slave replication relationship.
374-
func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState, masterOffGroupLen int) (err error) {
374+
func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState) (err error) {
375375
curMasterAddr := group.Servers[0].Addr
376376
if isGroupMaster(state, group) {
377-
// current server is master,
378-
if models.GroupServerRole(state.Replication.Role) == models.RoleMaster {
379-
if masterOffGroupLen > 0 {
380-
return nil
381-
}
382-
}
383-
384377
// execute the command `slaveof no one`
385-
if err = promoteServerToNewMaster(state.Addr, s.config.ProductAuth); err != nil {
386-
return err
378+
if models.GroupServerRole(state.Replication.Role) != models.RoleMaster {
379+
if err = promoteServerToNewMaster(state.Addr, s.config.ProductAuth); err != nil {
380+
return err
381+
}
387382
}
388383
} else {
389-
// skip if it has right replication relationship
390-
if state.Replication.GetMasterAddr() == curMasterAddr {
391-
return nil
392-
}
393-
394-
// current server is slave, execute the command `slaveof [new master ip] [new master port]`
395-
if err = updateMasterToNewOne(groupServer.Addr, curMasterAddr, s.config.ProductAuth); err != nil {
396-
return err
384+
if state.Replication.GetMasterAddr() != curMasterAddr {
385+
// current server is slave, execute the command `slaveof [new master ip] [new master port]`
386+
if err = updateMasterToNewOne(groupServer.Addr, curMasterAddr, s.config.ProductAuth); err != nil {
387+
return err
388+
}
397389
}
398390
}
399391

codis/pkg/topom/topom_sentinel.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo
4848

4949
if len(recoveredGroupServersState) > 0 {
5050
// offline GroupServer's service has recovered, check and fix its master-slave replication relationship
51-
s.tryFixReplicationRelationships(ctx, recoveredGroupServersState, len(masterOfflineGroups))
51+
s.tryFixReplicationRelationships(ctx, recoveredGroupServersState)
5252
}
5353

5454
return nil
@@ -85,6 +85,18 @@ func (s *Topom) checkAndUpdateGroupServerState(conf *Config, group *models.Group
8585
*recoveredGroupServers = append(*recoveredGroupServers, state)
8686
// update GroupServer to GroupServerStateNormal state later
8787
} else {
88+
// This may contain any of the following conditions:
89+
// 1. groupServer.State is Normal
90+
// 2. groupServer.State is GroupServerStateSubjectiveOffline and is Master
91+
// 3. groupServer.State is GroupServerStateSubjectiveOffline and is Slave
92+
// for condition 3, if current server's previous state is SubjectiveOffline
93+
// and has been added to slaveofflinegroups before,
94+
// should also resync mappings to proxy to enable replicationgroup
95+
if groupServer.State == models.GroupServerStateSubjectiveOffline &&
96+
!isGroupMaster(state, group) &&
97+
group.OutOfSync {
98+
*recoveredGroupServers = append(*recoveredGroupServers, state)
99+
}
88100
// Update the offset information of the state and role nodes
89101
groupServer.State = models.GroupServerStateNormal
90102
groupServer.ReCallTimes = 0

codis/pkg/utils/redis/client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ func (c *Client) SetMaster(master string, force bool) error {
341341
}
342342

343343
if force {
344-
if _, err := c.Do("SLAVEOF", host, port, "-f"); err != nil {
344+
if _, err := c.Do("SLAVEOF", host, port, "force"); err != nil {
345345
return err
346346
}
347347
} else {

codis/pkg/utils/redis/sentinel.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@ func (i *InfoReplication) UnmarshalJSON(b []byte) error {
106106
}
107107

108108
i.Role = kvmap["role"]
109-
i.MasterPort = kvmap["master_host"]
110-
i.MasterHost = kvmap["master_port"]
109+
i.MasterPort = kvmap["master_port"]
110+
i.MasterHost = kvmap["master_host"]
111111
i.MasterLinkStatus = kvmap["master_link_status"]
112112
i.IsEligibleForMasterElection = kvmap["is_eligible_for_master_election"] == "true"
113113

0 commit comments

Comments
 (0)