Skip to content

Commit

Permalink
check: remove orphan peer only when the peers is greater than the rul…
Browse files Browse the repository at this point in the history
…e count (#7581) (#7591)

close #7584

The healthy orphan peer should be the last one to be removed only if there are extra peers to keep the high availablility.

Signed-off-by: ti-chi-bot <[email protected]>
Signed-off-by: bufferflies <[email protected]>

Co-authored-by: tongjian <[email protected]>
Co-authored-by: bufferflies <[email protected]>
  • Loading branch information
ti-chi-bot and bufferflies authored Dec 20, 2023
1 parent b0b1e38 commit 542fde9
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 1 deletion.
4 changes: 3 additions & 1 deletion server/schedule/checker/rule_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
}
}

extra := fit.ExtraCount()
// If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if number of OrphanPeers is >= 2.
// Ref https://github.com/tikv/pd/issues/4045
if len(fit.OrphanPeers) >= 2 {
Expand All @@ -522,7 +523,8 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
return operator.CreateRemovePeerOperator("remove-unhealthy-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
}
if hasHealthPeer {
// The healthy orphan peer can be removed to keep the high availability only if the peer count is greater than the rule requirement.
if hasHealthPeer && extra > 0 {
// there already exists a healthy orphan peer, so we can remove other orphan Peers.
checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
// if there exists a disconnected orphan peer, we will pick it to remove firstly.
Expand Down
36 changes: 36 additions & 0 deletions server/schedule/checker/rule_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1883,3 +1883,39 @@ func (suite *ruleCheckerTestSuite) TestTiFlashLocationLabels() {
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.Nil(op)
}

func (suite *ruleCheckerTestSuite) TestRemoveOrphanPeer() {
suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z2", "host": "h1"})
suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z2", "host": "h2"})
suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "host": "h2"})
rule := &placement.Rule{
GroupID: "pd",
ID: "test2",
Role: placement.Voter,
Count: 3,
LabelConstraints: []placement.LabelConstraint{
{
Key: "zone",
Op: placement.In,
Values: []string{"z2"},
},
},
}
suite.ruleManager.SetRule(rule)
suite.ruleManager.DeleteRule("pd", "default")

// case1: regionA has 3 peers but not extra peer can be removed, so it needs to add peer first
suite.cluster.AddLeaderRegionWithRange(1, "200", "300", 1, 2, 3)
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.NotNil(op)
suite.Equal("add-rule-peer", op.Desc())

// case2: regionB has 4 peers and one extra peer can be removed, so it needs to remove extra peer first
suite.cluster.AddLeaderRegionWithRange(2, "300", "400", 1, 2, 3, 4)
op = suite.rc.Check(suite.cluster.GetRegion(2))
suite.NotNil(op)
suite.Equal("remove-orphan-peer", op.Desc())
}
9 changes: 9 additions & 0 deletions server/schedule/placement/fit.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ func (f *RegionFit) IsSatisfied() bool {
return len(f.OrphanPeers) == 0
}

// ExtraCount return the extra count.
func (f *RegionFit) ExtraCount() int {
desired := 0
for _, r := range f.RuleFits {
desired += r.Rule.Count
}
return len(f.regionStores) - desired
}

// GetRuleFit returns the RuleFit that contains the peer.
func (f *RegionFit) GetRuleFit(peerID uint64) *RuleFit {
for _, rf := range f.RuleFits {
Expand Down

0 comments on commit 542fde9

Please sign in to comment.