-
Notifications
You must be signed in to change notification settings - Fork 727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
scheduler: consider leader score when evict leader #8912
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Copyright 2025 TiKV Project Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package filter | ||
|
||
import ( | ||
"math/rand" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/pingcap/kvproto/pkg/metapb" | ||
|
||
"github.com/tikv/pd/pkg/core" | ||
"github.com/tikv/pd/pkg/mock/mockconfig" | ||
) | ||
|
||
// TestRegionCompare checks that sorting store candidates with | ||
// RegionScoreComparer moves the store that was given the smallest region | ||
// size to the front of the candidate list. | ||
func TestRegionCompare(t *testing.T) { | ||
re := require.New(t) | ||
ids := []uint64{1, 2, 3, 4, 5} | ||
stores := make([]*core.StoreInfo, 0, len(ids)) | ||
// Store i is created with region size (6-i)*1000, so store 1 gets the | ||
// largest size (5000) and store 5 the smallest (1000). | ||
for _, id := range ids { | ||
stores = append(stores, core.NewStoreInfo( | ||
&metapb.Store{Id: id}, | ||
core.SetRegionSize(int64(6-id)*1000), | ||
)) | ||
} | ||
cs := NewCandidates(rand.New(rand.NewSource(time.Now().UnixNano())), stores) | ||
cfg := mockconfig.NewTestOptions() | ||
// Before sorting, PickFirst is expected to return store 1. | ||
re.Equal(uint64(1), cs.PickFirst().GetID()) | ||
cs.Sort(RegionScoreComparer(cfg)) | ||
// After sorting, PickFirst is expected to return store 5. | ||
re.Equal(uint64(5), cs.PickFirst().GetID()) | ||
} | ||
|
||
// TestLeaderCompare checks that sorting store candidates with | ||
// LeaderScoreComparer moves the store that was given the smallest leader | ||
// count to the front of the candidate list. | ||
func TestLeaderCompare(t *testing.T) { | ||
re := require.New(t) | ||
ids := []uint64{1, 2, 3, 4, 5} | ||
stores := make([]*core.StoreInfo, 0, len(ids)) | ||
// Store i is created with leader count (6-i)*1000, so store 1 gets the | ||
// largest count (5000) and store 5 the smallest (1000). | ||
for _, id := range ids { | ||
stores = append(stores, core.NewStoreInfo( | ||
&metapb.Store{Id: id}, | ||
core.SetLeaderCount(int(6-id)*1000), | ||
)) | ||
} | ||
cs := NewCandidates(rand.New(rand.NewSource(time.Now().UnixNano())), stores) | ||
cfg := mockconfig.NewTestOptions() | ||
// Before sorting, PickFirst is expected to return store 1. | ||
re.Equal(uint64(1), cs.PickFirst().GetID()) | ||
cs.Sort(LeaderScoreComparer(cfg)) | ||
// After sorting, PickFirst is expected to return store 5. | ||
re.Equal(uint64(5), cs.PickFirst().GetID()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,10 +80,9 @@ func CreateRemovePeerOperator(desc string, ci sche.SharedCluster, kind OpKind, r | |
} | ||
|
||
// CreateTransferLeaderOperator creates an operator that transfers the leader from a source store to a target store. | ||
func CreateTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, targetStoreID uint64, targetStoreIDs []uint64, kind OpKind) (*Operator, error) { | ||
func CreateTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, targetStoreID uint64, kind OpKind) (*Operator, error) { | ||
return NewBuilder(desc, ci, region, SkipOriginJointStateCheck). | ||
SetLeader(targetStoreID). | ||
SetLeaders(targetStoreIDs). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why remove it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do not need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's an optimization; why don't we need it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
SetLeaders sorts target stores according to store ID. This PR sorts target stores according to score. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. targetLeaderStoreIDs was used previously, but is removed by this PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we previously used it in the evict leader scheduler to select targets. It is replaced by this PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We allowed multiple targets in the op step before; this PR changes that, which might be slower? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was only used in evict leader; after this PR no other scheduler uses it, so I removed it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was O(n) previously and it is O(n log n) now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See tikv/tikv#10602 |
||
Build(kind) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -363,19 +363,12 @@ | |
filters = append(filters, &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) | ||
candidates := filter.NewCandidates(r, cluster.GetFollowerStores(region)). | ||
FilterTarget(cluster.GetSchedulerConfig(), nil, nil, filters...) | ||
// Compatible with old TiKV transfer leader logic. | ||
target := candidates.RandomPick() | ||
targets := candidates.PickAll() | ||
// `targets` MUST contains `target`, so only needs to check if `target` is nil here. | ||
if target == nil { | ||
|
||
if len(candidates.Stores) == 0 { | ||
evictLeaderNoTargetStoreCounter.Inc() | ||
continue | ||
} | ||
targetIDs := make([]uint64, 0, len(targets)) | ||
for _, t := range targets { | ||
targetIDs = append(targetIDs, t.GetID()) | ||
} | ||
op, err := operator.CreateTransferLeaderOperator(name, cluster, region, target.GetID(), targetIDs, operator.OpLeader) | ||
op, err := createOperatorWithSort(name, cluster, candidates, region) | ||
if err != nil { | ||
log.Debug("fail to create evict leader operator", errs.ZapError(err)) | ||
continue | ||
|
@@ -387,6 +380,22 @@ | |
return ops | ||
} | ||
|
||
// createOperatorWithSort sorts candidates with filter.LeaderScoreComparer and | ||
// then walks candidates.Stores in that order, returning the first | ||
// transfer-leader operator that is created without error. If no attempt | ||
// succeeds, the op and err of the last attempt are returned; if | ||
// candidates.Stores is empty, both results are nil. | ||
func createOperatorWithSort(name string, cluster sche.SchedulerCluster, candidates *filter.StoreCandidates, region *core.RegionInfo) (*operator.Operator, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about renaming it to |
||
// Sort so that stores with a lower leader score are tried first. | ||
candidates.Sort(filter.LeaderScoreComparer(cluster.GetSharedConfig())) | ||
var ( | ||
op *operator.Operator | ||
err error | ||
) | ||
for _, target := range candidates.Stores { | ||
op, err = operator.CreateTransferLeaderOperator(name, cluster, region, target.GetID(), operator.OpLeader) | ||
if op != nil && err == nil { | ||
return op, err | ||
} | ||
} | ||
return op, err | ||
} | ||
|
||
type evictLeaderHandler struct { | ||
rd *render.Render | ||
config *evictLeaderSchedulerConfig | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The leader score is not very accurate, so the leader count of the lowest-scoring store can go up too much. How about also considering the influence of running operators?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done