From e693b3e755ba7441f068cd7f09fe22c18eee0e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Tue, 7 Jan 2025 18:24:57 +0800 Subject: [PATCH 01/18] add scheduler config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/operator/kind.go | 2 + pkg/schedule/schedulers/balance_key_range.go | 168 ++++++++++++++++++ .../schedulers/balance_key_range_test.go | 1 + pkg/schedule/schedulers/init.go | 25 +++ pkg/schedule/types/type.go | 2 + 5 files changed, 198 insertions(+) create mode 100644 pkg/schedule/schedulers/balance_key_range.go create mode 100644 pkg/schedule/schedulers/balance_key_range_test.go diff --git a/pkg/schedule/operator/kind.go b/pkg/schedule/operator/kind.go index 0187a64c568..0c99a6b7a17 100644 --- a/pkg/schedule/operator/kind.go +++ b/pkg/schedule/operator/kind.go @@ -35,6 +35,8 @@ const ( OpMerge // Initiated by range scheduler. OpRange + // Initiated by key range scheduler. + OpKeyRange // Initiated by replica checker. OpReplica // Include region split. Initiated by rule checker if `kind & OpAdmin == 0`. diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go new file mode 100644 index 00000000000..454eb88089a --- /dev/null +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -0,0 +1,168 @@ +package schedulers + +import ( + "net/http" + "net/url" + "time" + + "github.com/gorilla/mux" + "github.com/pingcap/log" + _ "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/core/constant" + "github.com/tikv/pd/pkg/errs" + sche "github.com/tikv/pd/pkg/schedule/core" + "github.com/tikv/pd/pkg/schedule/filter" + "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/plan" + "github.com/tikv/pd/pkg/schedule/types" + "github.com/unrolled/render" +) + +const ( + DefaultTimeout = 1 * time.Hour +) + +type balanceKeyRangeSchedulerHandler struct { + rd *render.Render + config *balanceKeyRangeSchedulerConfig +} + +func newBalanceKeyRangeHandler(conf *balanceKeyRangeSchedulerConfig) http.Handler { + handler := &balanceKeyRangeSchedulerHandler{ + config: conf, + rd: render.New(render.Options{IndentJSON: true}), + } + router := mux.NewRouter() + router.HandleFunc("/config", handler.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", handler.listConfig).Methods(http.MethodGet) + return router +} + +func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWriter, r *http.Request) { + handler.rd.JSON(w, http.StatusBadRequest, "update config is not supported") +} + +func (handler *balanceKeyRangeSchedulerHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() + handler.rd.JSON(w, http.StatusOK, conf) +} + +type balanceKeyRangeSchedulerConfig struct { + baseDefaultSchedulerConfig + balanceKeyRangeSchedulerParam +} + +type balanceKeyRangeSchedulerParam struct { + Role string `json:"role"` + Engine string `json:"engine"` + StartKey string `json:"start_key"` + EndKey string `json:"end_key"` + Timeout time.Duration `json:"timeout"` +} + +func (conf *balanceKeyRangeSchedulerConfig) encodeConfig() ([]byte, error) { + conf.RLock() + defer conf.RUnlock() + return EncodeConfig(conf) +} + +func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerParam { + conf.RLock() + defer conf.RUnlock() + return &balanceKeyRangeSchedulerParam{ + Role: conf.Role, + Engine: conf.Engine, + StartKey: conf.StartKey, + EndKey: 
conf.EndKey, + } +} + +func (conf *balanceKeyRangeSchedulerConfig) parseFromArgs(args []string) error { + if len(args) < 4 { + return errs.ErrSchedulerConfig.FastGenByArgs("args length should be greater than 4") + } + newConf := &balanceKeyRangeSchedulerConfig{} + var err error + newConf.StartKey, err = url.QueryUnescape(args[0]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + newConf.EndKey, err = url.QueryUnescape(args[1]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + + newConf.Role, err = url.QueryUnescape(args[2]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + + newConf.Engine, err = url.QueryUnescape(args[3]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + if len(args) >= 5 { + timeout, err := url.QueryUnescape(args[4]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + conf.Timeout, err = time.ParseDuration(timeout) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + } else { + conf.Timeout = DefaultTimeout + } + *newConf = *newConf + return nil +} + +func (s *balanceKeyRangeScheduler) EncodeConfig() ([]byte, error) { + return s.conf.encodeConfig() +} + +func (s *balanceKeyRangeScheduler) ReloadConfig() error { + return nil +} + +type balanceKeyRangeScheduler struct { + *BaseScheduler + conf *balanceKeyRangeSchedulerConfig + handler http.Handler + filters []filter.Filter + filterCounter *filter.Counter +} + +func (s *balanceKeyRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { + log.Info("balance key range scheduler is scheduling, need to implement") + return nil, nil +} + +func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { + allowed := s.OpController.OperatorCount(operator.OpKeyRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() + if !allowed { + operator.IncOperatorLimitCounter(s.GetType(), operator.OpKeyRange) + } + return allowed +} + +type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler) + +// newBalanceKeyRangeScheduler creates a scheduler that tends to keep given peer role on +// special store balanced. 
+func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanceKeyRangeSchedulerConfig, options ...BalanceKeyRangeCreateOption) Scheduler { + s := &balanceKeyRangeScheduler{ + BaseScheduler: NewBaseScheduler(opController, types.BalanceLeaderScheduler, conf), + conf: conf, + handler: newBalanceKeyRangeHandler(conf), + } + for _, option := range options { + option(s) + } + s.filters = []filter.Filter{ + &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.Medium}, + filter.NewSpecialUseFilter(s.GetName()), + } + s.filterCounter = filter.NewCounter(s.GetName()) + return s +} diff --git a/pkg/schedule/schedulers/balance_key_range_test.go b/pkg/schedule/schedulers/balance_key_range_test.go new file mode 100644 index 00000000000..9185832f5db --- /dev/null +++ b/pkg/schedule/schedulers/balance_key_range_test.go @@ -0,0 +1 @@ +package schedulers diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 51d857ae445..3fc4c0659c4 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -545,4 +545,29 @@ func schedulersRegister() { conf.init(sche.GetName(), storage, conf) return sche, nil }) + + // balance key range scheduler + RegisterSliceDecoderBuilder(types.BalanceKeyRangeScheduler, func(args []string) ConfigDecoder { + return func(v any) error { + conf, ok := v.(*balanceKeyRangeSchedulerConfig) + if !ok { + return errs.ErrScheduleConfigNotExist.FastGenByArgs() + } + return parseBalanceKeyRangeParamArgs(args, conf) + } + }) + + RegisterScheduler(types.BalanceKeyRangeScheduler, func(opController *operator.Controller, + storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { + conf := &balanceKeyRangeSchedulerConfig{ + baseDefaultSchedulerConfig: newBaseDefaultSchedulerConfig(), + } + if err := decoder(conf); err != nil { + return nil, err + } + sche := newBalanceKeyRangeScheduler(opController, conf) + conf.init(sche.GetName(), storage, conf) + return sche, nil + }) + } diff --git a/pkg/schedule/types/type.go b/pkg/schedule/types/type.go index 7bc27892010..24983a98520 100644 --- a/pkg/schedule/types/type.go +++ b/pkg/schedule/types/type.go @@ -70,6 +70,8 @@ const ( TransferWitnessLeaderScheduler CheckerSchedulerType = "transfer-witness-leader-scheduler" // LabelScheduler is label scheduler name. LabelScheduler CheckerSchedulerType = "label-scheduler" + // BalanceKeyRangeScheduler is balance key range scheduler name. 
+ BalanceKeyRangeScheduler CheckerSchedulerType = "balance-key-range-scheduler" ) // TODO: SchedulerTypeCompatibleMap and ConvertOldStrToType should be removed after From 23ff7d068653200ecb27f97a483d651721124a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Fri, 10 Jan 2025 09:50:26 +0800 Subject: [PATCH 02/18] add new scheduler for key range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/operator/kind.go | 1 + pkg/schedule/operator/operator_test.go | 3 + pkg/schedule/schedulers/balance_key_range.go | 80 ++++++------------- .../schedulers/balance_key_range_test.go | 6 ++ pkg/schedule/schedulers/init.go | 23 +++++- pkg/schedule/schedulers/scheduler.go | 2 + pkg/schedule/types/type.go | 37 +++++---- server/api/scheduler.go | 23 ++++++ server/cluster/cluster_test.go | 7 ++ tools/pd-ctl/pdctl/command/scheduler.go | 58 ++++++++++++++ .../pd-ctl/tests/scheduler/scheduler_test.go | 22 ++++- 11 files changed, 187 insertions(+), 75 deletions(-) diff --git a/pkg/schedule/operator/kind.go b/pkg/schedule/operator/kind.go index 0c99a6b7a17..c6e4614f525 100644 --- a/pkg/schedule/operator/kind.go +++ b/pkg/schedule/operator/kind.go @@ -76,6 +76,7 @@ var nameToFlag = map[string]OpKind{ "replica": OpReplica, "merge": OpMerge, "range": OpRange, + "key-range": OpKeyRange, "witness-leader": OpWitnessLeader, } diff --git a/pkg/schedule/operator/operator_test.go b/pkg/schedule/operator/operator_test.go index 6976b5ca12e..422091dea19 100644 --- a/pkg/schedule/operator/operator_test.go +++ b/pkg/schedule/operator/operator_test.go @@ -476,6 +476,9 @@ func (suite *operatorTestSuite) TestSchedulerKind() { }, { op: NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader), expect: OpLeader, + }, { + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpKeyRange|OpLeader), + expect: OpKeyRange, }, } for _, v := range testData { diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go index 454eb88089a..dea00f45e9e 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -2,12 +2,13 @@ package schedulers import ( "net/http" - "net/url" "time" "github.com/gorilla/mux" "github.com/pingcap/log" - _ "github.com/tikv/pd/pkg/core" + "github.com/unrolled/render" + + "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/errs" sche "github.com/tikv/pd/pkg/schedule/core" @@ -15,7 +16,6 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/schedule/types" - "github.com/unrolled/render" ) const ( @@ -44,7 +44,9 @@ func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWrit func (handler *balanceKeyRangeSchedulerHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.clone() - handler.rd.JSON(w, http.StatusOK, conf) + if err := handler.rd.JSON(w, http.StatusOK, conf); err != nil { + log.Error("failed to marshal balance key range scheduler config", errs.ZapError(err)) + } } type balanceKeyRangeSchedulerConfig struct { @@ -53,11 +55,10 @@ type balanceKeyRangeSchedulerConfig struct { } type balanceKeyRangeSchedulerParam struct { - Role string `json:"role"` - Engine string `json:"engine"` - StartKey string `json:"start_key"` - EndKey string `json:"end_key"` - Timeout time.Duration `json:"timeout"` + Role string `json:"role"` + Engine string `json:"engine"` + 
Timeout time.Duration `json:"timeout"` + Ranges []core.KeyRange `json:"ranges"` } func (conf *balanceKeyRangeSchedulerConfig) encodeConfig() ([]byte, error) { @@ -69,54 +70,16 @@ func (conf *balanceKeyRangeSchedulerConfig) encodeConfig() ([]byte, error) { func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerParam { conf.RLock() defer conf.RUnlock() + ranges := make([]core.KeyRange, len(conf.Ranges)) + copy(ranges, conf.Ranges) return &balanceKeyRangeSchedulerParam{ - Role: conf.Role, - Engine: conf.Engine, - StartKey: conf.StartKey, - EndKey: conf.EndKey, + Ranges: ranges, + Role: conf.Role, + Engine: conf.Engine, + Timeout: conf.Timeout, } } -func (conf *balanceKeyRangeSchedulerConfig) parseFromArgs(args []string) error { - if len(args) < 4 { - return errs.ErrSchedulerConfig.FastGenByArgs("args length should be greater than 4") - } - newConf := &balanceKeyRangeSchedulerConfig{} - var err error - newConf.StartKey, err = url.QueryUnescape(args[0]) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - newConf.EndKey, err = url.QueryUnescape(args[1]) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - - newConf.Role, err = url.QueryUnescape(args[2]) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - - newConf.Engine, err = url.QueryUnescape(args[3]) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - if len(args) >= 5 { - timeout, err := url.QueryUnescape(args[4]) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - conf.Timeout, err = time.ParseDuration(timeout) - if err != nil { - return errs.ErrQueryUnescape.Wrap(err) - } - } else { - conf.Timeout = DefaultTimeout - } - *newConf = *newConf - return nil -} - func (s *balanceKeyRangeScheduler) EncodeConfig() ([]byte, error) { return s.conf.encodeConfig() } @@ -133,8 +96,13 @@ type balanceKeyRangeScheduler struct { filterCounter *filter.Counter } -func (s *balanceKeyRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { - log.Info("balance key range scheduler is scheduling, need to implement") +// ServeHTTP implements the http.Handler interface. +func (s *balanceKeyRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.handler.ServeHTTP(w, r) +} + +func (s *balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { + log.Debug("balance key range scheduler is scheduling, need to implement") return nil, nil } @@ -152,7 +120,7 @@ type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler) // special store balanced. 
func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanceKeyRangeSchedulerConfig, options ...BalanceKeyRangeCreateOption) Scheduler { s := &balanceKeyRangeScheduler{ - BaseScheduler: NewBaseScheduler(opController, types.BalanceLeaderScheduler, conf), + BaseScheduler: NewBaseScheduler(opController, types.BalanceKeyRangeScheduler, conf), conf: conf, handler: newBalanceKeyRangeHandler(conf), } diff --git a/pkg/schedule/schedulers/balance_key_range_test.go b/pkg/schedule/schedulers/balance_key_range_test.go index 9185832f5db..f0a402d108a 100644 --- a/pkg/schedule/schedulers/balance_key_range_test.go +++ b/pkg/schedule/schedulers/balance_key_range_test.go @@ -1 +1,7 @@ package schedulers + +import "testing" + +func TestHttpApi(t *testing.T) { + +} diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 3fc4c0659c4..f9b296ee6d8 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -15,6 +15,7 @@ package schedulers import ( + "net/url" "strconv" "strings" "sync" @@ -547,13 +548,33 @@ func schedulersRegister() { }) // balance key range scheduler + // args: [role, engine, range1, range2, ...] RegisterSliceDecoderBuilder(types.BalanceKeyRangeScheduler, func(args []string) ConfigDecoder { return func(v any) error { conf, ok := v.(*balanceKeyRangeSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() } - return parseBalanceKeyRangeParamArgs(args, conf) + if len(args) < 4 { + return errs.ErrSchedulerConfig.FastGenByArgs("args length must be greater than 3") + } + role, err := url.QueryUnescape(args[0]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + engine, err := url.QueryUnescape(args[1]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + ranges, err := getKeyRanges(args[2:]) + if err != nil { + return err + } + conf.Ranges = ranges + conf.Engine = engine + conf.Role = role + conf.Timeout = DefaultTimeout + return nil } }) diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index 8976c3a1928..fd6d6710350 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -157,7 +157,9 @@ func CreateScheduler( removeSchedulerCb ...func(string) error, ) (Scheduler, error) { fn, ok := schedulerMap[typ] + log.Info("create scheduler", zap.Any("typ", typ)) if !ok { + log.Warn("create scheduler not found", zap.Any("typ", typ)) return nil, errs.ErrSchedulerCreateFuncNotRegistered.FastGenByArgs(typ) } diff --git a/pkg/schedule/types/type.go b/pkg/schedule/types/type.go index 24983a98520..baeb536c987 100644 --- a/pkg/schedule/types/type.go +++ b/pkg/schedule/types/type.go @@ -99,29 +99,31 @@ var ( SplitBucketScheduler: "split-bucket", TransferWitnessLeaderScheduler: "transfer-witness-leader", LabelScheduler: "label", + BalanceKeyRangeScheduler: "balance-key-range", } // ConvertOldStrToType exists for compatibility. // // It is used to convert the old scheduler type to `CheckerSchedulerType`. 
ConvertOldStrToType = map[string]CheckerSchedulerType{ - "balance-leader": BalanceLeaderScheduler, - "balance-region": BalanceRegionScheduler, - "balance-witness": BalanceWitnessScheduler, - "evict-leader": EvictLeaderScheduler, - "evict-slow-store": EvictSlowStoreScheduler, - "evict-slow-trend": EvictSlowTrendScheduler, - "grant-leader": GrantLeaderScheduler, - "grant-hot-region": GrantHotRegionScheduler, - "hot-region": BalanceHotRegionScheduler, - "random-merge": RandomMergeScheduler, - "scatter-range": ScatterRangeScheduler, - "shuffle-hot-region": ShuffleHotRegionScheduler, - "shuffle-leader": ShuffleLeaderScheduler, - "shuffle-region": ShuffleRegionScheduler, - "split-bucket": SplitBucketScheduler, - "transfer-witness-leader": TransferWitnessLeaderScheduler, - "label": LabelScheduler, + "balance-leader": BalanceLeaderScheduler, + "balance-region": BalanceRegionScheduler, + "balance-witness": BalanceWitnessScheduler, + "evict-leader": EvictLeaderScheduler, + "evict-slow-store": EvictSlowStoreScheduler, + "evict-slow-trend": EvictSlowTrendScheduler, + "grant-leader": GrantLeaderScheduler, + "grant-hot-region": GrantHotRegionScheduler, + "hot-region": BalanceHotRegionScheduler, + "random-merge": RandomMergeScheduler, + "scatter-range": ScatterRangeScheduler, + "shuffle-hot-region": ShuffleHotRegionScheduler, + "shuffle-leader": ShuffleLeaderScheduler, + "shuffle-region": ShuffleRegionScheduler, + "split-bucket": SplitBucketScheduler, + "transfer-witness-leader": TransferWitnessLeaderScheduler, + "label": LabelScheduler, + "balance-key-range-scheduler": BalanceKeyRangeScheduler, } // StringToSchedulerType is a map to convert the scheduler string to the CheckerSchedulerType. @@ -145,6 +147,7 @@ var ( "split-bucket-scheduler": SplitBucketScheduler, "transfer-witness-leader-scheduler": TransferWitnessLeaderScheduler, "label-scheduler": LabelScheduler, + "balance-key-range-scheduler": BalanceKeyRangeScheduler, } // DefaultSchedulers is the default scheduler types. 
diff --git a/server/api/scheduler.go b/server/api/scheduler.go index b2d18012c89..f8b62864c0c 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -99,6 +99,29 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } switch tp { + case types.BalanceKeyRangeScheduler: + exist, _ := h.IsSchedulerExisted(name) + if exist { + h.r.JSON(w, http.StatusBadRequest, "The scheduler already exists, pls remove the exist scheduler first.") + return + } + if err := apiutil.CollectStringOption("role", input, collector); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + if err := apiutil.CollectStringOption("engine", input, collector); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + + if err := apiutil.CollectEscapeStringOption("end_key", input, collector); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } case types.ScatterRangeScheduler: if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil { h.r.JSON(w, http.StatusInternalServerError, err.Error()) diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 2f6d04bbf52..c62fb64fc80 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -3209,6 +3209,13 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) re.NoError(controller.AddScheduler(gls)) + gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{}), controller.RemoveScheduler) + re.Error(err) + + gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{"leaner", "tiflash", "100", "200"}), controller.RemoveScheduler) + re.NoError(err) + re.NoError(controller.AddScheduler(gls)) + hb, err := schedulers.CreateScheduler(types.BalanceHotRegionScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigJSONDecoder([]byte("{}"))) re.NoError(err) conf, err = hb.EncodeConfig() diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 5dc05aff62f..48e8d9ecf2d 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -162,6 +162,7 @@ func NewAddSchedulerCommand() *cobra.Command { c.AddCommand(NewSlowTrendEvictLeaderSchedulerCommand()) c.AddCommand(NewBalanceWitnessSchedulerCommand()) c.AddCommand(NewTransferWitnessLeaderSchedulerCommand()) + c.AddCommand(NewBalanceKeyRangeSchedulerCommand()) return c } @@ -374,6 +375,16 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { return c } +func NewBalanceKeyRangeSchedulerCommand() *cobra.Command { + c := &cobra.Command{ + Use: "balance-key-range-scheduler [--format=raw|encode|hex] ", + Short: "add a scheduler to balance region for given key range", + Run: addSchedulerForBalanceKeyRangeCommandFunc, + } + c.Flags().String("format", "hex", "the key format") + return c +} + // NewTransferWitnessLeaderSchedulerCommand returns a command to add a transfer-witness-leader-shceudler. 
func NewTransferWitnessLeaderSchedulerCommand() *cobra.Command { c := &cobra.Command{ @@ -412,6 +423,32 @@ func addSchedulerForGrantHotRegionCommandFunc(cmd *cobra.Command, args []string) postJSON(cmd, schedulersPrefix, input) } +func addSchedulerForBalanceKeyRangeCommandFunc(cmd *cobra.Command, args []string) { + if len(args) != 4 { + cmd.Println(cmd.UsageString()) + return + } + startKey, err := parseKey(cmd.Flags(), args[2]) + if err != nil { + cmd.Println("Error: ", err) + return + } + endKey, err := parseKey(cmd.Flags(), args[3]) + if err != nil { + cmd.Println("Error: ", err) + return + } + + input := make(map[string]any) + input["name"] = cmd.Name() + input["engine"] = args[0] + input["role"] = args[1] + input["start_key"] = url.QueryEscape(startKey) + input["end_key"] = url.QueryEscape(endKey) + + postJSON(cmd, schedulersPrefix, input) +} + func addSchedulerCommandFunc(cmd *cobra.Command, args []string) { if len(args) != 0 { cmd.Println(cmd.UsageString()) @@ -523,6 +560,7 @@ func NewConfigSchedulerCommand() *cobra.Command { newConfigEvictSlowStoreCommand(), newConfigShuffleHotRegionSchedulerCommand(), newConfigEvictSlowTrendCommand(), + newConfigBalanceKeyRangeCommand(), ) return c } @@ -547,6 +585,26 @@ func newConfigBalanceLeaderCommand() *cobra.Command { return c } +func newConfigBalanceKeyRangeCommand() *cobra.Command { + c := &cobra.Command{ + Use: "balance-key-range-scheduler", + Short: "balance-key-range-scheduler config", + Run: listSchedulerConfigCommandFunc, + } + + c.AddCommand(&cobra.Command{ + Use: "show", + Short: "show the config item", + Run: listSchedulerConfigCommandFunc, + }, &cobra.Command{ + Use: "set ", + Short: "set the config item", + Run: func(cmd *cobra.Command, args []string) { postSchedulerConfigCommandFunc(cmd, c.Name(), args) }, + }) + + return c +} + func newSplitBucketCommand() *cobra.Command { c := &cobra.Command{ Use: "split-bucket-scheduler", diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index f3a81845921..50237d8303c 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -15,6 +15,7 @@ package scheduler_test import ( + "encoding/base64" "encoding/json" "fmt" "reflect" @@ -84,7 +85,7 @@ func (suite *schedulerTestSuite) TearDownTest() { return currentSchedulers[i] == scheduler }) { echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", scheduler}, nil) - re.Contains(echo, "Success!") + re.Contains(echo, "Success!", scheduler) } } for _, scheduler := range currentSchedulers { @@ -541,6 +542,25 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust return !strings.Contains(echo, "evict-leader-scheduler") }) + // test balance key range scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + re.Contains(echo, "Success!") + conf = make(map[string]any) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-key-range-scheduler", "show"}, &conf) + re.Equal("learner", conf["role"]) + re.Equal("tiflash", conf["engine"]) + ranges := conf["ranges"].([]interface{})[0].(map[string]interface{}) + re.Equal(base64.StdEncoding.EncodeToString([]byte("a")), ranges["start-key"]) + re.Equal(base64.StdEncoding.EncodeToString([]byte("b")), 
ranges["end-key"]) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + re.Contains(echo, "400") + re.Contains(echo, "scheduler already exists") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-key-range-scheduler"}, nil) + re.Contains(echo, "Success!") + // test balance leader config conf = make(map[string]any) conf1 := make(map[string]any) From 1e6d628b5afd10bbb2c481016a584a87ea69c6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Fri, 10 Jan 2025 14:32:53 +0800 Subject: [PATCH 03/18] pass ut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/mcs/scheduling/server/cluster.go | 5 +++- pkg/schedule/types/type.go | 36 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index 6f80572673c..e45611b6feb 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -314,7 +314,10 @@ func (c *Cluster) updateScheduler() { ) // Create the newly added schedulers. for _, scheduler := range latestSchedulersConfig { - schedulerType := types.ConvertOldStrToType[scheduler.Type] + schedulerType, ok := types.ConvertOldStrToType[scheduler.Type] + if !ok { + log.Warn("scheduler not found ", zap.String("type", scheduler.Type)) + } s, err := schedulers.CreateScheduler( schedulerType, c.coordinator.GetOperatorController(), diff --git a/pkg/schedule/types/type.go b/pkg/schedule/types/type.go index baeb536c987..c9d06f31e9f 100644 --- a/pkg/schedule/types/type.go +++ b/pkg/schedule/types/type.go @@ -106,24 +106,24 @@ var ( // // It is used to convert the old scheduler type to `CheckerSchedulerType`. 
ConvertOldStrToType = map[string]CheckerSchedulerType{ - "balance-leader": BalanceLeaderScheduler, - "balance-region": BalanceRegionScheduler, - "balance-witness": BalanceWitnessScheduler, - "evict-leader": EvictLeaderScheduler, - "evict-slow-store": EvictSlowStoreScheduler, - "evict-slow-trend": EvictSlowTrendScheduler, - "grant-leader": GrantLeaderScheduler, - "grant-hot-region": GrantHotRegionScheduler, - "hot-region": BalanceHotRegionScheduler, - "random-merge": RandomMergeScheduler, - "scatter-range": ScatterRangeScheduler, - "shuffle-hot-region": ShuffleHotRegionScheduler, - "shuffle-leader": ShuffleLeaderScheduler, - "shuffle-region": ShuffleRegionScheduler, - "split-bucket": SplitBucketScheduler, - "transfer-witness-leader": TransferWitnessLeaderScheduler, - "label": LabelScheduler, - "balance-key-range-scheduler": BalanceKeyRangeScheduler, + "balance-leader": BalanceLeaderScheduler, + "balance-region": BalanceRegionScheduler, + "balance-witness": BalanceWitnessScheduler, + "evict-leader": EvictLeaderScheduler, + "evict-slow-store": EvictSlowStoreScheduler, + "evict-slow-trend": EvictSlowTrendScheduler, + "grant-leader": GrantLeaderScheduler, + "grant-hot-region": GrantHotRegionScheduler, + "hot-region": BalanceHotRegionScheduler, + "random-merge": RandomMergeScheduler, + "scatter-range": ScatterRangeScheduler, + "shuffle-hot-region": ShuffleHotRegionScheduler, + "shuffle-leader": ShuffleLeaderScheduler, + "shuffle-region": ShuffleRegionScheduler, + "split-bucket": SplitBucketScheduler, + "transfer-witness-leader": TransferWitnessLeaderScheduler, + "label": LabelScheduler, + "balance-key-range": BalanceKeyRangeScheduler, } // StringToSchedulerType is a map to convert the scheduler string to the CheckerSchedulerType. From d1da5b577e21852949548c112e3b62e1bf61fad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Mon, 13 Jan 2025 15:30:14 +0800 Subject: [PATCH 04/18] lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_key_range.go | 24 ++++++++++++++++--- .../schedulers/balance_key_range_test.go | 7 ------ pkg/schedule/schedulers/init.go | 1 - server/cluster/cluster_test.go | 2 +- tools/pd-ctl/pdctl/command/scheduler.go | 1 + .../pd-ctl/tests/scheduler/scheduler_test.go | 2 +- 6 files changed, 24 insertions(+), 13 deletions(-) delete mode 100644 pkg/schedule/schedulers/balance_key_range_test.go diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go index dea00f45e9e..aace3cc057b 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -5,9 +5,10 @@ import ( "time" "github.com/gorilla/mux" - "github.com/pingcap/log" "github.com/unrolled/render" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/errs" @@ -19,6 +20,7 @@ import ( ) const ( + // DefaultTimeout is the default balance key range scheduler timeout. 
DefaultTimeout = 1 * time.Hour ) @@ -38,7 +40,7 @@ func newBalanceKeyRangeHandler(conf *balanceKeyRangeSchedulerConfig) http.Handle return router } -func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWriter, _ *http.Request) { handler.rd.JSON(w, http.StatusBadRequest, "update config is not supported") } @@ -80,11 +82,24 @@ func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerPar } } +// EncodeConfig serializes the config. func (s *balanceKeyRangeScheduler) EncodeConfig() ([]byte, error) { return s.conf.encodeConfig() } +// ReloadConfig reloads the config. func (s *balanceKeyRangeScheduler) ReloadConfig() error { + s.conf.Lock() + defer s.conf.Unlock() + + newCfg := &balanceKeyRangeSchedulerConfig{} + if err := s.conf.load(newCfg); err != nil { + return err + } + s.conf.Ranges = newCfg.Ranges + s.conf.Timeout = newCfg.Timeout + s.conf.Role = newCfg.Role + s.conf.Engine = newCfg.Engine return nil } @@ -101,11 +116,13 @@ func (s *balanceKeyRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Requ s.handler.ServeHTTP(w, r) } -func (s *balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { +// Schedule schedules the balance key range operator. +func (*balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { log.Debug("balance key range scheduler is scheduling, need to implement") return nil, nil } +// IsScheduleAllowed checks if the scheduler is allowed to schedule new operators. func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpKeyRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { @@ -114,6 +131,7 @@ func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerClust return allowed } +// BalanceKeyRangeCreateOption is used to create a scheduler with an option. 
type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler) // newBalanceKeyRangeScheduler creates a scheduler that tends to keep given peer role on diff --git a/pkg/schedule/schedulers/balance_key_range_test.go b/pkg/schedule/schedulers/balance_key_range_test.go deleted file mode 100644 index f0a402d108a..00000000000 --- a/pkg/schedule/schedulers/balance_key_range_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package schedulers - -import "testing" - -func TestHttpApi(t *testing.T) { - -} diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index f9b296ee6d8..4734b162203 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -590,5 +590,4 @@ func schedulersRegister() { conf.init(sche.GetName(), storage, conf) return sche, nil }) - } diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index c62fb64fc80..1fdac79f539 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -3209,7 +3209,7 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) re.NoError(controller.AddScheduler(gls)) - gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{}), controller.RemoveScheduler) + _, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{}), controller.RemoveScheduler) re.Error(err) gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{"leaner", "tiflash", "100", "200"}), controller.RemoveScheduler) diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 48e8d9ecf2d..9a5993dd8cf 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -375,6 +375,7 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { return c } +// NewBalanceKeyRangeSchedulerCommand returns a command to add a balance-key-range-scheduler. 
func NewBalanceKeyRangeSchedulerCommand() *cobra.Command { c := &cobra.Command{ Use: "balance-key-range-scheduler [--format=raw|encode|hex] ", diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index 50237d8303c..e5575677cb6 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -551,7 +551,7 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-key-range-scheduler", "show"}, &conf) re.Equal("learner", conf["role"]) re.Equal("tiflash", conf["engine"]) - ranges := conf["ranges"].([]interface{})[0].(map[string]interface{}) + ranges := conf["ranges"].([]any)[0].(map[string]any) re.Equal(base64.StdEncoding.EncodeToString([]byte("a")), ranges["start-key"]) re.Equal(base64.StdEncoding.EncodeToString([]byte("b")), ranges["end-key"]) From d0cfc2d352dac6f58c03417cb19a1c0cbf5945db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Mon, 13 Jan 2025 17:17:36 +0800 Subject: [PATCH 05/18] pass ut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/mcs/scheduling/server/cluster.go | 3 ++- pkg/schedule/operator/kind.go | 1 + pkg/schedule/schedulers/balance_key_range.go | 9 +++------ pkg/schedule/schedulers/init.go | 17 +++++++++++++---- pkg/schedule/schedulers/scheduler.go | 2 -- server/api/scheduler.go | 10 ++++++++++ server/cluster/cluster_test.go | 9 ++++++++- tools/pd-ctl/tests/scheduler/scheduler_test.go | 8 +++++--- 8 files changed, 42 insertions(+), 17 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index e45611b6feb..f0b87e82c06 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -316,7 +316,8 @@ func (c *Cluster) updateScheduler() { for _, scheduler := range latestSchedulersConfig { schedulerType, ok := types.ConvertOldStrToType[scheduler.Type] if !ok { - log.Warn("scheduler not found ", zap.String("type", scheduler.Type)) + log.Error("scheduler not found ", zap.String("type", scheduler.Type)) + continue } s, err := schedulers.CreateScheduler( schedulerType, diff --git a/pkg/schedule/operator/kind.go b/pkg/schedule/operator/kind.go index c6e4614f525..0a7ccb34245 100644 --- a/pkg/schedule/operator/kind.go +++ b/pkg/schedule/operator/kind.go @@ -62,6 +62,7 @@ var flagToName = map[OpKind]string{ OpHotRegion: "hot-region", OpReplica: "replica", OpMerge: "merge", + OpKeyRange: "key-range", OpRange: "range", OpWitness: "witness", OpWitnessLeader: "witness-leader", diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go index aace3cc057b..c21c62af71f 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -17,11 +17,7 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/schedule/types" -) - -const ( - // DefaultTimeout is the default balance key range scheduler timeout. 
- DefaultTimeout = 1 * time.Hour + "github.com/tikv/pd/pkg/utils/syncutil" ) type balanceKeyRangeSchedulerHandler struct { @@ -52,7 +48,8 @@ func (handler *balanceKeyRangeSchedulerHandler) listConfig(w http.ResponseWriter } type balanceKeyRangeSchedulerConfig struct { - baseDefaultSchedulerConfig + syncutil.RWMutex + schedulerConfig balanceKeyRangeSchedulerParam } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 4734b162203..37d17ddd9ae 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -19,6 +19,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" @@ -548,7 +549,7 @@ func schedulersRegister() { }) // balance key range scheduler - // args: [role, engine, range1, range2, ...] + // args: [role, engine, timeout, range1, range2, ...] RegisterSliceDecoderBuilder(types.BalanceKeyRangeScheduler, func(args []string) ConfigDecoder { return func(v any) error { conf, ok := v.(*balanceKeyRangeSchedulerConfig) @@ -566,14 +567,22 @@ func schedulersRegister() { if err != nil { return errs.ErrQueryUnescape.Wrap(err) } - ranges, err := getKeyRanges(args[2:]) + timeout, err := url.QueryUnescape(args[2]) + if err != nil { + return errs.ErrQueryUnescape.Wrap(err) + } + duration, err := time.ParseDuration(timeout) + if err != nil { + return errs.ErrURLParse.Wrap(err) + } + ranges, err := getKeyRanges(args[3:]) if err != nil { return err } conf.Ranges = ranges conf.Engine = engine conf.Role = role - conf.Timeout = DefaultTimeout + conf.Timeout = duration return nil } }) @@ -581,7 +590,7 @@ func schedulersRegister() { RegisterScheduler(types.BalanceKeyRangeScheduler, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &balanceKeyRangeSchedulerConfig{ - baseDefaultSchedulerConfig: newBaseDefaultSchedulerConfig(), + schedulerConfig: newBaseDefaultSchedulerConfig(), } if err := decoder(conf); err != nil { return nil, err diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index fd6d6710350..8976c3a1928 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -157,9 +157,7 @@ func CreateScheduler( removeSchedulerCb ...func(string) error, ) (Scheduler, error) { fn, ok := schedulerMap[typ] - log.Info("create scheduler", zap.Any("typ", typ)) if !ok { - log.Warn("create scheduler not found", zap.Any("typ", typ)) return nil, errs.ErrSchedulerCreateFuncNotRegistered.FastGenByArgs(typ) } diff --git a/server/api/scheduler.go b/server/api/scheduler.go index f8b62864c0c..e50e563e5b8 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -113,6 +113,16 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques h.r.JSON(w, http.StatusInternalServerError, err.Error()) return } + defaultTimeout := "1h" + if err := apiutil.CollectStringOption("timeout", input, collector); err != nil { + if errors.ErrorEqual(err, errs.ErrOptionNotExist) { + collector(defaultTimeout) + } else { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + } + if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil { h.r.JSON(w, http.StatusInternalServerError, err.Error()) return diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 1fdac79f539..ca78b4cfdd7 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ 
-3212,9 +3212,16 @@ func TestAddScheduler(t *testing.T) { _, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{}), controller.RemoveScheduler) re.Error(err) - gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{"leaner", "tiflash", "100", "200"}), controller.RemoveScheduler) + gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{"learner", "tiflash", "1h", "100", "200"}), controller.RemoveScheduler) re.NoError(err) re.NoError(controller.AddScheduler(gls)) + conf, err = gls.EncodeConfig() + re.NoError(err) + data = make(map[string]any) + re.NoError(json.Unmarshal(conf, &data)) + re.Equal("learner", data["role"]) + re.Equal("tiflash", data["engine"]) + re.Equal(float64(time.Hour.Nanoseconds()), data["timeout"]) hb, err := schedulers.CreateScheduler(types.BalanceHotRegionScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigJSONDecoder([]byte("{}"))) re.NoError(err) diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index e5575677cb6..38338bc2494 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -548,9 +548,11 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) re.Contains(echo, "Success!") conf = make(map[string]any) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-key-range-scheduler", "show"}, &conf) - re.Equal("learner", conf["role"]) - re.Equal("tiflash", conf["engine"]) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-key-range-scheduler"}, &conf) + return conf["role"] == "learner" && conf["engine"] == "tiflash" + }) + re.Equal(float64(time.Hour.Nanoseconds()), conf["timeout"]) ranges := conf["ranges"].([]any)[0].(map[string]any) re.Equal(base64.StdEncoding.EncodeToString([]byte("a")), ranges["start-key"]) re.Equal(base64.StdEncoding.EncodeToString([]byte("b")), ranges["end-key"]) From fb723a0c5d44b35a300f13e60541596497239fcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Wed, 15 Jan 2025 17:26:16 +0800 Subject: [PATCH 06/18] draft MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/core/basic_cluster.go | 11 ++ pkg/core/constant/kind.go | 5 +- pkg/schedule/filter/filters.go | 2 + pkg/schedule/schedulers/balance_key_range.go | 119 +++++++++++++++++-- pkg/schedule/schedulers/metrics.go | 10 +- pkg/schedule/schedulers/split_bucket.go | 2 +- 6 files changed, 138 insertions(+), 11 deletions(-) diff --git a/pkg/core/basic_cluster.go b/pkg/core/basic_cluster.go index f7c3c5e93b1..1f4b2c5f4e1 100644 --- a/pkg/core/basic_cluster.go +++ b/pkg/core/basic_cluster.go @@ -169,6 +169,17 @@ type KeyRanges struct { krs []*KeyRange } +// NewKeyRanges creates a KeyRanges. 
+func NewKeyRanges(ranges []KeyRange) *KeyRanges { + krs := make([]*KeyRange, 0, len(ranges)) + for _, kr := range ranges { + krs = append(krs, &kr) + } + return &KeyRanges{ + krs, + } +} + // NewKeyRangesWithSize creates a KeyRanges with the hint size. func NewKeyRangesWithSize(size int) *KeyRanges { return &KeyRanges{ diff --git a/pkg/core/constant/kind.go b/pkg/core/constant/kind.go index 39c256c4f5d..933d3463401 100644 --- a/pkg/core/constant/kind.go +++ b/pkg/core/constant/kind.go @@ -66,7 +66,6 @@ const ( RegionKind // WitnessKind indicates the witness kind resource WitnessKind - // ResourceKindLen represents the ResourceKind count ResourceKindLen ) @@ -79,6 +78,10 @@ func (k ResourceKind) String() string { return "region" case WitnessKind: return "witness" + case LearnerKind: + return `learner` + case unKnownKind: + return "unknown" default: return "unknown" } diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go index efb27c3ec6d..6363c903b35 100644 --- a/pkg/schedule/filter/filters.go +++ b/pkg/schedule/filter/filters.go @@ -932,6 +932,8 @@ var ( allSpecialEngines = []string{core.EngineTiFlash} // NotSpecialEngines is used to filter the special engine. NotSpecialEngines = placement.LabelConstraint{Key: core.EngineKey, Op: placement.NotIn, Values: allSpecialEngines} + // TiFlashEngineConstraint is used to filter the TiFlash engine. + TiFlashEngineConstraint = placement.LabelConstraint{Key: core.EngineKey, Op: placement.In, Values: allSpecialEngines} ) type isolationFilter struct { diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go index c21c62af71f..db82bca37ef 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -1,6 +1,7 @@ package schedulers import ( + "github.com/pingcap/kvproto/pkg/metapb" "net/http" "time" @@ -20,6 +21,8 @@ import ( "github.com/tikv/pd/pkg/utils/syncutil" ) +const balanceKeyRangeName = "balance-key-ranges" + type balanceKeyRangeSchedulerHandler struct { rd *render.Render config *balanceKeyRangeSchedulerConfig @@ -104,6 +107,8 @@ type balanceKeyRangeScheduler struct { *BaseScheduler conf *balanceKeyRangeSchedulerConfig handler http.Handler + start time.Time + role Role filters []filter.Filter filterCounter *filter.Counter } @@ -113,18 +118,16 @@ func (s *balanceKeyRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Requ s.handler.ServeHTTP(w, r) } -// Schedule schedules the balance key range operator. -func (*balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { - log.Debug("balance key range scheduler is scheduling, need to implement") - return nil, nil -} - // IsScheduleAllowed checks if the scheduler is allowed to schedule new operators. 
func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpKeyRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { operator.IncOperatorLimitCounter(s.GetType(), operator.OpKeyRange) } + if time.Now().Sub(s.start) > s.conf.Timeout { + allowed = false + balanceExpiredCounter.Inc() + } return allowed } @@ -138,14 +141,114 @@ func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanc BaseScheduler: NewBaseScheduler(opController, types.BalanceKeyRangeScheduler, conf), conf: conf, handler: newBalanceKeyRangeHandler(conf), + start: time.Now(), + role: NewRole(conf.Role), } for _, option := range options { option(s) } + f := filter.NotSpecialEngines + if conf.Engine == core.EngineTiFlash { + f = filter.TiFlashEngineConstraint + } s.filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.Medium}, - filter.NewSpecialUseFilter(s.GetName()), + filter.NewEngineFilter(balanceKeyRangeName, f), } + s.filterCounter = filter.NewCounter(s.GetName()) return s } + +// Schedule schedules the balance key range operator. +func (s *balanceKeyRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { + balanceKeyRangeCounter.Inc() + plan,err:=s.prepare(cluster) + if err != nil { + log.Error("failed to prepare balance key range scheduler", errs.ZapError(err)) + return nil,nil + + } +} + +// BalanceKeyRangeSchedulerPlan is used to record the plan of balance key range scheduler. +type BalanceKeyRangeSchedulerPlan struct { + source []*core.StoreInfo + // store_id -> score + scores map[uint64]uint64 + // store_id -> peer + regions map[uint64]*metapb.Peer +} + +func (s *balanceKeyRangeScheduler) prepare(cluster sche.SchedulerCluster)(*BalanceKeyRangeSchedulerPlan,error) { + krs := core.NewKeyRanges(s.conf.Ranges) + scanRegions, err := cluster.BatchScanRegions(krs) + if err != nil { + return nil,err + } + stores := cluster.GetStores() + sources := filter.SelectSourceStores(stores, s.filters, cluster.GetSchedulerConfig(), nil, nil) + scores := make(map[uint64]uint64, len(sources)) + regions:=make(map[uint64]*metapb.Peer,len(scanRegions)) + for _, region := range scanRegions { + for _, peer := range s.role.getPeers(region) { + scores[peer.GetStoreId()] += 1 + regions[peer.GetStoreId()] = peer + } + } + return &BalanceKeyRangeSchedulerPlan{ + source: sources, + scores: scores, + regions: regions, + },nil +} + + + +type Role int + +const ( + Leader Role = iota + Voter + Learner + Unknown + RoleLen +) + +func (r Role) String() string { + switch r { + case Leader: + return "leader" + case Voter: + return "voter" + case Learner: + return "learner" + default: + return "unknown" + } +} + +func NewRole(role string) Role { + switch role { + case "leader": + return Leader + case "voter": + return Voter + case "learner": + return Learner + default: + return Unknown + } +} + +func (r Role) getPeers(region *core.RegionInfo) []*metapb.Peer {{ + switch r { + case Leader: + return []*metapb.Peer{region.GetLeader()} + case Voter: + return region.GetVoters() + case Learner: + return region.GetLearners() + default: + return nil + } +} diff --git a/pkg/schedule/schedulers/metrics.go b/pkg/schedule/schedulers/metrics.go index bd8a2b4f6ea..4a170d9e146 100644 --- a/pkg/schedule/schedulers/metrics.go +++ b/pkg/schedule/schedulers/metrics.go @@ -222,6 +222,10 @@ func 
transferWitnessLeaderCounterWithEvent(event string) prometheus.Counter { return schedulerCounter.WithLabelValues(types.TransferWitnessLeaderScheduler.String(), event) } +func balanceKeyRangeCounterWithEvent(event string) prometheus.Counter { + return schedulerCounter.WithLabelValues(types.BalanceKeyRangeScheduler.String(), event) +} + // WithLabelValues is a heavy operation, define variable to avoid call it every time. var ( balanceLeaderScheduleCounter = balanceLeaderCounterWithEvent("schedule") @@ -329,7 +333,7 @@ var ( shuffleRegionNoSourceStoreCounter = shuffleRegionCounterWithEvent("no-source-store") splitBucketDisableCounter = splitBucketCounterWithEvent("bucket-disable") - splitBuckerSplitLimitCounter = splitBucketCounterWithEvent("split-limit") + splitBucketSplitLimitCounter = splitBucketCounterWithEvent("split-limit") splitBucketScheduleCounter = splitBucketCounterWithEvent("schedule") splitBucketNoRegionCounter = splitBucketCounterWithEvent("no-region") splitBucketRegionTooSmallCounter = splitBucketCounterWithEvent("region-too-small") @@ -342,4 +346,8 @@ var ( transferWitnessLeaderCounter = transferWitnessLeaderCounterWithEvent("schedule") transferWitnessLeaderNewOperatorCounter = transferWitnessLeaderCounterWithEvent("new-operator") transferWitnessLeaderNoTargetStoreCounter = transferWitnessLeaderCounterWithEvent("no-target-store") + + balanceKeyRangeCounter = balanceKeyRangeCounterWithEvent("schedule") + balanceKeyRangeNewOperatorCounter = balanceKeyRangeCounterWithEvent("new-operator") + balanceExpiredCounter = balanceKeyRangeCounterWithEvent("expired") ) diff --git a/pkg/schedule/schedulers/split_bucket.go b/pkg/schedule/schedulers/split_bucket.go index d6aee65b181..feecad2fb27 100644 --- a/pkg/schedule/schedulers/split_bucket.go +++ b/pkg/schedule/schedulers/split_bucket.go @@ -181,7 +181,7 @@ func (s *splitBucketScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) } allowed := s.BaseScheduler.OpController.OperatorCount(operator.OpSplit) < s.conf.getSplitLimit() if !allowed { - splitBuckerSplitLimitCounter.Inc() + splitBucketSplitLimitCounter.Inc() operator.IncOperatorLimitCounter(s.GetType(), operator.OpSplit) } return allowed From d86148f6160442b7e3bab0e1b3c5f11ee383ff0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Wed, 15 Jan 2025 18:17:13 +0800 Subject: [PATCH 07/18] rename balance-key-range to balance-range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/mcs/scheduling/server/cluster.go | 2 +- pkg/schedule/operator/kind.go | 4 ---- pkg/schedule/operator/operator_test.go | 3 --- pkg/schedule/schedulers/balance_key_range.go | 6 +++--- pkg/schedule/schedulers/init.go | 4 ++-- pkg/schedule/types/type.go | 10 +++++----- server/api/scheduler.go | 2 +- server/cluster/cluster_test.go | 4 ++-- tools/pd-ctl/pdctl/command/scheduler.go | 18 +++++++++--------- tools/pd-ctl/tests/scheduler/scheduler_test.go | 10 +++++----- 10 files changed, 28 insertions(+), 35 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index f0b87e82c06..9ab5d329398 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -316,7 +316,7 @@ func (c *Cluster) updateScheduler() { for _, scheduler := range latestSchedulersConfig { schedulerType, ok := types.ConvertOldStrToType[scheduler.Type] if !ok { - log.Error("scheduler not found ", zap.String("type", scheduler.Type)) + 
log.Error("scheduler not found", zap.String("type", scheduler.Type)) continue } s, err := schedulers.CreateScheduler( diff --git a/pkg/schedule/operator/kind.go b/pkg/schedule/operator/kind.go index 0a7ccb34245..0187a64c568 100644 --- a/pkg/schedule/operator/kind.go +++ b/pkg/schedule/operator/kind.go @@ -35,8 +35,6 @@ const ( OpMerge // Initiated by range scheduler. OpRange - // Initiated by key range scheduler. - OpKeyRange // Initiated by replica checker. OpReplica // Include region split. Initiated by rule checker if `kind & OpAdmin == 0`. @@ -62,7 +60,6 @@ var flagToName = map[OpKind]string{ OpHotRegion: "hot-region", OpReplica: "replica", OpMerge: "merge", - OpKeyRange: "key-range", OpRange: "range", OpWitness: "witness", OpWitnessLeader: "witness-leader", @@ -77,7 +74,6 @@ var nameToFlag = map[string]OpKind{ "replica": OpReplica, "merge": OpMerge, "range": OpRange, - "key-range": OpKeyRange, "witness-leader": OpWitnessLeader, } diff --git a/pkg/schedule/operator/operator_test.go b/pkg/schedule/operator/operator_test.go index 422091dea19..6976b5ca12e 100644 --- a/pkg/schedule/operator/operator_test.go +++ b/pkg/schedule/operator/operator_test.go @@ -476,9 +476,6 @@ func (suite *operatorTestSuite) TestSchedulerKind() { }, { op: NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader), expect: OpLeader, - }, { - op: NewTestOperator(1, &metapb.RegionEpoch{}, OpKeyRange|OpLeader), - expect: OpKeyRange, }, } for _, v := range testData { diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_key_range.go index c21c62af71f..71e0fab29d9 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_key_range.go @@ -121,9 +121,9 @@ func (*balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRu // IsScheduleAllowed checks if the scheduler is allowed to schedule new operators. func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { - allowed := s.OpController.OperatorCount(operator.OpKeyRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() + allowed := s.OpController.OperatorCount(operator.OpRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { - operator.IncOperatorLimitCounter(s.GetType(), operator.OpKeyRange) + operator.IncOperatorLimitCounter(s.GetType(), operator.OpRange) } return allowed } @@ -135,7 +135,7 @@ type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler) // special store balanced. func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanceKeyRangeSchedulerConfig, options ...BalanceKeyRangeCreateOption) Scheduler { s := &balanceKeyRangeScheduler{ - BaseScheduler: NewBaseScheduler(opController, types.BalanceKeyRangeScheduler, conf), + BaseScheduler: NewBaseScheduler(opController, types.BalanceRangeScheduler, conf), conf: conf, handler: newBalanceKeyRangeHandler(conf), } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 37d17ddd9ae..f86e1596f27 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -550,7 +550,7 @@ func schedulersRegister() { // balance key range scheduler // args: [role, engine, timeout, range1, range2, ...] 
- RegisterSliceDecoderBuilder(types.BalanceKeyRangeScheduler, func(args []string) ConfigDecoder { + RegisterSliceDecoderBuilder(types.BalanceRangeScheduler, func(args []string) ConfigDecoder { return func(v any) error { conf, ok := v.(*balanceKeyRangeSchedulerConfig) if !ok { @@ -587,7 +587,7 @@ func schedulersRegister() { } }) - RegisterScheduler(types.BalanceKeyRangeScheduler, func(opController *operator.Controller, + RegisterScheduler(types.BalanceRangeScheduler, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &balanceKeyRangeSchedulerConfig{ schedulerConfig: newBaseDefaultSchedulerConfig(), diff --git a/pkg/schedule/types/type.go b/pkg/schedule/types/type.go index c9d06f31e9f..87e89c18948 100644 --- a/pkg/schedule/types/type.go +++ b/pkg/schedule/types/type.go @@ -70,8 +70,8 @@ const ( TransferWitnessLeaderScheduler CheckerSchedulerType = "transfer-witness-leader-scheduler" // LabelScheduler is label scheduler name. LabelScheduler CheckerSchedulerType = "label-scheduler" - // BalanceKeyRangeScheduler is balance key range scheduler name. - BalanceKeyRangeScheduler CheckerSchedulerType = "balance-key-range-scheduler" + // BalanceRangeScheduler is balance key range scheduler name. + BalanceRangeScheduler CheckerSchedulerType = "balance-range-scheduler" ) // TODO: SchedulerTypeCompatibleMap and ConvertOldStrToType should be removed after @@ -99,7 +99,7 @@ var ( SplitBucketScheduler: "split-bucket", TransferWitnessLeaderScheduler: "transfer-witness-leader", LabelScheduler: "label", - BalanceKeyRangeScheduler: "balance-key-range", + BalanceRangeScheduler: "balance-range", } // ConvertOldStrToType exists for compatibility. @@ -123,7 +123,7 @@ var ( "split-bucket": SplitBucketScheduler, "transfer-witness-leader": TransferWitnessLeaderScheduler, "label": LabelScheduler, - "balance-key-range": BalanceKeyRangeScheduler, + "balance-range": BalanceRangeScheduler, } // StringToSchedulerType is a map to convert the scheduler string to the CheckerSchedulerType. @@ -147,7 +147,7 @@ var ( "split-bucket-scheduler": SplitBucketScheduler, "transfer-witness-leader-scheduler": TransferWitnessLeaderScheduler, "label-scheduler": LabelScheduler, - "balance-key-range-scheduler": BalanceKeyRangeScheduler, + "balance-range-scheduler": BalanceRangeScheduler, } // DefaultSchedulers is the default scheduler types. 
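A minimal sketch (not part of the diff) of how the renamed entries in the three maps above are expected to resolve after this patch; the import path is inferred from the file locations in this series, and the snippet assumes the pd module is available on the module path:

    package main

    import (
        "fmt"

        "github.com/tikv/pd/pkg/schedule/types"
    )

    func main() {
        // Old persisted short name -> new type constant.
        fmt.Println(types.ConvertOldStrToType["balance-range"] == types.BalanceRangeScheduler)
        // Full scheduler name -> type constant.
        fmt.Println(types.StringToSchedulerType["balance-range-scheduler"] == types.BalanceRangeScheduler)
        // Type constant -> short name kept for compatibility.
        fmt.Println(types.SchedulerTypeCompatibleMap[types.BalanceRangeScheduler]) // "balance-range"
    }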
diff --git a/server/api/scheduler.go b/server/api/scheduler.go index e50e563e5b8..d9f8aa6518d 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -99,7 +99,7 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } switch tp { - case types.BalanceKeyRangeScheduler: + case types.BalanceRangeScheduler: exist, _ := h.IsSchedulerExisted(name) if exist { h.r.JSON(w, http.StatusBadRequest, "The scheduler already exists, pls remove the exist scheduler first.") diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index ca78b4cfdd7..d2382ded70c 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -3209,10 +3209,10 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) re.NoError(controller.AddScheduler(gls)) - _, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{}), controller.RemoveScheduler) + _, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{}), controller.RemoveScheduler) re.Error(err) - gls, err = schedulers.CreateScheduler(types.BalanceKeyRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceKeyRangeScheduler, []string{"learner", "tiflash", "1h", "100", "200"}), controller.RemoveScheduler) + gls, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "100", "200"}), controller.RemoveScheduler) re.NoError(err) re.NoError(controller.AddScheduler(gls)) conf, err = gls.EncodeConfig() diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 9a5993dd8cf..50525d885fd 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -162,7 +162,7 @@ func NewAddSchedulerCommand() *cobra.Command { c.AddCommand(NewSlowTrendEvictLeaderSchedulerCommand()) c.AddCommand(NewBalanceWitnessSchedulerCommand()) c.AddCommand(NewTransferWitnessLeaderSchedulerCommand()) - c.AddCommand(NewBalanceKeyRangeSchedulerCommand()) + c.AddCommand(NewBalanceRangeSchedulerCommand()) return c } @@ -375,12 +375,12 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { return c } -// NewBalanceKeyRangeSchedulerCommand returns a command to add a balance-key-range-scheduler. -func NewBalanceKeyRangeSchedulerCommand() *cobra.Command { +// NewBalanceRangeSchedulerCommand returns a command to add a balance-key-range-scheduler. 
+func NewBalanceRangeSchedulerCommand() *cobra.Command { c := &cobra.Command{ - Use: "balance-key-range-scheduler [--format=raw|encode|hex] ", - Short: "add a scheduler to balance region for given key range", - Run: addSchedulerForBalanceKeyRangeCommandFunc, + Use: "balance-range-scheduler [--format=raw|encode|hex] ", + Short: "add a scheduler to balance region for given range", + Run: addSchedulerForBalanceRangeCommandFunc, } c.Flags().String("format", "hex", "the key format") return c @@ -424,7 +424,7 @@ func addSchedulerForGrantHotRegionCommandFunc(cmd *cobra.Command, args []string) postJSON(cmd, schedulersPrefix, input) } -func addSchedulerForBalanceKeyRangeCommandFunc(cmd *cobra.Command, args []string) { +func addSchedulerForBalanceRangeCommandFunc(cmd *cobra.Command, args []string) { if len(args) != 4 { cmd.Println(cmd.UsageString()) return @@ -588,8 +588,8 @@ func newConfigBalanceLeaderCommand() *cobra.Command { func newConfigBalanceKeyRangeCommand() *cobra.Command { c := &cobra.Command{ - Use: "balance-key-range-scheduler", - Short: "balance-key-range-scheduler config", + Use: "balance-range-scheduler", + Short: "balance-range-scheduler config", Run: listSchedulerConfigCommandFunc, } diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index 38338bc2494..f95cf033239 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -543,13 +543,13 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust }) // test balance key range scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler"}, nil) re.NotContains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) re.Contains(echo, "Success!") conf = make(map[string]any) testutil.Eventually(re, func() bool { - mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-key-range-scheduler"}, &conf) + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-range-scheduler"}, &conf) return conf["role"] == "learner" && conf["engine"] == "tiflash" }) re.Equal(float64(time.Hour.Nanoseconds()), conf["timeout"]) @@ -557,10 +557,10 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust re.Equal(base64.StdEncoding.EncodeToString([]byte("a")), ranges["start-key"]) re.Equal(base64.StdEncoding.EncodeToString([]byte("b")), ranges["end-key"]) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-key-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) re.Contains(echo, "400") re.Contains(echo, "scheduler already exists") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-key-range-scheduler"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-range-scheduler"}, nil) re.Contains(echo, "Success!") // test balance leader config From 8bdb7bc5c5b1add45ff0b27d5e81c111d5bca296 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Thu, 16 Jan 2025 15:21:44 +0800 Subject: [PATCH 08/18] use hex encode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/core/basic_cluster.go | 10 ++++++++++ tools/pd-ctl/pdctl/command/scheduler.go | 9 +++++---- tools/pd-ctl/tests/scheduler/scheduler_test.go | 5 ++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pkg/core/basic_cluster.go b/pkg/core/basic_cluster.go index f7c3c5e93b1..45e06648c35 100644 --- a/pkg/core/basic_cluster.go +++ b/pkg/core/basic_cluster.go @@ -16,6 +16,7 @@ package core import ( "bytes" + "encoding/json" "github.com/tikv/pd/pkg/core/constant" ) @@ -156,6 +157,15 @@ type KeyRange struct { EndKey []byte `json:"end-key"` } +// MarshalJSON marshals to json. +func (kr KeyRange) MarshalJSON() ([]byte, error) { + m := map[string]string{ + "start-key": HexRegionKeyStr(kr.StartKey), + "end-key": HexRegionKeyStr(kr.EndKey), + } + return json.Marshal(m) +} + // NewKeyRange create a KeyRange with the given start key and end key. func NewKeyRange(startKey, endKey string) KeyRange { return KeyRange{ diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 50525d885fd..1492709fc79 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -375,12 +375,13 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { return c } -// NewBalanceRangeSchedulerCommand returns a command to add a balance-key-range-scheduler. +// NewBalanceRangeSchedulerCommand returns a command to add a balance-range-scheduler. func NewBalanceRangeSchedulerCommand() *cobra.Command { c := &cobra.Command{ - Use: "balance-range-scheduler [--format=raw|encode|hex] ", - Short: "add a scheduler to balance region for given range", - Run: addSchedulerForBalanceRangeCommandFunc, + Use: "balance-range-scheduler [--format=raw|encode|hex] ", + Short: "add a scheduler to balance region for given range", + Run: addSchedulerForBalanceRangeCommandFunc, + Deprecated: "balance-range will be deprecated in the future, please use sql instead", } c.Flags().String("format", "hex", "the key format") return c diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index f95cf033239..1d011329c42 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -15,7 +15,6 @@ package scheduler_test import ( - "encoding/base64" "encoding/json" "fmt" "reflect" @@ -554,8 +553,8 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust }) re.Equal(float64(time.Hour.Nanoseconds()), conf["timeout"]) ranges := conf["ranges"].([]any)[0].(map[string]any) - re.Equal(base64.StdEncoding.EncodeToString([]byte("a")), ranges["start-key"]) - re.Equal(base64.StdEncoding.EncodeToString([]byte("b")), ranges["end-key"]) + re.Equal(core.HexRegionKeyStr([]byte("a")), ranges["start-key"]) + re.Equal(core.HexRegionKeyStr([]byte("b")), ranges["end-key"]) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) re.Contains(echo, "400") From 0696ba64e4dad421aab0547371315737f63c9f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Tue, 21 Jan 2025 15:01:40 +0800 Subject: [PATCH 09/18] rename MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- ...{balance_key_range.go => balance_range.go} | 74 +++++++++++-------- pkg/schedule/schedulers/init.go | 6 +- tools/pd-ctl/pdctl/command/scheduler.go | 4 +- 3 files changed, 47 insertions(+), 37 deletions(-) rename pkg/schedule/schedulers/{balance_key_range.go => balance_range.go} (55%) diff --git a/pkg/schedule/schedulers/balance_key_range.go b/pkg/schedule/schedulers/balance_range.go similarity index 55% rename from pkg/schedule/schedulers/balance_key_range.go rename to pkg/schedule/schedulers/balance_range.go index 71e0fab29d9..96e015e91c4 100644 --- a/pkg/schedule/schedulers/balance_key_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -1,3 +1,17 @@ +// Copyright 2025 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package schedulers import ( @@ -20,13 +34,13 @@ import ( "github.com/tikv/pd/pkg/utils/syncutil" ) -type balanceKeyRangeSchedulerHandler struct { +type balanceRangeSchedulerHandler struct { rd *render.Render - config *balanceKeyRangeSchedulerConfig + config *balanceRangeSchedulerConfig } -func newBalanceKeyRangeHandler(conf *balanceKeyRangeSchedulerConfig) http.Handler { - handler := &balanceKeyRangeSchedulerHandler{ +func newBalanceRangeHandler(conf *balanceRangeSchedulerConfig) http.Handler { + handler := &balanceRangeSchedulerHandler{ config: conf, rd: render.New(render.Options{IndentJSON: true}), } @@ -36,42 +50,36 @@ func newBalanceKeyRangeHandler(conf *balanceKeyRangeSchedulerConfig) http.Handle return router } -func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *balanceRangeSchedulerHandler) updateConfig(w http.ResponseWriter, _ *http.Request) { handler.rd.JSON(w, http.StatusBadRequest, "update config is not supported") } -func (handler *balanceKeyRangeSchedulerHandler) listConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *balanceRangeSchedulerHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.clone() if err := handler.rd.JSON(w, http.StatusOK, conf); err != nil { log.Error("failed to marshal balance key range scheduler config", errs.ZapError(err)) } } -type balanceKeyRangeSchedulerConfig struct { +type balanceRangeSchedulerConfig struct { syncutil.RWMutex schedulerConfig - balanceKeyRangeSchedulerParam + balanceRangeSchedulerParam } -type balanceKeyRangeSchedulerParam struct { +type balanceRangeSchedulerParam struct { Role string `json:"role"` Engine string `json:"engine"` Timeout time.Duration `json:"timeout"` Ranges []core.KeyRange `json:"ranges"` } -func (conf *balanceKeyRangeSchedulerConfig) encodeConfig() ([]byte, error) { - conf.RLock() - defer conf.RUnlock() - return EncodeConfig(conf) -} - -func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerParam { +func (conf *balanceRangeSchedulerConfig) clone() *balanceRangeSchedulerParam { conf.RLock() defer conf.RUnlock() ranges := make([]core.KeyRange, 
len(conf.Ranges)) copy(ranges, conf.Ranges) - return &balanceKeyRangeSchedulerParam{ + return &balanceRangeSchedulerParam{ Ranges: ranges, Role: conf.Role, Engine: conf.Engine, @@ -80,16 +88,18 @@ func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerPar } // EncodeConfig serializes the config. -func (s *balanceKeyRangeScheduler) EncodeConfig() ([]byte, error) { - return s.conf.encodeConfig() +func (s *balanceRangeScheduler) EncodeConfig() ([]byte, error) { + s.conf.RLock() + defer s.conf.RUnlock() + return EncodeConfig(s.conf) } // ReloadConfig reloads the config. -func (s *balanceKeyRangeScheduler) ReloadConfig() error { +func (s *balanceRangeScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() - newCfg := &balanceKeyRangeSchedulerConfig{} + newCfg := &balanceRangeSchedulerConfig{} if err := s.conf.load(newCfg); err != nil { return err } @@ -100,27 +110,27 @@ func (s *balanceKeyRangeScheduler) ReloadConfig() error { return nil } -type balanceKeyRangeScheduler struct { +type balanceRangeScheduler struct { *BaseScheduler - conf *balanceKeyRangeSchedulerConfig + conf *balanceRangeSchedulerConfig handler http.Handler filters []filter.Filter filterCounter *filter.Counter } // ServeHTTP implements the http.Handler interface. -func (s *balanceKeyRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (s *balanceRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } // Schedule schedules the balance key range operator. -func (*balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (*balanceRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) { log.Debug("balance key range scheduler is scheduling, need to implement") return nil, nil } // IsScheduleAllowed checks if the scheduler is allowed to schedule new operators. -func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { +func (s *balanceRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { operator.IncOperatorLimitCounter(s.GetType(), operator.OpRange) @@ -128,16 +138,16 @@ func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerClust return allowed } -// BalanceKeyRangeCreateOption is used to create a scheduler with an option. -type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler) +// BalanceRangeCreateOption is used to create a scheduler with an option. +type BalanceRangeCreateOption func(s *balanceRangeScheduler) -// newBalanceKeyRangeScheduler creates a scheduler that tends to keep given peer role on +// newBalanceRangeScheduler creates a scheduler that tends to keep given peer role on // special store balanced. 
-func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanceKeyRangeSchedulerConfig, options ...BalanceKeyRangeCreateOption) Scheduler { - s := &balanceKeyRangeScheduler{ +func newBalanceRangeScheduler(opController *operator.Controller, conf *balanceRangeSchedulerConfig, options ...BalanceRangeCreateOption) Scheduler { + s := &balanceRangeScheduler{ BaseScheduler: NewBaseScheduler(opController, types.BalanceRangeScheduler, conf), conf: conf, - handler: newBalanceKeyRangeHandler(conf), + handler: newBalanceRangeHandler(conf), } for _, option := range options { option(s) diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index f86e1596f27..45e456efeb3 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -552,7 +552,7 @@ func schedulersRegister() { // args: [role, engine, timeout, range1, range2, ...] RegisterSliceDecoderBuilder(types.BalanceRangeScheduler, func(args []string) ConfigDecoder { return func(v any) error { - conf, ok := v.(*balanceKeyRangeSchedulerConfig) + conf, ok := v.(*balanceRangeSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() } @@ -589,13 +589,13 @@ func schedulersRegister() { RegisterScheduler(types.BalanceRangeScheduler, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { - conf := &balanceKeyRangeSchedulerConfig{ + conf := &balanceRangeSchedulerConfig{ schedulerConfig: newBaseDefaultSchedulerConfig(), } if err := decoder(conf); err != nil { return nil, err } - sche := newBalanceKeyRangeScheduler(opController, conf) + sche := newBalanceRangeScheduler(opController, conf) conf.init(sche.GetName(), storage, conf) return sche, nil }) diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 1492709fc79..e2bbb09120c 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -562,7 +562,7 @@ func NewConfigSchedulerCommand() *cobra.Command { newConfigEvictSlowStoreCommand(), newConfigShuffleHotRegionSchedulerCommand(), newConfigEvictSlowTrendCommand(), - newConfigBalanceKeyRangeCommand(), + newConfigBalanceRangeCommand(), ) return c } @@ -587,7 +587,7 @@ func newConfigBalanceLeaderCommand() *cobra.Command { return c } -func newConfigBalanceKeyRangeCommand() *cobra.Command { +func newConfigBalanceRangeCommand() *cobra.Command { c := &cobra.Command{ Use: "balance-range-scheduler", Short: "balance-range-scheduler config", From 66f70c2434d394bb71d7cea7aaa09fb613103640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Tue, 21 Jan 2025 17:27:04 +0800 Subject: [PATCH 10/18] impl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/core/constant/kind.go | 4 - pkg/schedule/operator/operator_controller.go | 27 ++- pkg/schedule/schedulers/balance_range.go | 211 +++++++++++++------ pkg/schedule/schedulers/metrics.go | 14 +- 4 files changed, 186 insertions(+), 70 deletions(-) diff --git a/pkg/core/constant/kind.go b/pkg/core/constant/kind.go index 933d3463401..7e9d173c689 100644 --- a/pkg/core/constant/kind.go +++ b/pkg/core/constant/kind.go @@ -78,10 +78,6 @@ func (k ResourceKind) String() string { return "region" case WitnessKind: return "witness" - case LearnerKind: - return `learner` - case unKnownKind: - return "unknown" default: return "unknown" } diff --git 
a/pkg/schedule/operator/operator_controller.go b/pkg/schedule/operator/operator_controller.go index cd2470376e1..61a800ebb9b 100644 --- a/pkg/schedule/operator/operator_controller.go +++ b/pkg/schedule/operator/operator_controller.go @@ -15,6 +15,7 @@ package operator import ( + "bytes" "context" "fmt" "strconv" @@ -828,6 +829,25 @@ func (oc *Controller) GetHistory(start time.Time) []OpHistory { return history } +// OpInfluenceOption is used to filter the region. +// returns true if the region meets the condition, it will ignore this region in the influence calculation. +// returns false if the region does not meet the condition, it will calculate the influence of this region. +type OpInfluenceOption func(region *core.RegionInfo) bool + +// WithRangeOption returns an OpInfluenceOption that filters the region by the label. +func WithRangeOption(ranges []core.KeyRange) OpInfluenceOption { + return func(region *core.RegionInfo) bool { + for _, r := range ranges { + // the start key of the region must greater than the given range start key. + // the end key of the region must less than the given range end key. + if bytes.Compare(region.GetStartKey(), r.StartKey) < 0 || bytes.Compare(r.EndKey, region.GetEndKey()) < 0 { + return false + } + } + return true + } +} + // OperatorCount gets the count of operators filtered by kind. // kind only has one OpKind. func (oc *Controller) OperatorCount(kind OpKind) uint64 { @@ -835,7 +855,7 @@ func (oc *Controller) OperatorCount(kind OpKind) uint64 { } // GetOpInfluence gets OpInfluence. -func (oc *Controller) GetOpInfluence(cluster *core.BasicCluster) OpInfluence { +func (oc *Controller) GetOpInfluence(cluster *core.BasicCluster, ops ...OpInfluenceOption) OpInfluence { influence := OpInfluence{ StoresInfluence: make(map[uint64]*StoreInfluence), } @@ -844,6 +864,11 @@ func (oc *Controller) GetOpInfluence(cluster *core.BasicCluster) OpInfluence { op := value.(*Operator) if !op.CheckTimeout() && !op.CheckSuccess() { region := cluster.GetRegion(op.RegionID()) + for _, opt := range ops { + if !opt(region) { + return true + } + } if region != nil { op.UnfinishedInfluence(influence, region) } diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 087c2743d5a..7dd8a539f48 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -18,27 +18,28 @@ import ( "go.uber.org/zap" "net/http" "sort" + "strconv" "time" "github.com/gorilla/mux" "github.com/unrolled/render" - "github.com/pingcap/log" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/log" - "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/errs" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/schedule/types" "github.com/tikv/pd/pkg/utils/syncutil" ) -const balanceRangeName ="balance-range-scheduler" +const balanceRangeName = "balance-range-scheduler" type balanceRangeSchedulerHandler struct { rd *render.Render @@ -144,7 +145,6 @@ func (s *balanceRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) return allowed } - // BalanceRangeCreateOption is used to create a scheduler with an option. 
type BalanceRangeCreateOption func(s *balanceRangeScheduler) @@ -174,21 +174,32 @@ func newBalanceRangeScheduler(opController *operator.Controller, conf *balanceRa // Schedule schedules the balance key range operator. func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { balanceRangeCounter.Inc() - plan,err:=s.prepare(cluster) + opInfluence := s.OpController.GetOpInfluence(cluster.GetBasicCluster(), operator.WithRangeOption(s.conf.Ranges)) + plan, err := s.prepare(cluster, opInfluence) + if err != nil { + log.Error("failed to prepare balance key range scheduler", errs.ZapError(err)) + return nil, nil + } + downFilter := filter.NewRegionDownFilter() replicaFilter := filter.NewRegionReplicatedFilter(cluster) snapshotFilter := filter.NewSnapshotSendFilter(plan.stores, constant.Medium) - baseRegionFilters := []filter.RegionFilter{downFilter, replicaFilter, snapshotFilter} - - for sourceIndex,sourceStore:=range plan.stores{ - plan.source=sourceStore - switch s.role{ + pendingFilter := filter.NewRegionPendingFilter() + baseRegionFilters := []filter.RegionFilter{downFilter, replicaFilter, snapshotFilter, pendingFilter} + + for sourceIndex, sourceStore := range plan.stores { + plan.source = sourceStore + plan.sourceScore = plan.score(plan.source.GetID()) + if plan.sourceScore < plan.averageScore { + break + } + switch s.role { case Leader: - plan.region=filter.SelectOneRegion(cluster.RandLeaderRegions(plan.sourceStoreID(), s.conf.Ranges), nil,baseRegionFilters...) + plan.region = filter.SelectOneRegion(cluster.RandLeaderRegions(plan.sourceStoreID(), s.conf.Ranges), nil, baseRegionFilters...) case Learner: - plan.region=filter.SelectOneRegion(cluster.RandLearnerRegions(plan.sourceStoreID(), s.conf.Ranges), nil,baseRegionFilters...) + plan.region = filter.SelectOneRegion(cluster.RandLearnerRegions(plan.sourceStoreID(), s.conf.Ranges), nil, baseRegionFilters...) case Follower: - plan.region=filter.SelectOneRegion(cluster.RandFollowerRegions(plan.sourceStoreID(), s.conf.Ranges), nil,baseRegionFilters...) + plan.region = filter.SelectOneRegion(cluster.RandFollowerRegions(plan.sourceStoreID(), s.conf.Ranges), nil, baseRegionFilters...) } if plan.region == nil { balanceRangeNoRegionCounter.Inc() @@ -203,89 +214,171 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun b } // Check region leader if plan.region.GetLeader() == nil { - log.Warn("region have no leader", zap.String("scheduler", s.GetName()), zap.Uint64("region-id", solver.Region.GetID())) + log.Warn("region have no leader", zap.String("scheduler", s.GetName()), zap.Uint64("region-id", plan.region.GetID())) balanceRangeNoLeaderCounter.Inc() continue } plan.fit = replicaFilter.(*filter.RegionReplicatedFilter).GetFit() if op := s.transferPeer(plan, plan.stores[sourceIndex+1:]); op != nil { - op.Counters = append(op.Counters, balanceRegionNewOpCounter) + op.Counters = append(op.Counters, balanceRangeNewOperatorCounter) return []*operator.Operator{op}, nil } } if err != nil { log.Error("failed to prepare balance key range scheduler", errs.ZapError(err)) - return nil,nil + return nil, nil } + return nil, nil } // transferPeer selects the best store to create a new peer to replace the old peer. 
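// In this patch it walks the filtered candidate stores from the lowest score upward,
// stops once a candidate's score rises above the plan's average, and only builds a
// move-peer operator while the source store still outscores the chosen target.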
-func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, dstStores []*storeInfo) *operator.Operator { +func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, dstStores []*core.StoreInfo) *operator.Operator { excludeTargets := plan.region.GetStoreIDs() - if s.role!=Leader{ - excludeTargets = append(excludeTargets, plan.sourceStoreID()) + if s.role != Leader { + excludeTargets = make(map[uint64]struct{}) + } + conf := plan.GetSchedulerConfig() + filters := []filter.Filter{ + filter.NewExcludedFilter(s.GetName(), nil, excludeTargets), + filter.NewPlacementSafeguard(s.GetName(), conf, plan.GetBasicCluster(), plan.GetRuleManager(), plan.region, plan.source, plan.fit), } + candidates := filter.NewCandidates(s.R, dstStores).FilterTarget(conf, nil, s.filterCounter, filters...) + for i := range candidates.Stores { + plan.target = candidates.Stores[len(candidates.Stores)-i-1] + plan.targetScore = plan.score(plan.target.GetID()) + if plan.targetScore > plan.averageScore { + break + } + regionID := plan.region.GetID() + sourceID := plan.source.GetID() + targetID := plan.target.GetID() + if !plan.shouldBalance(s.GetName()) { + continue + } + log.Debug("candidate store", zap.Uint64("region-id", regionID), zap.Uint64("source-store", sourceID), zap.Uint64("target-store", targetID)) + + oldPeer := plan.region.GetStorePeer(sourceID) + newPeer := &metapb.Peer{StoreId: plan.target.GetID(), Role: oldPeer.Role} + op, err := operator.CreateMovePeerOperator(s.GetName(), plan, plan.region, operator.OpRange, oldPeer.GetStoreId(), newPeer) + if err != nil { + balanceRangeCreateOpFailCounter.Inc() + return nil + } + sourceLabel := strconv.FormatUint(sourceID, 10) + targetLabel := strconv.FormatUint(targetID, 10) + op.FinishedCounters = append(op.FinishedCounters, + balanceDirectionCounter.WithLabelValues(s.GetName(), sourceLabel, targetLabel), + ) + op.SetAdditionalInfo("sourceScore", strconv.FormatInt(plan.sourceScore, 10)) + op.SetAdditionalInfo("targetScore", strconv.FormatInt(plan.targetScore, 10)) + return op + } + balanceRangeNoReplacementCounter.Inc() return nil } // balanceRangeSchedulerPlan is used to record the plan of balance key range scheduler. 
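// A store's score is the number of peers of the configured role that it holds inside
// the scheduled key ranges, adjusted by the influence of in-flight operators, and
// averageScore is the total peer count divided by the number of candidate stores
// (see prepare below).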
type balanceRangeSchedulerPlan struct { + sche.SchedulerCluster // stores is sorted by score desc - stores []*storeInfo - source *storeInfo - target *storeInfo - region *core.RegionInfo - fit *placement.RegionFit + stores []*core.StoreInfo + // sourceMap records the storeID -> score + sourceMap map[uint64]int64 + source *core.StoreInfo + sourceScore int64 + target *core.StoreInfo + targetScore int64 + region *core.RegionInfo + fit *placement.RegionFit + averageScore int64 } type storeInfo struct { store *core.StoreInfo - score uint64 + score int64 } -func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster)(*balanceRangeSchedulerPlan,error) { +func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluence operator.OpInfluence) (*balanceRangeSchedulerPlan, error) { krs := core.NewKeyRanges(s.conf.Ranges) scanRegions, err := cluster.BatchScanRegions(krs) if err != nil { - return nil,err + return nil, err } sources := filter.SelectSourceStores(cluster.GetStores(), s.filters, cluster.GetSchedulerConfig(), nil, nil) - storeInfos:=make(map[uint64]*storeInfo,len(sources)) + storeInfos := make(map[uint64]*storeInfo, len(sources)) for _, source := range sources { storeInfos[source.GetID()] = &storeInfo{store: source} } + totalScore := int64(0) for _, region := range scanRegions { for _, peer := range s.role.getPeers(region) { storeInfos[peer.GetStoreId()].score += 1 + totalScore += 1 } } - stores:=make([]*storeInfo,0,len(storeInfos)) - for _, store := range storeInfos { - stores = append(stores, store) + storeList := make([]*storeInfo, 0, len(storeInfos)) + for storeID, store := range storeInfos { + if influence := opInfluence.GetStoreInfluence(storeID); influence != nil { + store.score += s.role.getStoreInfluence(influence) + } + storeList = append(storeList, store) } - sort.Slice(stores, func(i, j int) bool { - return stores[i].score > stores[j].score + sort.Slice(storeList, func(i, j int) bool { + return storeList[i].score > storeList[j].score }) + sourceMap := make(map[uint64]int64) + for _, store := range storeList { + sourceMap[store.store.GetID()] = store.score + } + + stores := make([]*core.StoreInfo, 0, len(storeList)) + for _, store := range storeList { + stores = append(stores, store.store) + } + averageScore := totalScore / int64(len(storeList)) return &balanceRangeSchedulerPlan{ - stores:stores, - source: nil, - target: nil, - region: nil, - },nil + SchedulerCluster: cluster, + stores: stores, + sourceMap: sourceMap, + source: nil, + target: nil, + region: nil, + averageScore: averageScore, + }, nil } func (p *balanceRangeSchedulerPlan) sourceStoreID() uint64 { - return p.source.store.GetID() + return p.source.GetID() } func (p *balanceRangeSchedulerPlan) targetStoreID() uint64 { - return p.target.store.GetID() + return p.target.GetID() } +func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 { + return p.sourceMap[storeID] +} +func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { + sourceScore := p.score(p.sourceStoreID()) + targetScore := p.score(p.targetStoreID()) + shouldBalance := sourceScore > targetScore + if !shouldBalance && log.GetLevel() <= zap.DebugLevel { + log.Debug("skip balance ", + zap.String("scheduler", scheduler), + zap.Uint64("region-id", p.region.GetID()), + zap.Uint64("source-store", p.sourceStoreID()), + zap.Uint64("target-store", p.targetStoreID()), + zap.Int64("source-score", p.sourceScore), + zap.Int64("target-score", p.targetScore), + zap.Int64("average-region-size", p.averageScore), + ) + } + 
return shouldBalance +} type Role int @@ -310,33 +403,33 @@ func (r Role) String() string { } } -func NewRole(role string) Role { - switch role { - case "leader": - return Leader - case "follower": - return Follower - case "learner": - return Learner +func (r Role) getPeers(region *core.RegionInfo) []*metapb.Peer { + switch r { + case Leader: + return []*metapb.Peer{region.GetLeader()} + case Follower: + followers := region.GetFollowers() + ret := make([]*metapb.Peer, len(followers)) + for _, peer := range followers { + ret = append(ret, peer) + } + return ret + case Learner: + return region.GetLearners() default: - return Unknown + return nil } } -func (r Role) getPeers(region *core.RegionInfo) []*metapb.Peer {{ +func (r Role) getStoreInfluence(influence *operator.StoreInfluence) int64 { switch r { case Leader: - return []*metapb.Peer{region.GetLeader()} + return influence.LeaderCount case Follower: - followers:=region.GetFollowers() - ret:=make([]*metapb.Peer,len(followers)) - for _,peer:=range followers{ - ret=append(ret,peer) - } - return ret + return influence.RegionCount case Learner: - return region.GetLearners() + return influence.RegionCount default: - return nil + return 0 } } diff --git a/pkg/schedule/schedulers/metrics.go b/pkg/schedule/schedulers/metrics.go index 38c61443179..8297f3e30c0 100644 --- a/pkg/schedule/schedulers/metrics.go +++ b/pkg/schedule/schedulers/metrics.go @@ -347,10 +347,12 @@ var ( transferWitnessLeaderNewOperatorCounter = transferWitnessLeaderCounterWithEvent("new-operator") transferWitnessLeaderNoTargetStoreCounter = transferWitnessLeaderCounterWithEvent("no-target-store") - balanceRangeCounter = balanceRangeCounterWithEvent("schedule") - balanceKeyRangeNewOperatorCounter = balanceRangeCounterWithEvent("new-operator") - balanceRangeExpiredCounter = balanceRangeCounterWithEvent("expired") - balanceRangeNoRegionCounter = balanceRangeCounterWithEvent("no-region") - balanceRangeHotCounter = balanceRangeCounterWithEvent("region-hot") - balanceRangeNoLeaderCounter = balanceRangeCounterWithEvent("no-leader") + balanceRangeCounter = balanceRangeCounterWithEvent("schedule") + balanceRangeNewOperatorCounter = balanceRangeCounterWithEvent("new-operator") + balanceRangeExpiredCounter = balanceRangeCounterWithEvent("expired") + balanceRangeNoRegionCounter = balanceRangeCounterWithEvent("no-region") + balanceRangeHotCounter = balanceRangeCounterWithEvent("region-hot") + balanceRangeNoLeaderCounter = balanceRangeCounterWithEvent("no-leader") + balanceRangeCreateOpFailCounter = balanceRangeCounterWithEvent("create-operator-fail") + balanceRangeNoReplacementCounter = balanceRangeCounterWithEvent("no-replacement") ) From 5d5ee0fe261a126d33a9d48a275fd46516bf835c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Wed, 22 Jan 2025 15:53:57 +0800 Subject: [PATCH 11/18] add table configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_range.go | 18 ++++++++++-------- pkg/schedule/schedulers/init.go | 7 ++++++- server/api/scheduler.go | 10 ++++++++-- tools/pd-ctl/pdctl/command/scheduler.go | 13 +++++++------ tools/pd-ctl/tests/scheduler/scheduler_test.go | 6 +++--- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 96e015e91c4..6e81633b80d 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ 
b/pkg/schedule/schedulers/balance_range.go @@ -68,10 +68,11 @@ type balanceRangeSchedulerConfig struct { } type balanceRangeSchedulerParam struct { - Role string `json:"role"` - Engine string `json:"engine"` - Timeout time.Duration `json:"timeout"` - Ranges []core.KeyRange `json:"ranges"` + Role string `json:"role"` + Engine string `json:"engine"` + Timeout time.Duration `json:"timeout"` + Ranges []core.KeyRange `json:"ranges"` + TableName string `json:"table-name"` } func (conf *balanceRangeSchedulerConfig) clone() *balanceRangeSchedulerParam { @@ -80,10 +81,11 @@ func (conf *balanceRangeSchedulerConfig) clone() *balanceRangeSchedulerParam { ranges := make([]core.KeyRange, len(conf.Ranges)) copy(ranges, conf.Ranges) return &balanceRangeSchedulerParam{ - Ranges: ranges, - Role: conf.Role, - Engine: conf.Engine, - Timeout: conf.Timeout, + Ranges: ranges, + Role: conf.Role, + Engine: conf.Engine, + Timeout: conf.Timeout, + TableName: conf.TableName, } } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 45e456efeb3..d34a7f89168 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -575,7 +575,11 @@ func schedulersRegister() { if err != nil { return errs.ErrURLParse.Wrap(err) } - ranges, err := getKeyRanges(args[3:]) + tableName, err := url.QueryUnescape(args[3]) + if err != nil { + return errs.ErrURLParse.Wrap(err) + } + ranges, err := getKeyRanges(args[4:]) if err != nil { return err } @@ -583,6 +587,7 @@ func schedulersRegister() { conf.Engine = engine conf.Role = role conf.Timeout = duration + conf.TableName = tableName return nil } }) diff --git a/server/api/scheduler.go b/server/api/scheduler.go index d9f8aa6518d..867772e4e95 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -113,6 +113,7 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques h.r.JSON(w, http.StatusInternalServerError, err.Error()) return } + defaultTimeout := "1h" if err := apiutil.CollectStringOption("timeout", input, collector); err != nil { if errors.ErrorEqual(err, errs.ErrOptionNotExist) { @@ -123,12 +124,17 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } } - if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil { + if err := apiutil.CollectStringOption("table-name", input, collector); err != nil { h.r.JSON(w, http.StatusInternalServerError, err.Error()) return } - if err := apiutil.CollectEscapeStringOption("end_key", input, collector); err != nil { + if err := apiutil.CollectEscapeStringOption("start-key", input, collector); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + + if err := apiutil.CollectEscapeStringOption("end-key", input, collector); err != nil { h.r.JSON(w, http.StatusInternalServerError, err.Error()) return } diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index e2bbb09120c..337f75437d8 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -378,7 +378,7 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { // NewBalanceRangeSchedulerCommand returns a command to add a balance-range-scheduler. 
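// With the table-name argument added in this patch, an invocation looks like
// (raw key format, values borrowed from the pd-ctl test updated below):
//   scheduler add balance-range-scheduler --format=raw tiflash learner test a b
// i.e. engine, role, table name, start key, end key.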
func NewBalanceRangeSchedulerCommand() *cobra.Command { c := &cobra.Command{ - Use: "balance-range-scheduler [--format=raw|encode|hex] ", + Use: "balance-range-scheduler [--format=raw|encode|hex] ", Short: "add a scheduler to balance region for given range", Run: addSchedulerForBalanceRangeCommandFunc, Deprecated: "balance-range will be deprecated in the future, please use sql instead", @@ -426,16 +426,16 @@ func addSchedulerForGrantHotRegionCommandFunc(cmd *cobra.Command, args []string) } func addSchedulerForBalanceRangeCommandFunc(cmd *cobra.Command, args []string) { - if len(args) != 4 { + if len(args) != 5 { cmd.Println(cmd.UsageString()) return } - startKey, err := parseKey(cmd.Flags(), args[2]) + startKey, err := parseKey(cmd.Flags(), args[3]) if err != nil { cmd.Println("Error: ", err) return } - endKey, err := parseKey(cmd.Flags(), args[3]) + endKey, err := parseKey(cmd.Flags(), args[4]) if err != nil { cmd.Println("Error: ", err) return @@ -445,8 +445,9 @@ func addSchedulerForBalanceRangeCommandFunc(cmd *cobra.Command, args []string) { input["name"] = cmd.Name() input["engine"] = args[0] input["role"] = args[1] - input["start_key"] = url.QueryEscape(startKey) - input["end_key"] = url.QueryEscape(endKey) + input["table-name"] = args[2] + input["start-key"] = url.QueryEscape(startKey) + input["end-key"] = url.QueryEscape(endKey) postJSON(cmd, schedulersPrefix, input) } diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index 1d011329c42..46df4a4b064 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -544,19 +544,19 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust // test balance key range scheduler echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler"}, nil) re.NotContains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "test", "a", "b"}, nil) re.Contains(echo, "Success!") conf = make(map[string]any) testutil.Eventually(re, func() bool { mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-range-scheduler"}, &conf) - return conf["role"] == "learner" && conf["engine"] == "tiflash" + return conf["role"] == "learner" && conf["engine"] == "tiflash" && conf["table-name"] == "test" }) re.Equal(float64(time.Hour.Nanoseconds()), conf["timeout"]) ranges := conf["ranges"].([]any)[0].(map[string]any) re.Equal(core.HexRegionKeyStr([]byte("a")), ranges["start-key"]) re.Equal(core.HexRegionKeyStr([]byte("b")), ranges["end-key"]) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "a", "b"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "learner", "a", "b"}, nil) re.Contains(echo, "400") re.Contains(echo, "scheduler already exists") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-range-scheduler"}, nil) From 6a440babbb4dc5624ccaeca33580eb2de8e795e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Wed, 22 Jan 2025 17:00:09 +0800 Subject: [PATCH 12/18] add test for getPeets MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_range.go | 21 ++++++- pkg/schedule/schedulers/balance_range_test.go | 55 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 pkg/schedule/schedulers/balance_range_test.go diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 87ff98038e5..2b39a2b8494 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -15,7 +15,6 @@ package schedulers import ( - "go.uber.org/zap" "net/http" "sort" "strconv" @@ -26,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" + "go.uber.org/zap" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" @@ -168,6 +168,7 @@ func newBalanceRangeScheduler(opController *operator.Controller, conf *balanceRa s.filters = []filter.Filter{ filter.NewEngineFilter(balanceRangeName, f), } + s.role = newRole(s.conf.Role) s.filterCounter = filter.NewCounter(s.GetName()) return s @@ -405,19 +406,33 @@ func (r Role) String() string { } } +func newRole(role string) Role { + switch role { + case "leader": + return Leader + case "follower": + return Follower + case "learner": + return Learner + default: + return Unknown + } +} + func (r Role) getPeers(region *core.RegionInfo) []*metapb.Peer { switch r { case Leader: return []*metapb.Peer{region.GetLeader()} case Follower: followers := region.GetFollowers() - ret := make([]*metapb.Peer, len(followers)) + ret := make([]*metapb.Peer, 0, len(followers)) for _, peer := range followers { ret = append(ret, peer) } return ret case Learner: - return region.GetLearners() + learners := region.GetLearners() + return learners default: return nil } diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go new file mode 100644 index 00000000000..e63961d9f6f --- /dev/null +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -0,0 +1,55 @@ +// Copyright 2025 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package schedulers + +import ( + "testing" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" + + "github.com/tikv/pd/pkg/core" +) + +func TestGetPeers(t *testing.T) { + re := require.New(t) + learner := &metapb.Peer{StoreId: 1, Id: 1, Role: metapb.PeerRole_Learner} + leader := &metapb.Peer{StoreId: 2, Id: 2} + follower1 := &metapb.Peer{StoreId: 3, Id: 3} + follower2 := &metapb.Peer{StoreId: 4, Id: 4} + region := core.NewRegionInfo(&metapb.Region{Id: 100, Peers: []*metapb.Peer{ + leader, follower1, follower2, learner, + }}, leader, core.WithLearners([]*metapb.Peer{learner})) + for _, v := range []struct { + role string + peers []*metapb.Peer + }{ + { + role: "leader", + peers: []*metapb.Peer{leader}, + }, + { + role: "follower", + peers: []*metapb.Peer{follower1, follower2}, + }, + { + role: "learner", + peers: []*metapb.Peer{learner}, + }, + } { + role := newRole(v.role) + re.Equal(v.peers, role.getPeers(region)) + } +} From 1e1d9342bcf11fcc06290c8cd2a9063cda47462c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Tue, 11 Feb 2025 16:18:18 +0800 Subject: [PATCH 13/18] add test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_range.go | 9 ++- pkg/schedule/schedulers/balance_range_test.go | 79 ++++++++++++++++++- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 2b39a2b8494..07c392fff24 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -341,7 +341,10 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen for _, store := range storeList { stores = append(stores, store.store) } - averageScore := totalScore / int64(len(storeList)) + averageScore := int64(0) + if len(storeList) != 0 { + averageScore = totalScore / int64(len(storeList)) + } return &balanceRangeSchedulerPlan{ SchedulerCluster: cluster, stores: stores, @@ -366,9 +369,7 @@ func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 { } func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { - sourceScore := p.score(p.sourceStoreID()) - targetScore := p.score(p.targetStoreID()) - shouldBalance := sourceScore > targetScore + shouldBalance := p.sourceScore > p.targetScore if !shouldBalance && log.GetLevel() <= zap.DebugLevel { log.Debug("skip balance ", zap.String("scheduler", scheduler), diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go index e63961d9f6f..f2bd9116551 100644 --- a/pkg/schedule/schedulers/balance_range_test.go +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -15,14 +15,40 @@ package schedulers import ( + "context" + "github.com/tikv/pd/pkg/schedule/types" + "github.com/tikv/pd/pkg/storage" "testing" - "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/mock/mockcluster" + "github.com/tikv/pd/pkg/schedule/operator" ) +type balanceRangeSchedulerTestSuite struct { + suite.Suite + cancel context.CancelFunc + tc *mockcluster.Cluster + oc *operator.Controller +} + +func TestBalanceRangeSchedulerTestSuite(t *testing.T) { + suite.Run(t, new(balanceRangeSchedulerTestSuite)) +} + +func (suite 
*balanceRangeSchedulerTestSuite) SetupTest() { + suite.cancel, _, suite.tc, suite.oc = prepareSchedulersTest() +} + +func (suite *balanceRangeSchedulerTestSuite) TearDownTest() { + suite.cancel() +} + func TestGetPeers(t *testing.T) { re := require.New(t) learner := &metapb.Peer{StoreId: 1, Id: 1, Role: metapb.PeerRole_Learner} @@ -53,3 +79,54 @@ func TestGetPeers(t *testing.T) { re.Equal(v.peers, role.getPeers(region)) } } + +func TestBalanceRangeShouldBalance(t *testing.T) { + re := require.New(t) + for _, v := range []struct { + sourceScore int64 + targetScore int64 + shouldBalance bool + }{ + { + 100, + 10, + true, + }, + { + 10, + 10, + false, + }, + } { + plan := balanceRangeSchedulerPlan{ + sourceScore: v.sourceScore, + targetScore: v.targetScore, + } + re.Equal(plan.shouldBalance(balanceRangeName), v.shouldBalance) + } +} + +//func TestBalanceRangePrepare(t *testing.T) { +// re := require.New(t) +// cancel, _, tc, oc := prepareSchedulersTest() +// defer cancel() +// // args: [role, engine, timeout, range1, range2, ...] +//} + +func TestBalanceRangeSchedule(t *testing.T) { + re := require.New(t) + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + // args: [role, engine, timeout, range1, range2, ...] + scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"leader", "tikv", "1h", "100", "200"})) + re.Nil(err) + op, _ := scheduler.Schedule(tc, true) + re.Empty(op) + for i := 0; i <= 4; i++ { + tc.AddLeaderStore(uint64(int64(i)), i*10) + } + tc.AddLeaderRegionWithRange(1, "100", "100", 1, 2, 3, 4) + tc.AddLeaderRegionWithRange(2, "110", "120", 1, 2, 3, 4) + op, _ = scheduler.Schedule(tc, true) + re.NotEmpty(op) +} From d0f2caea9696660af33c3354014f7bbaf149074c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Thu, 13 Feb 2025 10:20:44 +0800 Subject: [PATCH 14/18] add test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_range.go | 94 ++++++++-- pkg/schedule/schedulers/balance_range_test.go | 174 +++++++++++++----- pkg/schedule/schedulers/init.go | 18 +- 3 files changed, 218 insertions(+), 68 deletions(-) diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index db02cdd0431..25546c8f07d 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -77,7 +77,7 @@ type balanceRangeSchedulerConfig struct { type balanceRangeSchedulerJob struct { JobID uint64 `json:"job-id"` Role Role `json:"role"` - Engine string `json:"engine"` + Engine Engine `json:"engine"` Timeout time.Duration `json:"timeout"` Ranges []core.KeyRange `json:"ranges"` Alias string `json:"alias"` @@ -235,7 +235,7 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun b downFilter := filter.NewRegionDownFilter() replicaFilter := filter.NewRegionReplicatedFilter(cluster) - snapshotFilter := filter.NewSnapshotSendFilter(plan.stores, constant.Medium) + snapshotFilter := filter.NewSnapshotSendFilter(cluster.GetStores(), constant.Medium) pendingFilter := filter.NewRegionPendingFilter() baseRegionFilters := []filter.RegionFilter{downFilter, replicaFilter, snapshotFilter, pendingFilter} @@ -276,19 +276,13 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun b return []*operator.Operator{op}, nil } } - - if err != nil { - 
log.Error("failed to prepare balance key range scheduler", errs.ZapError(err)) - return nil, nil - - } return nil, nil } // transferPeer selects the best store to create a new peer to replace the old peer. func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, dstStores []*core.StoreInfo) *operator.Operator { excludeTargets := plan.region.GetStoreIDs() - if plan.job.Role != leader { + if plan.job.Role == leader { excludeTargets = make(map[uint64]struct{}) } conf := plan.GetSchedulerConfig() @@ -312,8 +306,25 @@ func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, ds log.Debug("candidate store", zap.Uint64("region-id", regionID), zap.Uint64("source-store", sourceID), zap.Uint64("target-store", targetID)) oldPeer := plan.region.GetStorePeer(sourceID) - newPeer := &metapb.Peer{StoreId: plan.target.GetID(), Role: oldPeer.Role} - op, err := operator.CreateMovePeerOperator(s.GetName(), plan, plan.region, operator.OpRange, oldPeer.GetStoreId(), newPeer) + exist := false + if plan.job.Role == leader { + peers := plan.region.GetPeers() + for _, peer := range peers { + if peer.GetStoreId() == targetID { + exist = true + break + } + } + } + var op *operator.Operator + var err error + if exist { + op, err = operator.CreateTransferLeaderOperator(s.GetName(), plan, plan.region, plan.targetStoreID(), []uint64{}, operator.OpRange) + } else { + newPeer := &metapb.Peer{StoreId: plan.target.GetID(), Role: oldPeer.Role} + op, err = operator.CreateMovePeerOperator(s.GetName(), plan, plan.region, operator.OpRange, oldPeer.GetStoreId(), newPeer) + } + if err != nil { balanceRangeCreateOpFailCounter.Inc() return nil @@ -336,8 +347,8 @@ type balanceRangeSchedulerPlan struct { sche.SchedulerCluster // stores is sorted by score desc stores []*core.StoreInfo - // sourceMap records the storeID -> score - sourceMap map[uint64]int64 + // scoreMap records the storeID -> score + scoreMap map[uint64]int64 source *core.StoreInfo sourceScore int64 target *core.StoreInfo @@ -359,7 +370,14 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen if err != nil { return nil, err } - sources := filter.SelectSourceStores(cluster.GetStores(), s.filters, cluster.GetSchedulerConfig(), nil, nil) + filters := s.filters + if job.Engine == TiFlash { + filters = append(filters, filter.NewEngineFilter(balanceRangeName, filter.TiFlashEngineConstraint)) + } + sources := filter.SelectSourceStores(cluster.GetStores(), filters, cluster.GetSchedulerConfig(), nil, nil) + if sources == nil { + return nil, errs.ErrStoresNotEnough.FastGenByArgs("no store to select") + } storeInfos := make(map[uint64]*storeInfo, len(sources)) for _, source := range sources { storeInfos[source.GetID()] = &storeInfo{store: source} @@ -380,7 +398,10 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen storeList = append(storeList, store) } sort.Slice(storeList, func(i, j int) bool { - return storeList[i].score > storeList[j].score + role := job.Role + iop := role.getStoreInfluence(opInfluence.GetStoreInfluence(storeList[i].store.GetID())) + jop := role.getStoreInfluence(opInfluence.GetStoreInfluence(storeList[j].store.GetID())) + return storeList[i].score+iop > storeList[j].score+jop }) sourceMap := make(map[uint64]int64) for _, store := range storeList { @@ -398,7 +419,7 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen return &balanceRangeSchedulerPlan{ SchedulerCluster: cluster, stores: stores, - sourceMap: sourceMap, + scoreMap: 
sourceMap, source: nil, target: nil, region: nil, @@ -416,7 +437,7 @@ func (p *balanceRangeSchedulerPlan) targetStoreID() uint64 { } func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 { - return p.sourceMap[storeID] + return p.scoreMap[storeID] } func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { @@ -459,6 +480,41 @@ func (r Role) String() string { } } +type Engine int + +const ( + TiKV Engine = iota + TiFlash + Unknown +) + +func (e Engine) String() string { + switch e { + case TiKV: + return "tikv" + case TiFlash: + return "tiflash" + default: + return "unknown" + } +} + +func (e Engine) MarshalJSON() ([]byte, error) { + return []byte(`"` + e.String() + `"`), nil +} + +// NewEngine creates a new engine. +func NewEngine(role string) Engine { + switch role { + case "tikv": + return TiKV + case "tiflash": + return TiFlash + default: + return Unknown + } +} + // JobStatus is the status of the job. type JobStatus int @@ -489,11 +545,11 @@ func (s JobStatus) MarshalJSON() ([]byte, error) { func NewRole(role string) Role { switch role { case "leader": - return learner + return leader case "follower": return follower case "learner": - return leader + return learner default: return unknown } diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go index ee0900e2577..49bc348e15f 100644 --- a/pkg/schedule/schedulers/balance_range_test.go +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -15,40 +15,20 @@ package schedulers import ( - "context" - "github.com/tikv/pd/pkg/schedule/types" - "github.com/tikv/pd/pkg/storage" - "testing" + "fmt" + "github.com/tikv/pd/pkg/schedule/operator" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" + "testing" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/mock/mockcluster" - "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/schedule/types" + "github.com/tikv/pd/pkg/storage" ) -type balanceRangeSchedulerTestSuite struct { - suite.Suite - cancel context.CancelFunc - tc *mockcluster.Cluster - oc *operator.Controller -} - -func TestBalanceRangeSchedulerTestSuite(t *testing.T) { - suite.Run(t, new(balanceRangeSchedulerTestSuite)) -} - -func (suite *balanceRangeSchedulerTestSuite) SetupTest() { - suite.cancel, _, suite.tc, suite.oc = prepareSchedulersTest() -} - -func (suite *balanceRangeSchedulerTestSuite) TearDownTest() { - suite.cancel() -} - func TestGetPeers(t *testing.T) { re := require.New(t) learner := &metapb.Peer{StoreId: 1, Id: 1, Role: metapb.PeerRole_Learner} @@ -80,6 +60,39 @@ func TestGetPeers(t *testing.T) { } } +func TestJobStatus(t *testing.T) { + re := require.New(t) + conf := balanceRangeSchedulerConfig{} + for _, v := range []struct { + jobStatus JobStatus + begin bool + finish bool + }{ + { + pending, + true, + false, + }, + { + running, + false, + true, + }, + { + finished, + false, + false, + }, + } { + job := &balanceRangeSchedulerJob{ + Status: v.jobStatus, + } + re.Equal(v.begin, conf.begin(job)) + job.Status = v.jobStatus + re.Equal(v.finish, conf.finish(job)) + } +} + func TestBalanceRangeShouldBalance(t *testing.T) { re := require.New(t) for _, v := range []struct { @@ -106,27 +119,100 @@ func TestBalanceRangeShouldBalance(t *testing.T) { } } -//func TestBalanceRangePrepare(t *testing.T) { -// re := require.New(t) -// cancel, _, tc, oc := prepareSchedulersTest() -// defer 
cancel() -// // args: [role, engine, timeout, range1, range2, ...] -//} +func TestBalanceRangePlan(t *testing.T) { + re := require.New(t) + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + sc := newBalanceRangeScheduler(oc, &balanceRangeSchedulerConfig{}).(*balanceRangeScheduler) + for i := 1; i <= 3; i++ { + tc.AddLeaderStore(uint64(i), 0) + } + tc.AddLeaderRegionWithRange(1, "100", "110", 1, 2, 3) + job := &balanceRangeSchedulerJob{ + Engine: TiKV, + Role: leader, + Ranges: []core.KeyRange{core.NewKeyRange("100", "110")}, + } + plan, err := sc.prepare(tc, *operator.NewOpInfluence(), job) + re.NoError(err) + re.NotNil(plan) + re.Len(plan.stores, 3) + re.Len(plan.scoreMap, 3) + re.Equal(plan.scoreMap[1], int64(1)) +} -func TestBalanceRangeSchedule(t *testing.T) { +func TestTIKVEngine(t *testing.T) { re := require.New(t) cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - // args: [role, engine, timeout, range1, range2, ...] - scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"leader", "tikv", "1h", "100", "200"})) + scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"leader", "tikv", "1h", "test", "100", "200"})) re.Nil(err) - op, _ := scheduler.Schedule(tc, true) - re.Empty(op) - for i := 0; i <= 4; i++ { - tc.AddLeaderStore(uint64(int64(i)), i*10) + ops, _ := scheduler.Schedule(tc, true) + re.Empty(ops) + for i := 1; i <= 3; i++ { + tc.AddLeaderStore(uint64(i), 0) } - tc.AddLeaderRegionWithRange(1, "100", "100", 1, 2, 3, 4) - tc.AddLeaderRegionWithRange(2, "110", "120", 1, 2, 3, 4) - op, _ = scheduler.Schedule(tc, true) - re.NotEmpty(op) + // add regions: + // store-1: 3 leader regions + // store-2: 2 leader regions + // store-3: 1 leader regions + tc.AddLeaderRegionWithRange(1, "100", "110", 1, 2, 3) + tc.AddLeaderRegionWithRange(2, "110", "120", 1, 2, 3) + tc.AddLeaderRegionWithRange(3, "120", "140", 1, 2, 3) + tc.AddLeaderRegionWithRange(4, "140", "160", 2, 1, 3) + tc.AddLeaderRegionWithRange(5, "160", "180", 2, 1, 3) + tc.AddLeaderRegionWithRange(5, "180", "200", 3, 1, 2) + // case1: transfer leader from store 1 to store 3 + ops, _ = scheduler.Schedule(tc, true) + re.NotEmpty(ops) + op := ops[0] + re.Equal(op.GetAdditionalInfo("sourceScore"), "3") + re.Equal(op.GetAdditionalInfo("targetScore"), "1") + re.Contains(op.Brief(), "transfer leader: store 1 to 3") + tc.AddLeaderStore(4, 0) + + // case2: move peer from store 1 to store 4 + ops, _ = scheduler.Schedule(tc, true) + re.NotEmpty(ops) + op = ops[0] + re.Equal(op.GetAdditionalInfo("sourceScore"), "3") + re.Equal(op.GetAdditionalInfo("targetScore"), "0") + re.Contains(op.Brief(), "mv peer: store [1] to [4]") +} + +func TestTIFLASHEngine(t *testing.T) { + re := require.New(t) + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + tikvCount := 3 + for i := 1; i <= tikvCount; i++ { + tc.AddLeaderStore(uint64(i), 0) + } + for i := tikvCount + 1; i <= tikvCount+3; i++ { + tc.AddLabelsStore(uint64(i), 0, map[string]string{"engine": "tiflash"}) + } + for i := 1; i <= 3; i++ { + tc.AddRegionWithLearner(uint64(i), 1, []uint64{2, 3}, []uint64{4}) + } + startKey := fmt.Sprintf("%20d0", 1) + endKey := fmt.Sprintf("%20d0", 10) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "tiflash", + ID: "1", + Role: placement.Learner, + Count: 1, + StartKey: []byte(startKey), + EndKey: 
[]byte(endKey), + LabelConstraints: []placement.LabelConstraint{ + {Key: "engine", Op: "in", Values: []string{"tiflash"}}, + }, + }) + + scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "test", startKey, endKey})) + re.NoError(err) + ops, _ := scheduler.Schedule(tc, false) + re.NotEmpty(ops) + op := ops[0] + re.Equal(op.GetAdditionalInfo("sourceScore"), "3") + re.Contains(op.Brief(), "mv peer: store [4] to") } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 9d178a7bce1..b09b20d9659 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -559,18 +559,22 @@ func schedulersRegister() { if len(args) < 5 { return errs.ErrSchedulerConfig.FastGenByArgs("args length must be greater than 4") } - role, err := url.QueryUnescape(args[0]) + roleString, err := url.QueryUnescape(args[0]) if err != nil { return errs.ErrQueryUnescape.Wrap(err) } - jobRole := NewRole(role) - if jobRole == unknown { + role := NewRole(roleString) + if role == unknown { return errs.ErrQueryUnescape.FastGenByArgs("role") } - engine, err := url.QueryUnescape(args[1]) + engineString, err := url.QueryUnescape(args[1]) if err != nil { return errs.ErrQueryUnescape.Wrap(err) } + engine := NewEngine(engineString) + if engine == Unknown { + return errs.ErrQueryUnescape.FastGenByArgs("engine") + } timeout, err := url.QueryUnescape(args[2]) if err != nil { return errs.ErrQueryUnescape.Wrap(err) @@ -592,8 +596,12 @@ func schedulersRegister() { id = conf.jobs[len(conf.jobs)-1].JobID + 1 } + if engine == TiFlash && role != learner { + return errs.ErrURLParse.FastGenByArgs("TiFlash only support learner role") + } + job := &balanceRangeSchedulerJob{ - Role: jobRole, + Role: role, Engine: engine, Timeout: duration, Alias: alias, From 2854acd2b78cb49317d4a3b29efb826924e26521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Thu, 13 Feb 2025 16:03:32 +0800 Subject: [PATCH 15/18] add more test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/core/constant/kind.go | 1 + pkg/schedule/filter/filters.go | 4 +- pkg/schedule/filter/filters_test.go | 10 +++ .../operator/operator_controller_test.go | 24 +++++ pkg/schedule/schedulers/balance_range.go | 88 +++++++++++++------ pkg/schedule/schedulers/balance_range_test.go | 72 +++++++-------- pkg/schedule/schedulers/init.go | 4 +- pkg/schedule/schedulers/metrics.go | 1 + server/api/scheduler.go | 4 - server/cluster/cluster_test.go | 12 --- tools/pd-ctl/pdctl/command/scheduler.go | 12 --- .../pd-ctl/tests/scheduler/scheduler_test.go | 10 --- 12 files changed, 134 insertions(+), 108 deletions(-) diff --git a/pkg/core/constant/kind.go b/pkg/core/constant/kind.go index 7e9d173c689..39c256c4f5d 100644 --- a/pkg/core/constant/kind.go +++ b/pkg/core/constant/kind.go @@ -66,6 +66,7 @@ const ( RegionKind // WitnessKind indicates the witness kind resource WitnessKind + // ResourceKindLen represents the ResourceKind count ResourceKindLen ) diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go index 6363c903b35..2e1951f4690 100644 --- a/pkg/schedule/filter/filters.go +++ b/pkg/schedule/filter/filters.go @@ -932,8 +932,8 @@ var ( allSpecialEngines = []string{core.EngineTiFlash} // NotSpecialEngines is used to filter the special engine. 
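// In the balance-range scheduler's prepare(), a TiKV job keeps the stores matching
// NotSpecialEngines and a TiFlash job keeps the stores matching SpecialEngines,
// both applied through filter.NewEngineFilter (see the balance_range.go hunks
// later in this series).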
NotSpecialEngines = placement.LabelConstraint{Key: core.EngineKey, Op: placement.NotIn, Values: allSpecialEngines} - // TiFlashEngineConstraint is used to filter the TiFlash engine. - TiFlashEngineConstraint = placement.LabelConstraint{Key: core.EngineKey, Op: placement.In, Values: allSpecialEngines} + // SpecialEngines is used to filter the TiFlash engine. + SpecialEngines = placement.LabelConstraint{Key: core.EngineKey, Op: placement.In, Values: allSpecialEngines} ) type isolationFilter struct { diff --git a/pkg/schedule/filter/filters_test.go b/pkg/schedule/filter/filters_test.go index c093f249083..481a39f97d3 100644 --- a/pkg/schedule/filter/filters_test.go +++ b/pkg/schedule/filter/filters_test.go @@ -474,6 +474,16 @@ func TestSpecialUseFilter(t *testing.T) { } } +func TestSpecialEngine(t *testing.T) { + re := require.New(t) + tiflash := core.NewStoreInfoWithLabel(1, map[string]string{core.EngineKey: core.EngineTiFlash}) + tikv := core.NewStoreInfoWithLabel(2, map[string]string{core.EngineKey: core.EngineTiKV}) + re.True(SpecialEngines.MatchStore(tiflash)) + re.False(SpecialEngines.MatchStore(tikv)) + re.True(NotSpecialEngines.MatchStore(tikv)) + re.False(NotSpecialEngines.MatchStore(tiflash)) +} + func BenchmarkCloneRegionTest(b *testing.B) { epoch := &metapb.RegionEpoch{ ConfVer: 1, diff --git a/pkg/schedule/operator/operator_controller_test.go b/pkg/schedule/operator/operator_controller_test.go index 3c9abe54f24..3026f0bbc45 100644 --- a/pkg/schedule/operator/operator_controller_test.go +++ b/pkg/schedule/operator/operator_controller_test.go @@ -653,6 +653,30 @@ func (suite *operatorControllerTestSuite) TestDispatchOutdatedRegion() { re.Equal(3, stream.MsgLength()) } +func (suite *operatorControllerTestSuite) TestInfluenceOpt() { + re := suite.Require() + cluster := mockcluster.NewCluster(suite.ctx, mockconfig.NewTestOptions()) + stream := hbstream.NewTestHeartbeatStreams(suite.ctx, cluster, false /* no need to run */) + controller := NewController(suite.ctx, cluster.GetBasicCluster(), cluster.GetSharedConfig(), stream) + cluster.AddLeaderRegionWithRange(1, "200", "300", 1, 2, 3) + op := &Operator{ + regionID: 1, + kind: OpRegion, + steps: []OpStep{ + AddLearner{ToStore: 2, PeerID: 2}, + }, + timeout: time.Minute, + } + re.True(controller.addOperatorInner(op)) + op.Start() + inf := controller.GetOpInfluence(cluster.GetBasicCluster()) + re.Len(inf.StoresInfluence, 1) + inf = controller.GetOpInfluence(cluster.GetBasicCluster(), WithRangeOption([]core.KeyRange{{StartKey: []byte("220"), EndKey: []byte("280")}})) + re.Empty(inf.StoresInfluence) + inf = controller.GetOpInfluence(cluster.GetBasicCluster(), WithRangeOption([]core.KeyRange{{StartKey: []byte("100"), EndKey: []byte("400")}})) + re.Len(inf.StoresInfluence, 1) +} + func (suite *operatorControllerTestSuite) TestCalcInfluence() { re := suite.Require() cluster := mockcluster.NewCluster(suite.ctx, mockconfig.NewTestOptions()) diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 25546c8f07d..3c170fa17f5 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -22,10 +22,10 @@ import ( "github.com/gorilla/mux" "github.com/unrolled/render" + "go.uber.org/zap" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" - "go.uber.org/zap" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" @@ -77,7 +77,7 @@ type balanceRangeSchedulerConfig struct { type balanceRangeSchedulerJob struct { JobID uint64 `json:"job-id"` 
Role Role `json:"role"` - Engine Engine `json:"engine"` + Engine engine `json:"engine"` Timeout time.Duration `json:"timeout"` Ranges []core.KeyRange `json:"ranges"` Alias string `json:"alias"` @@ -96,6 +96,10 @@ func (conf *balanceRangeSchedulerConfig) begin(job *balanceRangeSchedulerJob) bo now := time.Now() job.Start = &now job.Status = running + if err := conf.save(); err != nil { + log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) + return false + } return true } @@ -107,10 +111,14 @@ func (conf *balanceRangeSchedulerConfig) finish(job *balanceRangeSchedulerJob) b } now := time.Now() job.Finish = &now + if err := conf.save(); err != nil { + log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) + return false + } return true } -func (conf *balanceRangeSchedulerConfig) pop() *balanceRangeSchedulerJob { +func (conf *balanceRangeSchedulerConfig) peek() *balanceRangeSchedulerJob { conf.RLock() defer conf.RLock() for _, job := range conf.jobs { @@ -182,12 +190,12 @@ func (s *balanceRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) if !allowed { operator.IncOperatorLimitCounter(s.GetType(), operator.OpRange) } - job := s.conf.pop() + job := s.conf.peek() if job != nil { if job.Status == pending { s.conf.begin(job) } - if time.Now().Sub(*job.Start) > job.Timeout { + if time.Since(*job.Start) > job.Timeout { s.conf.finish(job) balanceRangeExpiredCounter.Inc() } @@ -220,12 +228,14 @@ func newBalanceRangeScheduler(opController *operator.Controller, conf *balanceRa } // Schedule schedules the balance key range operator. -func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { balanceRangeCounter.Inc() - job := s.conf.pop() + job := s.conf.peek() if job == nil { + balanceRangeNoJobCounter.Inc() return nil, nil } + opInfluence := s.OpController.GetOpInfluence(cluster.GetBasicCluster(), operator.WithRangeOption(job.Ranges)) plan, err := s.prepare(cluster, opInfluence, job) if err != nil { @@ -336,6 +346,7 @@ func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, ds ) op.SetAdditionalInfo("sourceScore", strconv.FormatInt(plan.sourceScore, 10)) op.SetAdditionalInfo("targetScore", strconv.FormatInt(plan.targetScore, 10)) + op.SetAdditionalInfo("tolerate", strconv.FormatInt(plan.tolerate, 10)) return op } balanceRangeNoReplacementCounter.Inc() @@ -357,6 +368,8 @@ type balanceRangeSchedulerPlan struct { fit *placement.RegionFit averageScore int64 job *balanceRangeSchedulerJob + opInfluence operator.OpInfluence + tolerate int64 } type storeInfo struct { @@ -371,8 +384,8 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen return nil, err } filters := s.filters - if job.Engine == TiFlash { - filters = append(filters, filter.NewEngineFilter(balanceRangeName, filter.TiFlashEngineConstraint)) + if job.Engine == tiflash { + filters = append(filters, filter.NewEngineFilter(balanceRangeName, filter.SpecialEngines)) } sources := filter.SelectSourceStores(cluster.GetStores(), filters, cluster.GetSchedulerConfig(), nil, nil) if sources == nil { @@ -389,6 +402,10 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen totalScore += 1 } } + tolerate := int64(float64(len(scanRegions)) * adjustRatio) + if tolerate < 1 { + tolerate = 1 + } storeList := make([]*storeInfo, 0, 
len(storeInfos)) for storeID, store := range storeInfos { @@ -425,6 +442,8 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen region: nil, averageScore: averageScore, job: job, + opInfluence: opInfluence, + tolerate: tolerate, }, nil } @@ -441,16 +460,33 @@ func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 { } func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { - shouldBalance := p.sourceScore > p.targetScore + sourceInfluence := p.opInfluence.GetStoreInfluence(p.sourceStoreID()) + sourceInf := p.job.Role.getStoreInfluence(sourceInfluence) + if sourceInf < 0 { + sourceInf = -sourceInf + } + sourceScore := p.sourceScore - sourceInf - p.tolerate + + targetInfluence := p.opInfluence.GetStoreInfluence(p.targetStoreID()) + targetInf := p.job.Role.getStoreInfluence(targetInfluence) + if targetInf < 0 { + targetInf = -targetInf + } + targetScore := p.targetScore + targetInf + p.tolerate + + shouldBalance := sourceScore >= targetScore if !shouldBalance && log.GetLevel() <= zap.DebugLevel { log.Debug("skip balance ", zap.String("scheduler", scheduler), zap.Uint64("region-id", p.region.GetID()), zap.Uint64("source-store", p.sourceStoreID()), zap.Uint64("target-store", p.targetStoreID()), - zap.Int64("source-score", p.sourceScore), - zap.Int64("target-score", p.targetScore), + zap.Int64("origin-source-score", p.sourceScore), + zap.Int64("origin-target-score", p.targetScore), + zap.Int64("influence-source-score", sourceScore), + zap.Int64("influence-target-score", targetScore), zap.Int64("average-region-size", p.averageScore), + zap.Int64("tolerate", p.tolerate), ) } return shouldBalance @@ -480,38 +516,40 @@ func (r Role) String() string { } } -type Engine int +// engine is the engine of the store. +type engine int const ( - TiKV Engine = iota - TiFlash - Unknown + tiKV engine = iota + tiflash + notSupported ) -func (e Engine) String() string { +func (e engine) String() string { switch e { - case TiKV: + case tiKV: return "tikv" - case TiFlash: + case tiflash: return "tiflash" default: - return "unknown" + return "not-supported" } } -func (e Engine) MarshalJSON() ([]byte, error) { +// MarshalJSON marshals to json. +func (e engine) MarshalJSON() ([]byte, error) { return []byte(`"` + e.String() + `"`), nil } // NewEngine creates a new engine. 
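For reference, the counting step in prepare() boils down to one point per peer of the job's role inside the scanned range, averaged over the candidate stores. A minimal sketch, assuming the Role.getPeers helper used in prepare() above; scorePipeline is a hypothetical name, not part of the patch:

func scorePipeline(scanRegions []*core.RegionInfo, role Role, stores []*core.StoreInfo) (map[uint64]int64, int64) {
	// one point per peer of the requested role, keyed by store ID
	scoreMap := make(map[uint64]int64, len(stores))
	total := int64(0)
	for _, region := range scanRegions {
		for _, peer := range role.getPeers(region) {
			scoreMap[peer.GetStoreId()]++
			total++
		}
	}
	// average score across the candidate stores, used as the balance baseline
	average := int64(0)
	if len(stores) != 0 {
		average = total / int64(len(stores))
	}
	return scoreMap, average
}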
-func NewEngine(role string) Engine { +func NewEngine(role string) engine { switch role { case "tikv": - return TiKV + return tiKV case "tiflash": - return TiFlash + return tiflash default: - return Unknown + return notSupported } } diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go index 49bc348e15f..3193fe935ae 100644 --- a/pkg/schedule/schedulers/balance_range_test.go +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -16,14 +16,14 @@ package schedulers import ( "fmt" - "github.com/tikv/pd/pkg/schedule/operator" - "testing" - "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/types" "github.com/tikv/pd/pkg/storage" @@ -61,8 +61,12 @@ func TestGetPeers(t *testing.T) { } func TestJobStatus(t *testing.T) { + s := storage.NewStorageWithMemoryBackend() re := require.New(t) - conf := balanceRangeSchedulerConfig{} + conf := &balanceRangeSchedulerConfig{ + schedulerConfig: &baseSchedulerConfig{}, + } + conf.init(balanceRangeName, s, conf) for _, v := range []struct { jobStatus JobStatus begin bool @@ -93,32 +97,6 @@ func TestJobStatus(t *testing.T) { } } -func TestBalanceRangeShouldBalance(t *testing.T) { - re := require.New(t) - for _, v := range []struct { - sourceScore int64 - targetScore int64 - shouldBalance bool - }{ - { - 100, - 10, - true, - }, - { - 10, - 10, - false, - }, - } { - plan := balanceRangeSchedulerPlan{ - sourceScore: v.sourceScore, - targetScore: v.targetScore, - } - re.Equal(plan.shouldBalance(balanceRangeName), v.shouldBalance) - } -} - func TestBalanceRangePlan(t *testing.T) { re := require.New(t) cancel, _, tc, oc := prepareSchedulersTest() @@ -129,7 +107,7 @@ func TestBalanceRangePlan(t *testing.T) { } tc.AddLeaderRegionWithRange(1, "100", "110", 1, 2, 3) job := &balanceRangeSchedulerJob{ - Engine: TiKV, + Engine: tiKV, Role: leader, Ranges: []core.KeyRange{core.NewKeyRange("100", "110")}, } @@ -138,7 +116,8 @@ func TestBalanceRangePlan(t *testing.T) { re.NotNil(plan) re.Len(plan.stores, 3) re.Len(plan.scoreMap, 3) - re.Equal(plan.scoreMap[1], int64(1)) + re.Equal(int64(1), plan.scoreMap[1]) + re.Equal(int64(1), plan.tolerate) } func TestTIKVEngine(t *testing.T) { @@ -146,7 +125,7 @@ func TestTIKVEngine(t *testing.T) { cancel, _, tc, oc := prepareSchedulersTest() defer cancel() scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"leader", "tikv", "1h", "test", "100", "200"})) - re.Nil(err) + re.NoError(err) ops, _ := scheduler.Schedule(tc, true) re.Empty(ops) for i := 1; i <= 3; i++ { @@ -166,8 +145,8 @@ func TestTIKVEngine(t *testing.T) { ops, _ = scheduler.Schedule(tc, true) re.NotEmpty(ops) op := ops[0] - re.Equal(op.GetAdditionalInfo("sourceScore"), "3") - re.Equal(op.GetAdditionalInfo("targetScore"), "1") + re.Equal("3", op.GetAdditionalInfo("sourceScore")) + re.Equal("1", op.GetAdditionalInfo("targetScore")) re.Contains(op.Brief(), "transfer leader: store 1 to 3") tc.AddLeaderStore(4, 0) @@ -175,8 +154,8 @@ func TestTIKVEngine(t *testing.T) { ops, _ = scheduler.Schedule(tc, true) re.NotEmpty(ops) op = ops[0] - re.Equal(op.GetAdditionalInfo("sourceScore"), "3") - re.Equal(op.GetAdditionalInfo("targetScore"), "0") + re.Equal("3", op.GetAdditionalInfo("sourceScore")) + 
re.Equal("0", op.GetAdditionalInfo("targetScore")) re.Contains(op.Brief(), "mv peer: store [1] to [4]") } @@ -185,15 +164,15 @@ func TestTIFLASHEngine(t *testing.T) { cancel, _, tc, oc := prepareSchedulersTest() defer cancel() tikvCount := 3 + // 3 tikv and 3 tiflash for i := 1; i <= tikvCount; i++ { tc.AddLeaderStore(uint64(i), 0) } for i := tikvCount + 1; i <= tikvCount+3; i++ { tc.AddLabelsStore(uint64(i), 0, map[string]string{"engine": "tiflash"}) } - for i := 1; i <= 3; i++ { - tc.AddRegionWithLearner(uint64(i), 1, []uint64{2, 3}, []uint64{4}) - } + tc.AddRegionWithLearner(uint64(1), 1, []uint64{2, 3}, []uint64{4}) + startKey := fmt.Sprintf("%20d0", 1) endKey := fmt.Sprintf("%20d0", 10) tc.RuleManager.SetRule(&placement.Rule{ @@ -208,11 +187,22 @@ func TestTIFLASHEngine(t *testing.T) { }, }) + // generate a balance range scheduler with tiflash engine scheduler, err := CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "test", startKey, endKey})) re.NoError(err) + // tiflash-4 only has 1 region, so it doesn't need to balance ops, _ := scheduler.Schedule(tc, false) + re.Empty(ops) + + // add 2 learner on tiflash-4 + for i := 2; i <= 3; i++ { + tc.AddRegionWithLearner(uint64(i), 1, []uint64{2, 3}, []uint64{4}) + } + ops, _ = scheduler.Schedule(tc, false) re.NotEmpty(ops) op := ops[0] - re.Equal(op.GetAdditionalInfo("sourceScore"), "3") + re.Equal("3", op.GetAdditionalInfo("sourceScore")) + re.Equal("0", op.GetAdditionalInfo("targetScore")) + re.Equal("1", op.GetAdditionalInfo("tolerate")) re.Contains(op.Brief(), "mv peer: store [4] to") } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index b09b20d9659..463bf87a363 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -572,7 +572,7 @@ func schedulersRegister() { return errs.ErrQueryUnescape.Wrap(err) } engine := NewEngine(engineString) - if engine == Unknown { + if engine == notSupported { return errs.ErrQueryUnescape.FastGenByArgs("engine") } timeout, err := url.QueryUnescape(args[2]) @@ -596,7 +596,7 @@ func schedulersRegister() { id = conf.jobs[len(conf.jobs)-1].JobID + 1 } - if engine == TiFlash && role != learner { + if engine == tiflash && role != learner { return errs.ErrURLParse.FastGenByArgs("TiFlash only support learner role") } diff --git a/pkg/schedule/schedulers/metrics.go b/pkg/schedule/schedulers/metrics.go index 8297f3e30c0..1ef8447f83b 100644 --- a/pkg/schedule/schedulers/metrics.go +++ b/pkg/schedule/schedulers/metrics.go @@ -355,4 +355,5 @@ var ( balanceRangeNoLeaderCounter = balanceRangeCounterWithEvent("no-leader") balanceRangeCreateOpFailCounter = balanceRangeCounterWithEvent("create-operator-fail") balanceRangeNoReplacementCounter = balanceRangeCounterWithEvent("no-replacement") + balanceRangeNoJobCounter = balanceRangeCounterWithEvent("no-job") ) diff --git a/server/api/scheduler.go b/server/api/scheduler.go index 4390bb94d94..52e65053b13 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -124,11 +124,7 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } } -<<<<<<< HEAD - if err := apiutil.CollectStringOption("table-name", input, collector); err != nil { -======= if err := apiutil.CollectStringOption("alias", input, collector); err != nil { ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 h.r.JSON(w, http.StatusInternalServerError, err.Error()) return } diff --git 
a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index c1776a34116..1a588c24fb7 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -3213,22 +3213,11 @@ func TestAddScheduler(t *testing.T) { _, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{}), controller.RemoveScheduler) re.Error(err) -<<<<<<< HEAD - gls, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "100", "200"}), controller.RemoveScheduler) -======= gls, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "test", "100", "200"}), controller.RemoveScheduler) ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 re.NoError(err) re.NoError(controller.AddScheduler(gls)) conf, err = gls.EncodeConfig() re.NoError(err) -<<<<<<< HEAD - data = make(map[string]any) - re.NoError(json.Unmarshal(conf, &data)) - re.Equal("learner", data["role"]) - re.Equal("tiflash", data["engine"]) - re.Equal(float64(time.Hour.Nanoseconds()), data["timeout"]) -======= var cfg []map[string]any re.NoError(json.Unmarshal(conf, &cfg)) @@ -3236,7 +3225,6 @@ func TestAddScheduler(t *testing.T) { re.Equal("tiflash", cfg[0]["engine"]) re.Equal("test", cfg[0]["alias"]) re.Equal(float64(time.Hour.Nanoseconds()), cfg[0]["timeout"]) ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 hb, err := schedulers.CreateScheduler(types.BalanceHotRegionScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigJSONDecoder([]byte("{}"))) re.NoError(err) diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 8f00052a8ac..7db455b2119 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -377,19 +377,11 @@ func NewBalanceWitnessSchedulerCommand() *cobra.Command { // NewBalanceRangeSchedulerCommand returns a command to add a balance-range-scheduler. 
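Since the config is now persisted as a list of jobs, callers decode it as a JSON array rather than a single object, exactly as the updated cluster test above does. A minimal sketch, assuming encoding/json; decodeJobs is a hypothetical helper name:

func decodeJobs(conf []byte) ([]map[string]any, error) {
	// each element carries one job: role, engine, alias, timeout, ranges, status
	var jobs []map[string]any
	if err := json.Unmarshal(conf, &jobs); err != nil {
		return nil, err
	}
	return jobs, nil
}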
func NewBalanceRangeSchedulerCommand() *cobra.Command { -<<<<<<< HEAD - c := &cobra.Command{ - Use: "balance-range-scheduler [--format=raw|encode|hex] ", - Short: "add a scheduler to balance region for given range", - Run: addSchedulerForBalanceRangeCommandFunc, - Deprecated: "balance-range will be deprecated in the future, please use sql instead", -======= // todo: add deprecated warning if sql support c := &cobra.Command{ Use: "balance-range-scheduler [--format=raw|encode|hex] ", Short: "add a scheduler to balance region for given range", Run: addSchedulerForBalanceRangeCommandFunc, ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 } c.Flags().String("format", "hex", "the key format") return c @@ -453,11 +445,7 @@ func addSchedulerForBalanceRangeCommandFunc(cmd *cobra.Command, args []string) { input["name"] = cmd.Name() input["engine"] = args[0] input["role"] = args[1] -<<<<<<< HEAD - input["table-name"] = args[2] -======= input["alias"] = args[2] ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 input["start-key"] = url.QueryEscape(startKey) input["end-key"] = url.QueryEscape(endKey) diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index f6c1ba47dc2..d060fc0e3eb 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -546,15 +546,6 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust re.NotContains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-range-scheduler", "--format=raw", "tiflash", "learner", "test", "a", "b"}, nil) re.Contains(echo, "Success!") -<<<<<<< HEAD - conf = make(map[string]any) - testutil.Eventually(re, func() bool { - mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-range-scheduler"}, &conf) - return conf["role"] == "learner" && conf["engine"] == "tiflash" && conf["table-name"] == "test" - }) - re.Equal(float64(time.Hour.Nanoseconds()), conf["timeout"]) - ranges := conf["ranges"].([]any)[0].(map[string]any) -======= var rangeConf []map[string]any var jobConf map[string]any testutil.Eventually(re, func() bool { @@ -565,7 +556,6 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust re.Equal(float64(time.Hour.Nanoseconds()), jobConf["timeout"]) re.Equal("pending", jobConf["status"]) ranges := jobConf["ranges"].([]any)[0].(map[string]any) ->>>>>>> 4eb7235c629747692b6d336bf626cb86d31ec023 re.Equal(core.HexRegionKeyStr([]byte("a")), ranges["start-key"]) re.Equal(core.HexRegionKeyStr([]byte("b")), ranges["end-key"]) From 246aaa6330bbbbf9e540c1efd4f3b46f67f3550d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Tue, 18 Feb 2025 17:37:08 +0800 Subject: [PATCH 16/18] address comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/operator/operator_controller.go | 2 +- pkg/schedule/schedulers/balance_range.go | 129 ++++++------------ pkg/schedule/schedulers/balance_range_test.go | 4 +- pkg/schedule/schedulers/init.go | 11 +- pkg/schedule/schedulers/scheduler.go | 2 +- 5 files changed, 50 insertions(+), 98 deletions(-) diff --git a/pkg/schedule/operator/operator_controller.go b/pkg/schedule/operator/operator_controller.go index 61a800ebb9b..492d5e9bd1e 100644 --- a/pkg/schedule/operator/operator_controller.go +++ b/pkg/schedule/operator/operator_controller.go @@ -834,7 +834,7 @@ func (oc 
*Controller) GetHistory(start time.Time) []OpHistory { // returns false if the region does not meet the condition, it will calculate the influence of this region. type OpInfluenceOption func(region *core.RegionInfo) bool -// WithRangeOption returns an OpInfluenceOption that filters the region by the label. +// WithRangeOption returns an OpInfluenceOption that filters the region by the key ranges. func WithRangeOption(ranges []core.KeyRange) OpInfluenceOption { return func(region *core.RegionInfo) bool { for _, r := range ranges { diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 3c170fa17f5..753d6556633 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -39,8 +39,6 @@ import ( "github.com/tikv/pd/pkg/utils/syncutil" ) -const balanceRangeName = "balance-range-scheduler" - type balanceRangeSchedulerHandler struct { rd *render.Render config *balanceRangeSchedulerConfig @@ -77,7 +75,7 @@ type balanceRangeSchedulerConfig struct { type balanceRangeSchedulerJob struct { JobID uint64 `json:"job-id"` Role Role `json:"role"` - Engine engine `json:"engine"` + Engine string `json:"engine"` Timeout time.Duration `json:"timeout"` Ranges []core.KeyRange `json:"ranges"` Alias string `json:"alias"` @@ -98,6 +96,8 @@ func (conf *balanceRangeSchedulerConfig) begin(job *balanceRangeSchedulerJob) bo job.Status = running if err := conf.save(); err != nil { log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) + job.Status = pending + job.Start = nil return false } return true @@ -111,8 +111,11 @@ func (conf *balanceRangeSchedulerConfig) finish(job *balanceRangeSchedulerJob) b } now := time.Now() job.Finish = &now + job.Status = finished if err := conf.save(); err != nil { log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) + job.Status = running + job.Finish = nil return false } return true @@ -120,7 +123,7 @@ func (conf *balanceRangeSchedulerConfig) finish(job *balanceRangeSchedulerJob) b func (conf *balanceRangeSchedulerConfig) peek() *balanceRangeSchedulerJob { conf.RLock() - defer conf.RLock() + defer conf.RUnlock() for _, job := range conf.jobs { if job.Status == finished { continue @@ -276,7 +279,7 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) } // Check region leader if plan.region.GetLeader() == nil { - log.Warn("region have no leader", zap.String("scheduler", s.GetName()), zap.Uint64("region-id", plan.region.GetID())) + log.Warn("region has no leader", zap.String("scheduler", s.GetName()), zap.Uint64("region-id", plan.region.GetID())) balanceRangeNoLeaderCounter.Inc() continue } @@ -294,6 +297,7 @@ func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, ds excludeTargets := plan.region.GetStoreIDs() if plan.job.Role == leader { excludeTargets = make(map[uint64]struct{}) + excludeTargets[plan.region.GetLeader().GetStoreId()] = struct{}{} } conf := plan.GetSchedulerConfig() filters := []filter.Filter{ @@ -372,71 +376,62 @@ type balanceRangeSchedulerPlan struct { tolerate int64 } -type storeInfo struct { - store *core.StoreInfo - score int64 -} - func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluence operator.OpInfluence, job *balanceRangeSchedulerJob) (*balanceRangeSchedulerPlan, error) { - krs := core.NewKeyRanges(job.Ranges) - scanRegions, err := cluster.BatchScanRegions(krs) - if err != nil { - return nil, err - } filters := s.filters - if 
job.Engine == tiflash { - filters = append(filters, filter.NewEngineFilter(balanceRangeName, filter.SpecialEngines)) + switch job.Engine { + case core.EngineTiKV: + filters = append(filters, filter.NewEngineFilter(string(types.BalanceRangeScheduler), filter.NotSpecialEngines)) + case core.EngineTiFlash: + filters = append(filters, filter.NewEngineFilter(string(types.BalanceRangeScheduler), filter.SpecialEngines)) + default: + return nil, errs.ErrGetSourceStore.FastGenByArgs(job.Engine) } sources := filter.SelectSourceStores(cluster.GetStores(), filters, cluster.GetSchedulerConfig(), nil, nil) if sources == nil { return nil, errs.ErrStoresNotEnough.FastGenByArgs("no store to select") } - storeInfos := make(map[uint64]*storeInfo, len(sources)) + + krs := core.NewKeyRanges(job.Ranges) + scanRegions, err := cluster.BatchScanRegions(krs) + if err != nil { + return nil, err + } + + // storeID <--> score mapping + scoreMap := make(map[uint64]int64, len(sources)) for _, source := range sources { - storeInfos[source.GetID()] = &storeInfo{store: source} + scoreMap[source.GetID()] = 0 } totalScore := int64(0) for _, region := range scanRegions { for _, peer := range job.Role.getPeers(region) { - storeInfos[peer.GetStoreId()].score += 1 + scoreMap[peer.GetStoreId()] += 1 totalScore += 1 } } - tolerate := int64(float64(len(scanRegions)) * adjustRatio) - if tolerate < 1 { - tolerate = 1 - } - storeList := make([]*storeInfo, 0, len(storeInfos)) - for storeID, store := range storeInfos { - if influence := opInfluence.GetStoreInfluence(storeID); influence != nil { - store.score += job.Role.getStoreInfluence(influence) - } - storeList = append(storeList, store) - } - sort.Slice(storeList, func(i, j int) bool { + sort.Slice(sources, func(i, j int) bool { role := job.Role - iop := role.getStoreInfluence(opInfluence.GetStoreInfluence(storeList[i].store.GetID())) - jop := role.getStoreInfluence(opInfluence.GetStoreInfluence(storeList[j].store.GetID())) - return storeList[i].score+iop > storeList[j].score+jop + iop := role.getStoreInfluence(opInfluence.GetStoreInfluence(sources[i].GetID())) + jop := role.getStoreInfluence(opInfluence.GetStoreInfluence(sources[j].GetID())) + iScore := scoreMap[sources[i].GetID()] + jScore := scoreMap[sources[j].GetID()] + return iScore+iop > jScore+jop }) - sourceMap := make(map[uint64]int64) - for _, store := range storeList { - sourceMap[store.store.GetID()] = store.score - } - stores := make([]*core.StoreInfo, 0, len(storeList)) - for _, store := range storeList { - stores = append(stores, store.store) - } averageScore := int64(0) - if len(storeList) != 0 { - averageScore = totalScore / int64(len(storeList)) + if len(sources) != 0 { + averageScore = totalScore / int64(len(sources)) + } + + tolerate := int64(float64(len(scanRegions)) * adjustRatio) + if tolerate < 1 { + tolerate = 1 } return &balanceRangeSchedulerPlan{ SchedulerCluster: cluster, - stores: stores, - scoreMap: sourceMap, + stores: sources, + scoreMap: scoreMap, source: nil, target: nil, region: nil, @@ -476,7 +471,7 @@ func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { shouldBalance := sourceScore >= targetScore if !shouldBalance && log.GetLevel() <= zap.DebugLevel { - log.Debug("skip balance ", + log.Debug("skip balance", zap.String("scheduler", scheduler), zap.Uint64("region-id", p.region.GetID()), zap.Uint64("source-store", p.sourceStoreID()), @@ -497,7 +492,6 @@ type Role int const ( leader Role = iota - // include leader + voter follower learner unknown @@ -516,43 +510,6 @@ func (r 
Role) String() string { } } -// engine is the engine of the store. -type engine int - -const ( - tiKV engine = iota - tiflash - notSupported -) - -func (e engine) String() string { - switch e { - case tiKV: - return "tikv" - case tiflash: - return "tiflash" - default: - return "not-supported" - } -} - -// MarshalJSON marshals to json. -func (e engine) MarshalJSON() ([]byte, error) { - return []byte(`"` + e.String() + `"`), nil -} - -// NewEngine creates a new engine. -func NewEngine(role string) engine { - switch role { - case "tikv": - return tiKV - case "tiflash": - return tiflash - default: - return notSupported - } -} - // JobStatus is the status of the job. type JobStatus int diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go index 3193fe935ae..da558c0f84a 100644 --- a/pkg/schedule/schedulers/balance_range_test.go +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -66,7 +66,7 @@ func TestJobStatus(t *testing.T) { conf := &balanceRangeSchedulerConfig{ schedulerConfig: &baseSchedulerConfig{}, } - conf.init(balanceRangeName, s, conf) + conf.init(string(types.BalanceRangeScheduler), s, conf) for _, v := range []struct { jobStatus JobStatus begin bool @@ -107,7 +107,7 @@ func TestBalanceRangePlan(t *testing.T) { } tc.AddLeaderRegionWithRange(1, "100", "110", 1, 2, 3) job := &balanceRangeSchedulerJob{ - Engine: tiKV, + Engine: core.EngineTiKV, Role: leader, Ranges: []core.KeyRange{core.NewKeyRange("100", "110")}, } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 463bf87a363..00313985edc 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -567,13 +567,12 @@ func schedulersRegister() { if role == unknown { return errs.ErrQueryUnescape.FastGenByArgs("role") } - engineString, err := url.QueryUnescape(args[1]) + engine, err := url.QueryUnescape(args[1]) if err != nil { return errs.ErrQueryUnescape.Wrap(err) } - engine := NewEngine(engineString) - if engine == notSupported { - return errs.ErrQueryUnescape.FastGenByArgs("engine") + if engine != core.EngineTiFlash && engine != core.EngineTiKV { + return errs.ErrQueryUnescape.FastGenByArgs("engine must be tikv or tiflash ") } timeout, err := url.QueryUnescape(args[2]) if err != nil { @@ -596,10 +595,6 @@ func schedulersRegister() { id = conf.jobs[len(conf.jobs)-1].JobID + 1 } - if engine == tiflash && role != learner { - return errs.ErrURLParse.FastGenByArgs("TiFlash only support learner role") - } - job := &balanceRangeSchedulerJob{ Role: role, Engine: engine, diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index 8976c3a1928..f7091f518df 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -49,7 +49,7 @@ type Scheduler interface { CleanConfig(cluster sche.SchedulerCluster) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) IsScheduleAllowed(cluster sche.SchedulerCluster) bool - // IsDiable returns if the scheduler is disabled, it only works for default schedulers. + // IsDisable returns if the scheduler is disabled, it only works for default schedulers. 
// - BalanceRegionScheduler // - BalanceLeaderScheduler // - BalanceHotRegionScheduler From 5795f447737be0452c8c00dcc5eee725480b63a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Fri, 21 Feb 2025 10:07:45 +0800 Subject: [PATCH 17/18] address comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/schedule/schedulers/balance_range.go | 76 +++++++++++++------ pkg/schedule/schedulers/balance_range_test.go | 53 ++++++++++++- 2 files changed, 103 insertions(+), 26 deletions(-) diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index 753d6556633..f677e73c1ad 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -15,6 +15,7 @@ package schedulers import ( + "bytes" "net/http" "sort" "strconv" @@ -85,11 +86,12 @@ type balanceRangeSchedulerJob struct { Status JobStatus `json:"status"` } -func (conf *balanceRangeSchedulerConfig) begin(job *balanceRangeSchedulerJob) bool { +func (conf *balanceRangeSchedulerConfig) begin(index int) *balanceRangeSchedulerJob { conf.Lock() defer conf.Unlock() + job := conf.jobs[index] if job.Status != pending { - return false + return nil } now := time.Now() job.Start = &now @@ -98,16 +100,16 @@ func (conf *balanceRangeSchedulerConfig) begin(job *balanceRangeSchedulerJob) bo log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) job.Status = pending job.Start = nil - return false } - return true + return job } -func (conf *balanceRangeSchedulerConfig) finish(job *balanceRangeSchedulerJob) bool { +func (conf *balanceRangeSchedulerConfig) finish(index int) *balanceRangeSchedulerJob { conf.Lock() defer conf.Unlock() + job := conf.jobs[index] if job.Status != running { - return false + return nil } now := time.Now() job.Finish = &now @@ -116,21 +118,20 @@ func (conf *balanceRangeSchedulerConfig) finish(job *balanceRangeSchedulerJob) b log.Warn("failed to persist config", zap.Error(err), zap.Uint64("job-id", job.JobID)) job.Status = running job.Finish = nil - return false } - return true + return job } -func (conf *balanceRangeSchedulerConfig) peek() *balanceRangeSchedulerJob { +func (conf *balanceRangeSchedulerConfig) peek() (int, *balanceRangeSchedulerJob) { conf.RLock() defer conf.RUnlock() - for _, job := range conf.jobs { + for index, job := range conf.jobs { if job.Status == finished { continue } - return job + return index, job } - return nil + return 0, nil } func (conf *balanceRangeSchedulerConfig) clone() []*balanceRangeSchedulerJob { @@ -193,13 +194,14 @@ func (s *balanceRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) if !allowed { operator.IncOperatorLimitCounter(s.GetType(), operator.OpRange) } - job := s.conf.peek() + index, job := s.conf.peek() if job != nil { if job.Status == pending { - s.conf.begin(job) + job = s.conf.begin(index) } + // todo: add other conditions such as the diff of the score between the source and target store. if time.Since(*job.Start) > job.Timeout { - s.conf.finish(job) + s.conf.finish(index) balanceRangeExpiredCounter.Inc() } } @@ -233,13 +235,14 @@ func newBalanceRangeScheduler(opController *operator.Controller, conf *balanceRa // Schedule schedules the balance key range operator. 
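// Job lifecycle, for context: peek returns the first job that is not yet
// finished; begin moves a pending job to running and stamps its start time,
// persisting the change and rolling the in-memory state back if the save
// fails; finish does the same for the running -> finished transition once the
// caller observes that the job's timeout has expired. Schedule below only ever
// works on the job returned by peek.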
func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { balanceRangeCounter.Inc() - job := s.conf.peek() + _, job := s.conf.peek() if job == nil { balanceRangeNoJobCounter.Inc() return nil, nil } opInfluence := s.OpController.GetOpInfluence(cluster.GetBasicCluster(), operator.WithRangeOption(job.Ranges)) + // todo: don't prepare every times, the prepare information can be reused. plan, err := s.prepare(cluster, opInfluence, job) if err != nil { log.Error("failed to prepare balance key range scheduler", errs.ZapError(err)) @@ -376,6 +379,30 @@ type balanceRangeSchedulerPlan struct { tolerate int64 } +func fetchAllRegions(cluster sche.SchedulerCluster, ranges *core.KeyRanges) []*core.RegionInfo { + scanLimit := 32 + regions := make([]*core.RegionInfo, 0) + krs := ranges.Ranges() + + for _, kr := range krs { + for { + region := cluster.ScanRegions(kr.StartKey, kr.EndKey, scanLimit) + if len(region) == 0 { + break + } + regions = append(regions, region...) + if len(region) < scanLimit { + break + } + kr.StartKey = region[len(region)-1].GetEndKey() + if bytes.Equal(kr.StartKey, kr.EndKey) { + break + } + } + } + return regions +} + func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluence operator.OpInfluence, job *balanceRangeSchedulerJob) (*balanceRangeSchedulerPlan, error) { filters := s.filters switch job.Engine { @@ -387,14 +414,14 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen return nil, errs.ErrGetSourceStore.FastGenByArgs(job.Engine) } sources := filter.SelectSourceStores(cluster.GetStores(), filters, cluster.GetSchedulerConfig(), nil, nil) - if sources == nil { + if len(sources) <= 1 { return nil, errs.ErrStoresNotEnough.FastGenByArgs("no store to select") } krs := core.NewKeyRanges(job.Ranges) - scanRegions, err := cluster.BatchScanRegions(krs) - if err != nil { - return nil, err + scanRegions := fetchAllRegions(cluster, krs) + if len(scanRegions) == 0 { + return nil, errs.ErrRegionNotFound.FastGenByArgs("no region found") } // storeID <--> score mapping @@ -420,9 +447,7 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen }) averageScore := int64(0) - if len(sources) != 0 { - averageScore = totalScore / int64(len(sources)) - } + averageScore = totalScore / int64(len(sources)) tolerate := int64(float64(len(scanRegions)) * adjustRatio) if tolerate < 1 { @@ -457,18 +482,23 @@ func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 { func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool { sourceInfluence := p.opInfluence.GetStoreInfluence(p.sourceStoreID()) sourceInf := p.job.Role.getStoreInfluence(sourceInfluence) + // Sometimes, there are many remove-peer operators in the source store, we don't want to pick this store as source. if sourceInf < 0 { sourceInf = -sourceInf } + // to avoid schedule too much, if A's core greater than B and C a little + // we want that A should be moved out one region not two sourceScore := p.sourceScore - sourceInf - p.tolerate targetInfluence := p.opInfluence.GetStoreInfluence(p.targetStoreID()) targetInf := p.job.Role.getStoreInfluence(targetInfluence) + // Sometimes, there are many add-peer operators in the target store, we don't want to pick this store as target. 
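// Net comparison, spelled out: (sourceScore - |sourceInf| - tolerate) >=
// (targetScore + |targetInf| + tolerate). Pending operator influence and the
// tolerance both shrink the effective gap, so an operator is only produced when
// the raw score gap clearly exceeds the in-flight churn plus the allowed imbalance.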
if targetInf < 0 { targetInf = -targetInf } targetScore := p.targetScore + targetInf + p.tolerate + // the source score must be greater than the target score shouldBalance := sourceScore >= targetScore if !shouldBalance && log.GetLevel() <= zap.DebugLevel { log.Debug("skip balance", diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go index da558c0f84a..930b8c84ed9 100644 --- a/pkg/schedule/schedulers/balance_range_test.go +++ b/pkg/schedule/schedulers/balance_range_test.go @@ -54,6 +54,10 @@ func TestGetPeers(t *testing.T) { role: "learner", peers: []*metapb.Peer{learner}, }, + { + role: "witness", + peers: nil, + }, } { role := NewRole(v.role) re.Equal(v.peers, role.getPeers(region)) @@ -65,6 +69,7 @@ func TestJobStatus(t *testing.T) { re := require.New(t) conf := &balanceRangeSchedulerConfig{ schedulerConfig: &baseSchedulerConfig{}, + jobs: make([]*balanceRangeSchedulerJob, 1), } conf.init(string(types.BalanceRangeScheduler), s, conf) for _, v := range []struct { @@ -91,10 +96,28 @@ func TestJobStatus(t *testing.T) { job := &balanceRangeSchedulerJob{ Status: v.jobStatus, } - re.Equal(v.begin, conf.begin(job)) + conf.jobs[0] = job + if v.begin { + re.Equal(running, conf.begin(0).Status) + } else { + re.Nil(conf.begin(0)) + } job.Status = v.jobStatus - re.Equal(v.finish, conf.finish(job)) + if v.finish { + re.Equal(finished, conf.finish(0).Status) + } else { + re.Nil(conf.finish(0)) + } } + idx, job := conf.peek() + re.Equal(0, idx) + re.Nil(job) + conf.jobs[0] = &balanceRangeSchedulerJob{ + Status: running, + } + idx, job = conf.peek() + re.Equal(0, idx) + re.NotNil(job) } func TestBalanceRangePlan(t *testing.T) { @@ -147,7 +170,7 @@ func TestTIKVEngine(t *testing.T) { op := ops[0] re.Equal("3", op.GetAdditionalInfo("sourceScore")) re.Equal("1", op.GetAdditionalInfo("targetScore")) - re.Contains(op.Brief(), "transfer leader: store 1 to 3") + re.Contains(op.Brief(), "transfer leader: store 1 to") tc.AddLeaderStore(4, 0) // case2: move peer from store 1 to store 4 @@ -206,3 +229,27 @@ func TestTIFLASHEngine(t *testing.T) { re.Equal("1", op.GetAdditionalInfo("tolerate")) re.Contains(op.Brief(), "mv peer: store [4] to") } + +func TestFetchAllRegions(t *testing.T) { + re := require.New(t) + cancel, _, tc, _ := prepareSchedulersTest() + defer cancel() + for i := 1; i <= 3; i++ { + tc.AddLeaderStore(uint64(i), 0) + } + for i := 1; i <= 100; i++ { + tc.AddLeaderRegion(uint64(i), 1, 2, 3) + } + + ranges := core.NewKeyRangesWithSize(1) + ranges.Append([]byte(""), []byte("")) + regions := fetchAllRegions(tc, ranges) + re.Len(regions, 100) + + ranges = core.NewKeyRangesWithSize(1) + region := tc.GetRegion(50) + ranges.Append([]byte(""), region.GetStartKey()) + ranges.Append(region.GetStartKey(), []byte("")) + regions = fetchAllRegions(tc, ranges) + re.Len(regions, 100) +} From 1f3e2614b23c8a5a6caee662cacd007112674b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AB=A5=E5=89=91?= <1045931706@qq.com> Date: Wed, 5 Mar 2025 15:33:54 +0800 Subject: [PATCH 18/18] fmt pak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 童剑 <1045931706@qq.com> --- pkg/core/region.go | 77 +++++++++++-- pkg/core/region_test.go | 35 ++++++ pkg/schedule/operator/influence.go | 14 +++ pkg/schedule/schedulers/balance_range.go | 106 +++--------------- pkg/schedule/schedulers/balance_range_test.go | 95 +--------------- pkg/schedule/schedulers/init.go | 4 +- 6 files changed, 135 insertions(+), 196 deletions(-) diff --git 
a/pkg/core/region.go b/pkg/core/region.go index 94fc525f11b..3c8e76d297d 100644 --- a/pkg/core/region.go +++ b/pkg/core/region.go @@ -360,6 +360,73 @@ func (r *RegionInfo) GetPeer(peerID uint64) *metapb.Peer { return nil } +// Role is the role of the region. +type Role int + +const ( + // Leader is the leader of the region. + Leader Role = iota + // Follower is the follower of the region. + Follower + // Learner is the learner of the region. + Learner + // Unknown is the unknown role of the region include witness. + Unknown +) + +// String returns the string value of the role. +func (r Role) String() string { + switch r { + case Leader: + return "leader" + case Follower: + return "voter" + case Learner: + return "learner" + default: + return "unknown" + } +} + +// NewRole creates a new role. +func NewRole(role string) Role { + switch role { + case "leader": + return Leader + case "follower": + return Follower + case "learner": + return Learner + default: + return Unknown + } +} + +// MarshalJSON returns the JSON encoding of Role. +func (r Role) MarshalJSON() ([]byte, error) { + return []byte(`"` + r.String() + `"`), nil +} + +// GetPeersByRole returns the peers with specified role. +func (r *RegionInfo) GetPeersByRole(role Role) []*metapb.Peer { + switch role { + case Leader: + return []*metapb.Peer{r.GetLeader()} + case Follower: + followers := r.GetFollowers() + ret := make([]*metapb.Peer, 0, len(followers)) + for _, peer := range followers { + ret = append(ret, peer) + } + return ret + case Learner: + learners := r.GetLearners() + return learners + default: + return nil + } +} + // GetDownPeer returns the down peer with specified peer id. func (r *RegionInfo) GetDownPeer(peerID uint64) *metapb.Peer { for _, down := range r.downPeers { @@ -482,16 +549,6 @@ func (r *RegionInfo) GetFollowers() map[uint64]*metapb.Peer { return followers } -// GetFollower randomly returns a follow peer. -func (r *RegionInfo) GetFollower() *metapb.Peer { - for _, peer := range r.GetVoters() { - if r.leader == nil || r.leader.GetId() != peer.GetId() { - return peer - } - } - return nil -} - // GetNonWitnessVoters returns a map indicate the non-witness voter peers distributed. 
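Because core.Role marshals to its string form, a job that embeds it serializes with a plain "role" field, which is what the pd-ctl test inspects. A small illustrative snippet, assuming encoding/json and fmt are imported:

b, _ := json.Marshal(struct {
	Role core.Role `json:"role"`
}{Role: core.Leader})
fmt.Println(string(b)) // prints {"role":"leader"}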
func (r *RegionInfo) GetNonWitnessVoters() map[uint64]*metapb.Peer { peers := r.GetVoters() diff --git a/pkg/core/region_test.go b/pkg/core/region_test.go index cfd05b776f2..ce44e37fbf5 100644 --- a/pkg/core/region_test.go +++ b/pkg/core/region_test.go @@ -1291,3 +1291,38 @@ func TestQueryRegions(t *testing.T) { re.Equal(uint64(2), regionsByID[2].GetRegion().GetId()) re.Equal(uint64(3), regionsByID[3].GetRegion().GetId()) } + +func TestGetPeers(t *testing.T) { + re := require.New(t) + learner := &metapb.Peer{StoreId: 1, Id: 1, Role: metapb.PeerRole_Learner} + leader := &metapb.Peer{StoreId: 2, Id: 2} + follower1 := &metapb.Peer{StoreId: 3, Id: 3} + follower2 := &metapb.Peer{StoreId: 4, Id: 4} + region := NewRegionInfo(&metapb.Region{Id: 100, Peers: []*metapb.Peer{ + leader, follower1, follower2, learner, + }}, leader, WithLearners([]*metapb.Peer{learner})) + for _, v := range []struct { + role string + peers []*metapb.Peer + }{ + { + role: "leader", + peers: []*metapb.Peer{leader}, + }, + { + role: "follower", + peers: []*metapb.Peer{follower1, follower2}, + }, + { + role: "learner", + peers: []*metapb.Peer{learner}, + }, + { + role: "witness", + peers: nil, + }, + } { + role := NewRole(v.role) + re.Equal(v.peers, region.GetPeersByRole(role)) + } +} diff --git a/pkg/schedule/operator/influence.go b/pkg/schedule/operator/influence.go index 2b42ffa3516..e3679d89479 100644 --- a/pkg/schedule/operator/influence.go +++ b/pkg/schedule/operator/influence.go @@ -59,6 +59,20 @@ type StoreInfluence struct { StepCost map[storelimit.Type]int64 } +// GetStoreInfluenceByRole returns the influence of the store according to the role. +func (s *StoreInfluence) GetStoreInfluenceByRole(r core.Role) int64 { + switch r { + case core.Leader: + return s.LeaderCount + case core.Follower: + return s.RegionCount + case core.Learner: + return s.RegionCount + default: + return 0 + } +} + func (s *StoreInfluence) add(other *StoreInfluence) { s.RegionCount += other.RegionCount s.RegionSize += other.RegionSize diff --git a/pkg/schedule/schedulers/balance_range.go b/pkg/schedule/schedulers/balance_range.go index f677e73c1ad..d257516eba9 100644 --- a/pkg/schedule/schedulers/balance_range.go +++ b/pkg/schedule/schedulers/balance_range.go @@ -75,7 +75,7 @@ type balanceRangeSchedulerConfig struct { type balanceRangeSchedulerJob struct { JobID uint64 `json:"job-id"` - Role Role `json:"role"` + Role core.Role `json:"role"` Engine string `json:"engine"` Timeout time.Duration `json:"timeout"` Ranges []core.KeyRange `json:"ranges"` @@ -262,11 +262,11 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) break } switch job.Role { - case leader: + case core.Leader: plan.region = filter.SelectOneRegion(cluster.RandLeaderRegions(plan.sourceStoreID(), job.Ranges), nil, baseRegionFilters...) - case learner: + case core.Learner: plan.region = filter.SelectOneRegion(cluster.RandLearnerRegions(plan.sourceStoreID(), job.Ranges), nil, baseRegionFilters...) - case follower: + case core.Follower: plan.region = filter.SelectOneRegion(cluster.RandFollowerRegions(plan.sourceStoreID(), job.Ranges), nil, baseRegionFilters...) } if plan.region == nil { @@ -298,7 +298,7 @@ func (s *balanceRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) // transferPeer selects the best store to create a new peer to replace the old peer. 
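// Decision sketch for the function below: for a leader job the region's other
// peer stores stay eligible as targets (only the current leader's store is
// excluded), so if the chosen target already holds a peer the scheduler only
// transfers leadership; otherwise it adds a new peer on the target and removes
// the old one (move peer).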
 func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, dstStores []*core.StoreInfo) *operator.Operator {
 	excludeTargets := plan.region.GetStoreIDs()
-	if plan.job.Role == leader {
+	if plan.job.Role == core.Leader {
 		excludeTargets = make(map[uint64]struct{})
 		excludeTargets[plan.region.GetLeader().GetStoreId()] = struct{}{}
 	}
@@ -324,7 +324,7 @@ func (s *balanceRangeScheduler) transferPeer(plan *balanceRangeSchedulerPlan, ds
 
 	oldPeer := plan.region.GetStorePeer(sourceID)
 	exist := false
-	if plan.job.Role == leader {
+	if plan.job.Role == core.Leader {
 		peers := plan.region.GetPeers()
 		for _, peer := range peers {
 			if peer.GetStoreId() == targetID {
@@ -431,7 +431,7 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen
 	}
 	totalScore := int64(0)
 	for _, region := range scanRegions {
-		for _, peer := range job.Role.getPeers(region) {
+		for _, peer := range region.GetPeersByRole(job.Role) {
 			scoreMap[peer.GetStoreId()] += 1
 			totalScore += 1
 		}
@@ -439,8 +439,8 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen
 
 	sort.Slice(sources, func(i, j int) bool {
 		role := job.Role
-		iop := role.getStoreInfluence(opInfluence.GetStoreInfluence(sources[i].GetID()))
-		jop := role.getStoreInfluence(opInfluence.GetStoreInfluence(sources[j].GetID()))
+		iop := opInfluence.GetStoreInfluence(sources[i].GetID()).GetStoreInfluenceByRole(role)
+		jop := opInfluence.GetStoreInfluence(sources[j].GetID()).GetStoreInfluenceByRole(role)
 		iScore := scoreMap[sources[i].GetID()]
 		jScore := scoreMap[sources[j].GetID()]
 		return iScore+iop > jScore+jop
@@ -449,9 +449,9 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen
 
 	averageScore := int64(0)
 	averageScore = totalScore / int64(len(sources))
-	tolerate := int64(float64(len(scanRegions)) * adjustRatio)
-	if tolerate < 1 {
-		tolerate = 1
+	tolerantSizeRatio := int64(float64(len(scanRegions)) * adjustRatio)
+	if tolerantSizeRatio < 1 {
+		tolerantSizeRatio = 1
 	}
 	return &balanceRangeSchedulerPlan{
 		SchedulerCluster: cluster,
@@ -463,7 +463,7 @@ func (s *balanceRangeScheduler) prepare(cluster sche.SchedulerCluster, opInfluen
 		averageScore:     averageScore,
 		job:              job,
 		opInfluence:      opInfluence,
-		tolerate:         tolerate,
+		tolerate:         tolerantSizeRatio,
 	}, nil
 }
 
@@ -481,7 +481,7 @@ func (p *balanceRangeSchedulerPlan) score(storeID uint64) int64 {
 
 func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool {
 	sourceInfluence := p.opInfluence.GetStoreInfluence(p.sourceStoreID())
-	sourceInf := p.job.Role.getStoreInfluence(sourceInfluence)
+	sourceInf := sourceInfluence.GetStoreInfluenceByRole(p.job.Role)
 	// Sometimes, there are many remove-peer operators in the source store, we don't want to pick this store as source.
 	if sourceInf < 0 {
 		sourceInf = -sourceInf
@@ -491,7 +491,7 @@ func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool {
 	sourceScore := p.sourceScore - sourceInf - p.tolerate
 
 	targetInfluence := p.opInfluence.GetStoreInfluence(p.targetStoreID())
-	targetInf := p.job.Role.getStoreInfluence(targetInfluence)
+	targetInf := targetInfluence.GetStoreInfluenceByRole(p.job.Role)
 	// Sometimes, there are many add-peer operators in the target store, we don't want to pick this store as target.
 	if targetInf < 0 {
 		targetInf = -targetInf
@@ -510,36 +510,13 @@ func (p *balanceRangeSchedulerPlan) shouldBalance(scheduler string) bool {
 			zap.Int64("origin-target-score", p.targetScore),
 			zap.Int64("influence-source-score", sourceScore),
 			zap.Int64("influence-target-score", targetScore),
-			zap.Int64("average-region-size", p.averageScore),
+			zap.Int64("average-region-score", p.averageScore),
 			zap.Int64("tolerate", p.tolerate),
 		)
 	}
 	return shouldBalance
 }
 
-// Role is the role of the region.
-type Role int
-
-const (
-	leader Role = iota
-	follower
-	learner
-	unknown
-)
-
-func (r Role) String() string {
-	switch r {
-	case leader:
-		return "leader"
-	case follower:
-		return "voter"
-	case learner:
-		return "learner"
-	default:
-		return "unknown"
-	}
-}
-
 // JobStatus is the status of the job.
 type JobStatus int
 
@@ -565,54 +542,3 @@ func (s JobStatus) String() string {
 func (s JobStatus) MarshalJSON() ([]byte, error) {
 	return []byte(`"` + s.String() + `"`), nil
 }
-
-// NewRole creates a new role.
-func NewRole(role string) Role {
-	switch role {
-	case "leader":
-		return leader
-	case "follower":
-		return follower
-	case "learner":
-		return learner
-	default:
-		return unknown
-	}
-}
-
-func (r Role) getPeers(region *core.RegionInfo) []*metapb.Peer {
-	switch r {
-	case leader:
-		return []*metapb.Peer{region.GetLeader()}
-	case follower:
-		followers := region.GetFollowers()
-		ret := make([]*metapb.Peer, 0, len(followers))
-		for _, peer := range followers {
-			ret = append(ret, peer)
-		}
-		return ret
-	case learner:
-		learners := region.GetLearners()
-		return learners
-	default:
-		return nil
-	}
-}
-
-func (r Role) getStoreInfluence(influence *operator.StoreInfluence) int64 {
-	switch r {
-	case leader:
-		return influence.LeaderCount
-	case follower:
-		return influence.RegionCount
-	case learner:
-		return influence.RegionCount
-	default:
-		return 0
-	}
-}
-
-// MarshalJSON marshals to json.
-func (r Role) MarshalJSON() ([]byte, error) {
-	return []byte(`"` + r.String() + `"`), nil
-}
diff --git a/pkg/schedule/schedulers/balance_range_test.go b/pkg/schedule/schedulers/balance_range_test.go
index 930b8c84ed9..ca8373835e5 100644
--- a/pkg/schedule/schedulers/balance_range_test.go
+++ b/pkg/schedule/schedulers/balance_range_test.go
@@ -20,8 +20,6 @@ import (
 
 	"github.com/stretchr/testify/require"
 
-	"github.com/pingcap/kvproto/pkg/metapb"
-
 	"github.com/tikv/pd/pkg/core"
 	"github.com/tikv/pd/pkg/schedule/operator"
 	"github.com/tikv/pd/pkg/schedule/placement"
@@ -29,97 +27,6 @@ import (
 	"github.com/tikv/pd/pkg/storage"
 )
 
-func TestGetPeers(t *testing.T) {
-	re := require.New(t)
-	learner := &metapb.Peer{StoreId: 1, Id: 1, Role: metapb.PeerRole_Learner}
-	leader := &metapb.Peer{StoreId: 2, Id: 2}
-	follower1 := &metapb.Peer{StoreId: 3, Id: 3}
-	follower2 := &metapb.Peer{StoreId: 4, Id: 4}
-	region := core.NewRegionInfo(&metapb.Region{Id: 100, Peers: []*metapb.Peer{
-		leader, follower1, follower2, learner,
-	}}, leader, core.WithLearners([]*metapb.Peer{learner}))
-	for _, v := range []struct {
-		role  string
-		peers []*metapb.Peer
-	}{
-		{
-			role:  "leader",
-			peers: []*metapb.Peer{leader},
-		},
-		{
-			role:  "follower",
-			peers: []*metapb.Peer{follower1, follower2},
-		},
-		{
-			role:  "learner",
-			peers: []*metapb.Peer{learner},
-		},
-		{
-			role:  "witness",
-			peers: nil,
-		},
-	} {
-		role := NewRole(v.role)
-		re.Equal(v.peers, role.getPeers(region))
-	}
-}
-
-func TestJobStatus(t *testing.T) {
-	s := storage.NewStorageWithMemoryBackend()
-	re := require.New(t)
-	conf := &balanceRangeSchedulerConfig{
-		schedulerConfig: &baseSchedulerConfig{},
-		jobs:            make([]*balanceRangeSchedulerJob, 1),
-	}
-	conf.init(string(types.BalanceRangeScheduler), s, conf)
-	for _, v := range []struct {
-		jobStatus JobStatus
-		begin     bool
-		finish    bool
-	}{
-		{
-			pending,
-			true,
-			false,
-		},
-		{
-			running,
-			false,
-			true,
-		},
-		{
-			finished,
-			false,
-			false,
-		},
-	} {
-		job := &balanceRangeSchedulerJob{
-			Status: v.jobStatus,
-		}
-		conf.jobs[0] = job
-		if v.begin {
-			re.Equal(running, conf.begin(0).Status)
-		} else {
-			re.Nil(conf.begin(0))
-		}
-		job.Status = v.jobStatus
-		if v.finish {
-			re.Equal(finished, conf.finish(0).Status)
-		} else {
-			re.Nil(conf.finish(0))
-		}
-	}
-	idx, job := conf.peek()
-	re.Equal(0, idx)
-	re.Nil(job)
-	conf.jobs[0] = &balanceRangeSchedulerJob{
-		Status: running,
-	}
-	idx, job = conf.peek()
-	re.Equal(0, idx)
-	re.NotNil(job)
-}
-
 func TestBalanceRangePlan(t *testing.T) {
 	re := require.New(t)
 	cancel, _, tc, oc := prepareSchedulersTest()
@@ -131,7 +38,7 @@ func TestBalanceRangePlan(t *testing.T) {
 	tc.AddLeaderRegionWithRange(1, "100", "110", 1, 2, 3)
 	job := &balanceRangeSchedulerJob{
 		Engine: core.EngineTiKV,
-		Role:   leader,
+		Role:   core.Leader,
 		Ranges: []core.KeyRange{core.NewKeyRange("100", "110")},
 	}
 	plan, err := sc.prepare(tc, *operator.NewOpInfluence(), job)
diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go
index 00313985edc..bdf0f7dd510 100644
--- a/pkg/schedule/schedulers/init.go
+++ b/pkg/schedule/schedulers/init.go
@@ -563,8 +563,8 @@ func schedulersRegister() {
 			if err != nil {
 				return errs.ErrQueryUnescape.Wrap(err)
 			}
-			role := NewRole(roleString)
-			if role == unknown {
+			role := core.NewRole(roleString)
+			if role == core.Unknown {
 				return errs.ErrQueryUnescape.FastGenByArgs("role")
 			}
 			engine, err := url.QueryUnescape(args[1])