Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: add a new scheduler to balance the regions of the given key range #8988

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/core/basic_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package core

import (
"bytes"
"encoding/json"

"github.com/tikv/pd/pkg/core/constant"
)
Expand Down Expand Up @@ -156,6 +157,15 @@ type KeyRange struct {
EndKey []byte `json:"end-key"`
}

// MarshalJSON implements json.Marshaler for KeyRange. Each boundary key is
// rendered with HexRegionKeyStr and the result is emitted as a JSON object
// with two string members, "start-key" and "end-key".
//
// NOTE(review): no matching UnmarshalJSON is visible in this part of the
// file — confirm that the encoded form can be read back where it is persisted.
func (kr KeyRange) MarshalJSON() ([]byte, error) {
	encoded := make(map[string]string, 2)
	encoded["start-key"] = HexRegionKeyStr(kr.StartKey)
	encoded["end-key"] = HexRegionKeyStr(kr.EndKey)
	return json.Marshal(encoded)
}

// NewKeyRange create a KeyRange with the given start key and end key.
func NewKeyRange(startKey, endKey string) KeyRange {
return KeyRange{
Expand Down
6 changes: 5 additions & 1 deletion pkg/mcs/scheduling/server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,11 @@
)
// Create the newly added schedulers.
for _, scheduler := range latestSchedulersConfig {
schedulerType := types.ConvertOldStrToType[scheduler.Type]
schedulerType, ok := types.ConvertOldStrToType[scheduler.Type]
if !ok {
log.Error("scheduler not found", zap.String("type", scheduler.Type))
continue

Check warning on line 320 in pkg/mcs/scheduling/server/cluster.go

View check run for this annotation

Codecov / codecov/patch

pkg/mcs/scheduling/server/cluster.go#L319-L320

Added lines #L319 - L320 were not covered by tests
}
s, err := schedulers.CreateScheduler(
schedulerType,
c.coordinator.GetOperatorController(),
Expand Down
151 changes: 151 additions & 0 deletions pkg/schedule/schedulers/balance_key_range.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package schedulers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need a license and please also change the filename.


import (
"net/http"
"time"

"github.com/gorilla/mux"
"github.com/unrolled/render"

"github.com/pingcap/log"

"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/core/constant"
"github.com/tikv/pd/pkg/errs"
sche "github.com/tikv/pd/pkg/schedule/core"
"github.com/tikv/pd/pkg/schedule/filter"
"github.com/tikv/pd/pkg/schedule/operator"
"github.com/tikv/pd/pkg/schedule/plan"
"github.com/tikv/pd/pkg/schedule/types"
"github.com/tikv/pd/pkg/utils/syncutil"
)

type balanceKeyRangeSchedulerHandler struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep using balanceRangeSchedulerHandler?

rd *render.Render
config *balanceKeyRangeSchedulerConfig
}

// newBalanceKeyRangeHandler returns the http.Handler that exposes the
// scheduler's config endpoints. It wires two routes onto a fresh mux router:
//
//	POST /config -> updateConfig
//	GET  /list   -> listConfig
//
// Responses are rendered as indented JSON.
func newBalanceKeyRangeHandler(conf *balanceKeyRangeSchedulerConfig) http.Handler {
	renderer := render.New(render.Options{IndentJSON: true})
	h := &balanceKeyRangeSchedulerHandler{
		rd:     renderer,
		config: conf,
	}

	r := mux.NewRouter()
	r.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost)
	r.HandleFunc("/list", h.listConfig).Methods(http.MethodGet)
	return r
}

func (handler *balanceKeyRangeSchedulerHandler) updateConfig(w http.ResponseWriter, _ *http.Request) {
handler.rd.JSON(w, http.StatusBadRequest, "update config is not supported")

Check warning on line 40 in pkg/schedule/schedulers/balance_key_range.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/balance_key_range.go#L39-L40

Added lines #L39 - L40 were not covered by tests
}

func (handler *balanceKeyRangeSchedulerHandler) listConfig(w http.ResponseWriter, _ *http.Request) {
conf := handler.config.clone()
if err := handler.rd.JSON(w, http.StatusOK, conf); err != nil {
log.Error("failed to marshal balance key range scheduler config", errs.ZapError(err))
}

Check warning on line 47 in pkg/schedule/schedulers/balance_key_range.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/balance_key_range.go#L46-L47

Added lines #L46 - L47 were not covered by tests
}

type balanceKeyRangeSchedulerConfig struct {
syncutil.RWMutex
schedulerConfig
balanceKeyRangeSchedulerParam
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we use a slice to support multiple key ranges with different roles or engines?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to support multiple key ranges with the same role and engine.

}

type balanceKeyRangeSchedulerParam struct {
Role string `json:"role"`
Engine string `json:"engine"`
Timeout time.Duration `json:"timeout"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do all ranges share the same timeout?

Ranges []core.KeyRange `json:"ranges"`
Copy link
Contributor

@nolouch nolouch Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could make the API interface look like this:

type KeyRange struct {
     Alias string `json:"alias"`
     KeyRange string `json:"range"`
}

Then we can alias the table name by tidb or others to help the user read it.

}

// encodeConfig serializes conf by passing it to EncodeConfig. The read lock
// is held for the whole call so that writers taking the write lock (for
// example ReloadConfig) cannot modify the config while it is being encoded.
func (conf *balanceKeyRangeSchedulerConfig) encodeConfig() ([]byte, error) {
	conf.RLock()
	defer conf.RUnlock()

	data, err := EncodeConfig(conf)
	return data, err
}

// clone returns a snapshot of the scheduler parameters taken under the read
// lock. Role, Engine and Timeout are copied by value and Ranges is copied
// into a freshly allocated slice, so appending to or reassigning elements of
// the returned slice does not affect conf. The copy is shallow: the
// individual KeyRange values are copied as-is.
func (conf *balanceKeyRangeSchedulerConfig) clone() *balanceKeyRangeSchedulerParam {
	conf.RLock()
	defer conf.RUnlock()

	snapshot := &balanceKeyRangeSchedulerParam{
		Role:    conf.Role,
		Engine:  conf.Engine,
		Timeout: conf.Timeout,
		Ranges:  make([]core.KeyRange, len(conf.Ranges)),
	}
	copy(snapshot.Ranges, conf.Ranges)
	return snapshot
}

// EncodeConfig serializes the config.
func (s *balanceKeyRangeScheduler) EncodeConfig() ([]byte, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little bit confusing to have both EncodeConfig and encodeConfig.

return s.conf.encodeConfig()
}

// ReloadConfig reloads the config.
func (s *balanceKeyRangeScheduler) ReloadConfig() error {
s.conf.Lock()
defer s.conf.Unlock()

newCfg := &balanceKeyRangeSchedulerConfig{}
if err := s.conf.load(newCfg); err != nil {
return err
}
s.conf.Ranges = newCfg.Ranges
s.conf.Timeout = newCfg.Timeout
s.conf.Role = newCfg.Role
s.conf.Engine = newCfg.Engine
return nil

Check warning on line 100 in pkg/schedule/schedulers/balance_key_range.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/balance_key_range.go#L88-L100

Added lines #L88 - L100 were not covered by tests
}

// balanceKeyRangeScheduler is the scheduler created for
// types.BalanceRangeScheduler. It embeds *BaseScheduler and carries its own
// config, HTTP handler and store filters.
type balanceKeyRangeScheduler struct {
*BaseScheduler
// conf is the scheduler's config (role, engine, timeout and key ranges).
conf *balanceKeyRangeSchedulerConfig
// handler serves the scheduler's HTTP endpoints; ServeHTTP delegates to it.
handler http.Handler
// filters and filterCounter are populated by newBalanceKeyRangeScheduler.
filters []filter.Filter
filterCounter *filter.Counter
}

// ServeHTTP implements the http.Handler interface by forwarding the request
// unchanged to the scheduler's handler field.
func (s *balanceKeyRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.handler.ServeHTTP(w, r)
}

// Schedule schedules the balance key range operator.
//
// It is currently a placeholder: both parameters are ignored, a debug log
// line is written, and no operators and no plans are returned.
func (*balanceKeyRangeScheduler) Schedule(_cluster sche.SchedulerCluster, _dryRun bool) ([]*operator.Operator, []plan.Plan) {
// Scheduling logic is not implemented yet; only record that we were called.
log.Debug("balance key range scheduler is scheduling, need to implement")
return nil, nil
}

// IsScheduleAllowed checks if the scheduler is allowed to schedule new operators.
func (s *balanceKeyRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool {
allowed := s.OpController.OperatorCount(operator.OpRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit()
if !allowed {
operator.IncOperatorLimitCounter(s.GetType(), operator.OpRange)
}

Check warning on line 127 in pkg/schedule/schedulers/balance_key_range.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/balance_key_range.go#L126-L127

Added lines #L126 - L127 were not covered by tests
return allowed
}

// BalanceKeyRangeCreateOption is used to create a scheduler with an option.
// Each option receives the scheduler under construction and may mutate it;
// newBalanceKeyRangeScheduler applies the options in the order given, before
// it assigns the scheduler's filters and filter counter.
type BalanceKeyRangeCreateOption func(s *balanceKeyRangeScheduler)

// newBalanceKeyRangeScheduler creates a scheduler that tends to keep given peer role on
// special store balanced.
func newBalanceKeyRangeScheduler(opController *operator.Controller, conf *balanceKeyRangeSchedulerConfig, options ...BalanceKeyRangeCreateOption) Scheduler {
s := &balanceKeyRangeScheduler{
BaseScheduler: NewBaseScheduler(opController, types.BalanceRangeScheduler, conf),
conf: conf,
handler: newBalanceKeyRangeHandler(conf),
}
for _, option := range options {
option(s)
}

Check warning on line 144 in pkg/schedule/schedulers/balance_key_range.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/balance_key_range.go#L143-L144

Added lines #L143 - L144 were not covered by tests
s.filters = []filter.Filter{
&filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.Medium},
filter.NewSpecialUseFilter(s.GetName()),
}
s.filterCounter = filter.NewCounter(s.GetName())
return s
}
54 changes: 54 additions & 0 deletions pkg/schedule/schedulers/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
package schedulers

import (
"net/url"
"strconv"
"strings"
"sync"
"time"

"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/errs"
Expand Down Expand Up @@ -545,4 +547,56 @@
conf.init(sche.GetName(), storage, conf)
return sche, nil
})

// balance key range scheduler
// args: [role, engine, timeout, range1, range2, ...]
RegisterSliceDecoderBuilder(types.BalanceRangeScheduler, func(args []string) ConfigDecoder {
return func(v any) error {
conf, ok := v.(*balanceKeyRangeSchedulerConfig)
if !ok {
return errs.ErrScheduleConfigNotExist.FastGenByArgs()
}

Check warning on line 558 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L557-L558

Added lines #L557 - L558 were not covered by tests
if len(args) < 4 {
return errs.ErrSchedulerConfig.FastGenByArgs("args length must be greater than 3")
}
role, err := url.QueryUnescape(args[0])
if err != nil {
return errs.ErrQueryUnescape.Wrap(err)
}

Check warning on line 565 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L564-L565

Added lines #L564 - L565 were not covered by tests
engine, err := url.QueryUnescape(args[1])
if err != nil {
return errs.ErrQueryUnescape.Wrap(err)
}

Check warning on line 569 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L568-L569

Added lines #L568 - L569 were not covered by tests
timeout, err := url.QueryUnescape(args[2])
if err != nil {
return errs.ErrQueryUnescape.Wrap(err)
}

Check warning on line 573 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L572-L573

Added lines #L572 - L573 were not covered by tests
duration, err := time.ParseDuration(timeout)
if err != nil {
return errs.ErrURLParse.Wrap(err)
}

Check warning on line 577 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L576-L577

Added lines #L576 - L577 were not covered by tests
ranges, err := getKeyRanges(args[3:])
if err != nil {
return err
}

Check warning on line 581 in pkg/schedule/schedulers/init.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/init.go#L580-L581

Added lines #L580 - L581 were not covered by tests
conf.Ranges = ranges
conf.Engine = engine
conf.Role = role
conf.Timeout = duration
return nil
}
})

RegisterScheduler(types.BalanceRangeScheduler, func(opController *operator.Controller,
storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) {
conf := &balanceKeyRangeSchedulerConfig{
schedulerConfig: newBaseDefaultSchedulerConfig(),
}
if err := decoder(conf); err != nil {
return nil, err
}
sche := newBalanceKeyRangeScheduler(opController, conf)
conf.init(sche.GetName(), storage, conf)
return sche, nil
})
}
5 changes: 5 additions & 0 deletions pkg/schedule/types/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ const (
TransferWitnessLeaderScheduler CheckerSchedulerType = "transfer-witness-leader-scheduler"
// LabelScheduler is label scheduler name.
LabelScheduler CheckerSchedulerType = "label-scheduler"
// BalanceRangeScheduler is balance key range scheduler name.
BalanceRangeScheduler CheckerSchedulerType = "balance-range-scheduler"
)

// TODO: SchedulerTypeCompatibleMap and ConvertOldStrToType should be removed after
Expand Down Expand Up @@ -97,6 +99,7 @@ var (
SplitBucketScheduler: "split-bucket",
TransferWitnessLeaderScheduler: "transfer-witness-leader",
LabelScheduler: "label",
BalanceRangeScheduler: "balance-range",
}

// ConvertOldStrToType exists for compatibility.
Expand All @@ -120,6 +123,7 @@ var (
"split-bucket": SplitBucketScheduler,
"transfer-witness-leader": TransferWitnessLeaderScheduler,
"label": LabelScheduler,
"balance-range": BalanceRangeScheduler,
}

// StringToSchedulerType is a map to convert the scheduler string to the CheckerSchedulerType.
Expand All @@ -143,6 +147,7 @@ var (
"split-bucket-scheduler": SplitBucketScheduler,
"transfer-witness-leader-scheduler": TransferWitnessLeaderScheduler,
"label-scheduler": LabelScheduler,
"balance-range-scheduler": BalanceRangeScheduler,
}

// DefaultSchedulers is the default scheduler types.
Expand Down
33 changes: 33 additions & 0 deletions server/api/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,39 @@
}

switch tp {
case types.BalanceRangeScheduler:
exist, _ := h.IsSchedulerExisted(name)
if exist {
h.r.JSON(w, http.StatusBadRequest, "The scheduler already exists, pls remove the exist scheduler first.")
return
}
if err := apiutil.CollectStringOption("role", input, collector); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}

Check warning on line 111 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L109-L111

Added lines #L109 - L111 were not covered by tests
if err := apiutil.CollectStringOption("engine", input, collector); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}

Check warning on line 115 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L113-L115

Added lines #L113 - L115 were not covered by tests
defaultTimeout := "1h"
if err := apiutil.CollectStringOption("timeout", input, collector); err != nil {
if errors.ErrorEqual(err, errs.ErrOptionNotExist) {
collector(defaultTimeout)
} else {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}

Check warning on line 123 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L121-L123

Added lines #L121 - L123 were not covered by tests
}

if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}

Check warning on line 129 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L127-L129

Added lines #L127 - L129 were not covered by tests

if err := apiutil.CollectEscapeStringOption("end_key", input, collector); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return
}

Check warning on line 134 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L132-L134

Added lines #L132 - L134 were not covered by tests
case types.ScatterRangeScheduler:
if err := apiutil.CollectEscapeStringOption("start_key", input, collector); err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
Expand Down
14 changes: 14 additions & 0 deletions server/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3209,6 +3209,20 @@ func TestAddScheduler(t *testing.T) {
re.NoError(err)
re.NoError(controller.AddScheduler(gls))

_, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{}), controller.RemoveScheduler)
re.Error(err)

gls, err = schedulers.CreateScheduler(types.BalanceRangeScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigSliceDecoder(types.BalanceRangeScheduler, []string{"learner", "tiflash", "1h", "100", "200"}), controller.RemoveScheduler)
re.NoError(err)
re.NoError(controller.AddScheduler(gls))
conf, err = gls.EncodeConfig()
re.NoError(err)
data = make(map[string]any)
re.NoError(json.Unmarshal(conf, &data))
re.Equal("learner", data["role"])
re.Equal("tiflash", data["engine"])
re.Equal(float64(time.Hour.Nanoseconds()), data["timeout"])

hb, err := schedulers.CreateScheduler(types.BalanceHotRegionScheduler, oc, storage.NewStorageWithMemoryBackend(), schedulers.ConfigJSONDecoder([]byte("{}")))
re.NoError(err)
conf, err = hb.EncodeConfig()
Expand Down
Loading
Loading