Skip to content

scheduler: add test for creating evict-leader-scheduler twice #8757

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 21, 2024
2 changes: 2 additions & 0 deletions pkg/core/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ func (s *StoresInfo) ResetStores() {
func (s *StoresInfo) PauseLeaderTransfer(storeID uint64, direction constant.Direction) error {
s.Lock()
defer s.Unlock()
log.Info("pause store leader transfer", zap.Uint64("store-id", storeID), zap.String("direction", direction.String()))
store, ok := s.stores[storeID]
if !ok {
return errs.ErrStoreNotFound.FastGenByArgs(storeID)
Expand All @@ -810,6 +811,7 @@ func (s *StoresInfo) PauseLeaderTransfer(storeID uint64, direction constant.Dire
func (s *StoresInfo) ResumeLeaderTransfer(storeID uint64, direction constant.Direction) {
s.Lock()
defer s.Unlock()
log.Info("resume store leader transfer", zap.Uint64("store-id", storeID), zap.String("direction", direction.String()))
store, ok := s.stores[storeID]
if !ok {
log.Warn("try to clean a store's pause state, but it is not found. It may be cleanup",
Expand Down
4 changes: 3 additions & 1 deletion pkg/schedule/schedulers/grant_leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
}

func (conf *grantLeaderSchedulerConfig) buildWithArgs(args []string) error {
if len(args) != 1 {
if len(args) < 1 {
return errs.ErrSchedulerConfig.FastGenByArgs("id")
}

Expand Down Expand Up @@ -271,6 +271,7 @@

err := handler.config.buildWithArgs(args)
if err != nil {
log.Error("fail to build config", errs.ZapError(err))

Check warning on line 274 in pkg/schedule/schedulers/grant_leader.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/grant_leader.go#L274

Added line #L274 was not covered by tests
handler.config.Lock()
handler.config.cluster.ResumeLeaderTransfer(id, constant.Out)
handler.config.Unlock()
Expand All @@ -279,6 +280,7 @@
}
err = handler.config.persist()
if err != nil {
log.Error("fail to persist config", errs.ZapError(err))

Check warning on line 283 in pkg/schedule/schedulers/grant_leader.go

View check run for this annotation

Codecov / codecov/patch

pkg/schedule/schedulers/grant_leader.go#L283

Added line #L283 was not covered by tests
_, _ = handler.config.removeStore(id)
handler.rd.JSON(w, http.StatusInternalServerError, err.Error())
return
Expand Down
4 changes: 2 additions & 2 deletions pkg/schedule/schedulers/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func schedulersRegister() {
// evict leader
RegisterSliceDecoderBuilder(types.EvictLeaderScheduler, func(args []string) ConfigDecoder {
return func(v any) error {
if len(args) != 1 {
if len(args) < 1 {
return errs.ErrSchedulerConfig.FastGenByArgs("id")
}
conf, ok := v.(*evictLeaderSchedulerConfig)
Expand Down Expand Up @@ -268,7 +268,7 @@ func schedulersRegister() {
// grant leader
RegisterSliceDecoderBuilder(types.GrantLeaderScheduler, func(args []string) ConfigDecoder {
return func(v any) error {
if len(args) != 1 {
if len(args) < 1 {
return errs.ErrSchedulerConfig.FastGenByArgs("id")
}

Expand Down
4 changes: 2 additions & 2 deletions plugin/scheduler_example/evict_leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const (
func init() {
schedulers.RegisterSliceDecoderBuilder(userEvictLeaderScheduler, func(args []string) schedulers.ConfigDecoder {
return func(v any) error {
if len(args) != 1 {
if len(args) < 1 {
return errors.New("should specify the store-id")
}
conf, ok := v.(*evictLeaderSchedulerConfig)
Expand Down Expand Up @@ -101,7 +101,7 @@ type evictLeaderSchedulerConfig struct {

// BuildWithArgs builds the config with the args.
func (conf *evictLeaderSchedulerConfig) BuildWithArgs(args []string) error {
if len(args) != 1 {
if len(args) < 1 {
return errors.New("should specify the store-id")
}

Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/realcluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type clusterSuite struct {
}

var (
playgroundLogDir = filepath.Join("tmp", "real_cluster", "playground")
playgroundLogDir = "/tmp/real_cluster/playground"
tiupBin = os.Getenv("HOME") + "/.tiup/bin/tiup"
)

Expand Down
71 changes: 71 additions & 0 deletions tests/integrations/realcluster/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ import (
"testing"
"time"

"github.com/pingcap/log"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/tikv/pd/client/http"
"github.com/tikv/pd/client/utils/testutil"
"github.com/tikv/pd/pkg/schedule/labeler"
"github.com/tikv/pd/pkg/schedule/types"
"go.uber.org/zap"
)

type schedulerSuite struct {
Expand Down Expand Up @@ -201,3 +203,72 @@ func (s *schedulerSuite) TestRegionLabelDenyScheduler() {
return true
}, testutil.WithWaitFor(time.Minute))
}

func (s *schedulerSuite) TestGrantOrEvictLeaderTwice() {
re := require.New(s.T())
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

pdHTTPCli := http.NewClient("pd-real-cluster-test", getPDEndpoints(s.T()))
regions, err := pdHTTPCli.GetRegions(ctx)
re.NoError(err)
re.NotEmpty(regions.Regions)
region1 := regions.Regions[0]

var i int
evictLeader := func() {
re.NoError(pdHTTPCli.CreateScheduler(ctx, types.EvictLeaderScheduler.String(), uint64(region1.Leader.StoreID)))
// if the second evict leader scheduler cause the pause-leader-filter
// disable, the balance-leader-scheduler need some time to transfer
// leader. See details in https://github.com/tikv/pd/issues/8756.
if i == 1 {
time.Sleep(3 * time.Second)
}
Comment on lines +224 to +226
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should cover this kind of waiting inside the testutil.Eventually below.

testutil.Eventually(re, func() bool {
regions, err := pdHTTPCli.GetRegions(ctx)
if err != nil {
log.Error("get regions failed", zap.Error(err))
return false
}
for _, region := range regions.Regions {
if region.Leader.StoreID == region1.Leader.StoreID {
return false
}
}
return true
}, testutil.WithWaitFor(time.Minute))

i++
}

evictLeader()
evictLeader()
pdHTTPCli.DeleteScheduler(ctx, types.EvictLeaderScheduler.String())

i = 0
grantLeader := func() {
re.NoError(pdHTTPCli.CreateScheduler(ctx, types.GrantLeaderScheduler.String(), uint64(region1.Leader.StoreID)))
if i == 1 {
time.Sleep(3 * time.Second)
}
Comment on lines +251 to +253
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

testutil.Eventually(re, func() bool {
regions, err := pdHTTPCli.GetRegions(ctx)
if err != nil {
log.Error("get regions failed", zap.Error(err))
return false
}
for _, region := range regions.Regions {
if region.Leader.StoreID != region1.Leader.StoreID {
return false
}
}
return true
}, testutil.WithWaitFor(2*time.Minute))

i++
}

grantLeader()
grantLeader()
pdHTTPCli.DeleteScheduler(ctx, types.GrantLeaderScheduler.String())
}