Skip to content

Commit

Permalink
lightning: retry for WaitN error caused by store-write-bwlimit (pingc…
Browse files Browse the repository at this point in the history
  • Loading branch information
lance6716 authored Mar 1, 2024
1 parent 6099ec9 commit 8e29115
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions br/pkg/lightning/common/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ go_test(
"@com_github_tikv_pd_client//http",
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//status",
"@org_golang_x_time//rate",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_multierr//:multierr",
],
Expand Down
2 changes: 2 additions & 0 deletions br/pkg/lightning/common/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ var retryableErrorMsgList = []string{
// this error happens when distsql.Checksum calls TiKV
// see https://github.com/pingcap/tidb/blob/2c3d4f1ae418881a95686e8b93d4237f2e76eec6/store/copr/coprocessor.go#L941
"coprocessor task terminated due to exceeding the deadline",
// error returned by rate.Limiter's WaitN, see https://github.com/pingcap/tidb/issues/51383
"rate: wait",
}

func isRetryableFromErrorMessage(err error) bool {
Expand Down
11 changes: 11 additions & 0 deletions br/pkg/lightning/common/retry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ import (
"net"
"net/url"
"testing"
"time"

"github.com/go-sql-driver/mysql"
"github.com/pingcap/errors"
tmysql "github.com/pingcap/tidb/pkg/errno"
drivererr "github.com/pingcap/tidb/pkg/store/driver/error"
"github.com/stretchr/testify/require"
"go.uber.org/multierr"
"golang.org/x/time/rate"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
Expand Down Expand Up @@ -117,4 +119,13 @@ func TestIsRetryableError(t *testing.T) {
require.False(t, IsRetryableError(multierr.Combine(context.Canceled, &net.DNSError{IsTimeout: true})))

require.True(t, IsRetryableError(errors.New("other error: Coprocessor task terminated due to exceeding the deadline")))

// error from limiter
l := rate.NewLimiter(rate.Limit(1), 1)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
// requesting 10 tokens from a limiter with burst 1 under a 1 second timeout makes WaitN fail with a "rate: Wait..." error
err = l.WaitN(ctx, 10)
require.Error(t, err)
require.True(t, IsRetryableError(err))
}

0 comments on commit 8e29115

Please sign in to comment.