Skip to content

Commit

Permalink
Merge pull request kubernetes-sigs#2316 from kubernetes-sigs/switch-d…
Browse files Browse the repository at this point in the history
…ataplane-throttling

fix: switch to data plane API call when create snapshot is throttled
  • Loading branch information
andyzhangx authored Jan 4, 2025
2 parents ce11503 + 1f9609f commit 98c1c74
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pkg/azurefile/azurefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ func (d *Driver) DeleteFileShare(ctx context.Context, subsID, resourceGroup, acc

if isRetriableError(err) {
klog.Warningf("DeleteFileShare(%s) on account(%s) failed with error(%v), waiting for retrying", shareName, accountName, err)
if strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tooManyRequests)) {
if isThrottlingError(err) {
klog.Warningf("switch to use data plane API instead for account %s since it's throttled", accountName)
d.dataPlaneAPIAccountCache.Set(accountName, "")
return true, err
Expand Down
8 changes: 8 additions & 0 deletions pkg/azurefile/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,10 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ
}
}

if !useDataPlaneAPI {
useDataPlaneAPI = d.useDataPlaneAPI(ctx, sourceVolumeID, accountName)
}

mc := metrics.NewMetricContext(azureFileCSIDriverName, "controller_create_snapshot", rgName, subsID, d.Name)
isOperationSucceeded := false
defer func() {
Expand Down Expand Up @@ -935,6 +939,10 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ
} else {
snapshotShare, err := d.cloud.FileClient.WithSubscriptionID(subsID).CreateFileShare(ctx, rgName, accountName, &fileclient.ShareOptions{Name: fileShareName, Metadata: map[string]*string{snapshotNameKey: &snapshotName}}, snapshotsExpand)
if err != nil {
if isThrottlingError(err) {
klog.Warningf("switch to use data plane API instead for account %s since it's throttled", accountName)
d.dataPlaneAPIAccountCache.Set(accountName, "")
}
return nil, status.Errorf(codes.Internal, "create snapshot from(%s) failed with %v, accountName: %q", sourceVolumeID, err, accountName)
}

Expand Down
13 changes: 9 additions & 4 deletions pkg/azurefile/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,16 @@ func isRetriableError(err error) bool {
return false
}

func sleepIfThrottled(err error, defaultSleepSec int) {
if err == nil {
return
func isThrottlingError(err error) bool {
if err != nil {
errMsg := strings.ToLower(err.Error())
return strings.Contains(errMsg, strings.ToLower(tooManyRequests)) || strings.Contains(errMsg, clientThrottled)
}
if strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tooManyRequests)) || strings.Contains(strings.ToLower(err.Error()), clientThrottled) {
return false
}

func sleepIfThrottled(err error, defaultSleepSec int) {
if isThrottlingError(err) {
retryAfter := getRetryAfterSeconds(err)
if retryAfter == 0 {
retryAfter = defaultSleepSec
Expand Down
42 changes: 42 additions & 0 deletions pkg/azurefile/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -856,3 +856,45 @@ func TestGetSnapshotID(t *testing.T) {
}
}
}

func TestIsThrottlingError(t *testing.T) {
tests := []struct {
desc string
err error
expected bool
}{
{
desc: "nil error",
err: nil,
expected: false,
},
{
desc: "no match",
err: errors.New("no match"),
expected: false,
},
{
desc: "match error message",
err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 217s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""),
expected: true,
},
{
desc: "match error message exceeds 1200s",
err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 2170s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""),
expected: true,
},
{
desc: "match error message with TooManyRequests throttling",
err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 2170s, HTTPStatusCode: 429, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"TooManyRequests\""),
expected: true,
},
}

for _, test := range tests {
result := isThrottlingError(test.err)
if result != test.expected {
t.Errorf("desc: (%s), input: err(%v), IsThrottlingError returned with bool(%t), not equal to expected(%t)",
test.desc, test.err, result, test.expected)
}
}
}

0 comments on commit 98c1c74

Please sign in to comment.