Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unwrap original error from temporary when reporting to operation_errors metric #971

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License.
package metrics

import (
"errors"
"fmt"
"net/http"
"os"
Expand All @@ -23,6 +24,8 @@ import (
"google.golang.org/grpc/status"
"k8s.io/component-base/metrics"
"k8s.io/klog/v2"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/cloud_provider/file"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/common"
)

const (
Expand Down Expand Up @@ -147,7 +150,7 @@ func (mm *MetricsManager) recordComponentVersionMetric() error {
}

func (mm *MetricsManager) RecordOperationMetrics(opErr error, methodName string, filestoreMode string, opDuration time.Duration) {
operationSeconds.WithLabelValues(getErrorCode(opErr), methodName, filestoreMode).Observe(opDuration.Seconds())
operationSeconds.WithLabelValues(errorCodeLabelValue(opErr), methodName, filestoreMode).Observe(opDuration.Seconds())
}

func (mm *MetricsManager) RecordKubeAPIMetrics(opErr error, resourceType, opType, opSource string, opDuration time.Duration) {
Expand Down Expand Up @@ -194,6 +197,20 @@ func (mm *MetricsManager) EmitGKEComponentVersion() error {
return nil
}

// errorCodeLabelValue returns the label value for the given operation error.
func errorCodeLabelValue(operationErr error) string {
err := codes.OK.String()
if operationErr != nil {
// If the operationErr is a TemporaryError, unwrap the temporary error before passing it to CodeForError.
var tempErr *common.TemporaryError
if errors.As(operationErr, &tempErr) {
operationErr = tempErr.Unwrap()
}
err = getErrorCode(file.StatusError(operationErr))
}
return err
}

// Server represents any type that could serve HTTP requests for the metrics
// endpoint.
type Server interface {
Expand Down
83 changes: 83 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
package metrics

import (
"context"
"errors"
"fmt"
"net/http"
"testing"

"google.golang.org/api/googleapi"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/google/go-cmp/cmp"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/common"
)

const (
Expand All @@ -24,3 +34,76 @@ func TestProcessStartTimeMetricExist(t *testing.T) {

t.Fatalf("Metrics does not contain %v. Scraped content: %v", ProcessStartTimeMetric, metricsFamilies)
}


func TestErrorCodeLabelValue(t *testing.T) {
testCases := []struct {
name string
operationErr error
wantErrorCode string
}{
{
name: "Not googleapi.Error",
operationErr: errors.New("I am not a googleapi.Error"),
wantErrorCode: "Internal",
},
{
name: "User error",
operationErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
wantErrorCode: "InvalidArgument",
},
{
name: "googleapi.Error but not a user error",
operationErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
wantErrorCode: "Internal",
},
{
name: "context canceled error",
operationErr: context.Canceled,
wantErrorCode: "Canceled",
},
{
name: "context deadline exceeded error",
operationErr: context.DeadlineExceeded,
wantErrorCode: "DeadlineExceeded",
},
{
name: "status error with Aborted error code",
operationErr: status.Error(codes.Aborted, "aborted error"),
wantErrorCode: "Aborted",
},
{
name: "user multiattach error",
operationErr: fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'"),
wantErrorCode: "Internal",
},
{
name: "TemporaryError that wraps googleapi error",
operationErr: common.NewTemporaryError(codes.Unavailable, &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"}),
wantErrorCode: "InvalidArgument",
},
{
name: "TemporaryError that wraps fmt.Errorf, which wraps googleapi error",
operationErr: common.NewTemporaryError(codes.Aborted, fmt.Errorf("got error: %w", &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"})),
wantErrorCode: "InvalidArgument",
},
{
name: "TemporaryError that wraps status error",
operationErr: common.NewTemporaryError(codes.Aborted, status.Error(codes.InvalidArgument, "User error with bad request")),
wantErrorCode: "InvalidArgument",
},
{
name: "TemporaryError that wraps multiattach error",
operationErr: common.NewTemporaryError(codes.Unavailable, fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'")),
wantErrorCode: "Internal",
},
}

for _, tc := range testCases {
t.Logf("Running test: %v", tc.name)
errCode := errorCodeLabelValue(tc.operationErr)
if diff := cmp.Diff(tc.wantErrorCode, errCode); diff != "" {
t.Errorf("%s: -want err, +got err\n%s", tc.name, diff)
}
}
}