From d4c44a56836a02f20bd913ff2cdac22583629bb4 Mon Sep 17 00:00:00 2001 From: Timo Beckers Date: Wed, 19 Feb 2025 16:22:43 +0100 Subject: [PATCH 1/3] prog: factor retry decision into separate testable function This commit special-cases LogDisabled into a clause without a retry loop since it doesn't need a buffer allocated. It also simplifies the logic somewhat. A follow-up commit will add another separate clause for a new LogSizeOverride CollectionOption to factor out even more complexity related to buffer sizing. The main purpose of this commit is to extract the decision-making logic around retries. Instead of directly manipulating loop control flow, a function is introduced that issues a retry verdict, and it can be tested indendently of the program load action. Signed-off-by: Timo Beckers --- prog.go | 100 +++++++++++++++++++++++++++++---------------------- prog_test.go | 40 +++++++++++++++++++++ 2 files changed, 97 insertions(+), 43 deletions(-) diff --git a/prog.go b/prog.go index 8fcae6a3f..85adf3b0f 100644 --- a/prog.go +++ b/prog.go @@ -409,59 +409,39 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er } } - // The caller requested a specific verifier log level. Set up the log buffer - // so that there is a chance of loading the program in a single shot. - logSize := internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize) var logBuf []byte - if !opts.LogDisabled && opts.LogLevel != 0 { - logBuf = make([]byte, logSize) - attr.LogLevel = opts.LogLevel - attr.LogSize = uint32(len(logBuf)) - attr.LogBuf = sys.NewSlicePointer(logBuf) - } + var fd *sys.FD - for { - var fd *sys.FD + if opts.LogDisabled { + // Loading with logging disabled should never retry. fd, err = sys.ProgLoad(attr) if err == nil { - return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil - } - - if opts.LogDisabled { - break + return &Program{"", fd, spec.Name, "", spec.Type}, nil } - - if attr.LogTrueSize != 0 && attr.LogSize >= attr.LogTrueSize { - // The log buffer already has the correct size. - break - } - - if attr.LogSize != 0 && !errors.Is(err, unix.ENOSPC) { - // Logging is enabled and the error is not ENOSPC, so we can infer - // that the log buffer is large enough. - break + } else { + // Only specify log size if log level is also specified. Setting size + // without level results in EINVAL. Level will be bumped to LogLevelBranch + // if the first load fails. + if opts.LogLevel != 0 { + attr.LogLevel = opts.LogLevel + attr.LogSize = internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize) } - if attr.LogLevel == 0 { - // Logging is not enabled but loading the program failed. Enable - // basic logging. - attr.LogLevel = LogLevelBranch - } + for { + if attr.LogLevel != 0 { + logBuf = make([]byte, attr.LogSize) + attr.LogBuf = sys.NewSlicePointer(logBuf) + } - // Make an educated guess how large the buffer should be by multiplying. - // Ensure the size doesn't overflow. - const factor = 2 - logSize = internal.Between(logSize, minVerifierLogSize, maxVerifierLogSize/factor) - logSize *= factor + fd, err = sys.ProgLoad(attr) + if err == nil { + return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil + } - if attr.LogTrueSize != 0 { - // The kernel has given us a hint how large the log buffer has to be. - logSize = attr.LogTrueSize + if !retryLogAttrs(attr, opts.LogSizeStart, err) { + break + } } - - logBuf = make([]byte, logSize) - attr.LogSize = logSize - attr.LogBuf = sys.NewSlicePointer(logBuf) } end := bytes.IndexByte(logBuf, 0) @@ -511,6 +491,40 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er return nil, internal.ErrorWithLog("load program", err, logBuf) } +func retryLogAttrs(attr *sys.ProgLoadAttr, startSize uint32, err error) bool { + // ENOSPC means the log was enabled on the previous iteration, so we only + // need to grow the buffer. + if errors.Is(err, unix.ENOSPC) { + if attr.LogTrueSize != 0 { + // Kernel supports LogTrueSize and previous iteration undershot the buffer + // size. Try again with the given true size. + attr.LogSize = attr.LogTrueSize + return true + } + + // ENOSPC means we've loaded the program before and the log buffer was too + // small. Make an educated guess how large the buffer should be by + // multiplying. Ensure the size doesn't overflow. + const factor = 2 + attr.LogSize = internal.Between(attr.LogSize, minVerifierLogSize, maxVerifierLogSize/factor) + attr.LogSize *= factor + + return true + } + + if attr.LogLevel == 0 { + // Loading the program failed, it wasn't a buffer-related error, and the log + // was disabled the previous iteration. Enable basic logging and retry. + attr.LogLevel = LogLevelBranch + attr.LogSize = internal.Between(startSize, minVerifierLogSize, maxVerifierLogSize) + return true + } + + // Loading the program failed for a reason other than buffer size and the log + // was already enabled the previous iteration. Don't retry. + return false +} + // NewProgramFromFD creates a program from a raw fd. // // You should not use fd after calling this function. diff --git a/prog_test.go b/prog_test.go index 7781a0ca6..e1877b8a1 100644 --- a/prog_test.go +++ b/prog_test.go @@ -544,6 +544,46 @@ func TestProgramVerifierLog(t *testing.T) { prog.Close() } +func TestProgramVerifierLogRetry(t *testing.T) { + someError := errors.New("not a buffer error") + + t.Run("retry with oversized buffer", func(t *testing.T) { + // First load failure, without logging enabled. Retry with logging enabled. + attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) + qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) + qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize)) + + // Second failure with logging enabled. No buffer error, don't retry. + qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError))) + qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) + qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize)) + }) + + t.Run("retry with undersized buffer, no log_true_size", func(t *testing.T) { + // First load failure, without logging enabled. Retry with larger buffer. + attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) + + // Second failure, this time the kernel signals an undersized buffer. Retry + // with double the size. + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) + qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize*2)) + }) + + t.Run("retry with undersized buffer, with log_true_size", func(t *testing.T) { + // First load failure, without logging enabled. Retry with larger buffer. + attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) + + // Second failure, the kernel signals undersized buffer and also sets + // log_true_size. Retry with the exact size required. + attr.LogTrueSize = 123 + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) + qt.Assert(t, qt.Equals(attr.LogSize, 123)) + }) +} + func TestProgramWithUnsatisfiedMap(t *testing.T) { coll, err := LoadCollectionSpec("testdata/loader-el.elf") if err != nil { From 9e5357d5fb222906d24afeb560dc9b36d8ad4011 Mon Sep 17 00:00:00 2001 From: Timo Beckers Date: Wed, 19 Feb 2025 16:55:36 +0100 Subject: [PATCH 2/3] prog: give up when max verifier log size is reached Previously, loading a program would loop forever if the log didn't fit in the kernel's max log size. Signed-off-by: Timo Beckers --- prog.go | 16 ++++++++++++++-- prog_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/prog.go b/prog.go index 85adf3b0f..0e8f2d41b 100644 --- a/prog.go +++ b/prog.go @@ -503,9 +503,21 @@ func retryLogAttrs(attr *sys.ProgLoadAttr, startSize uint32, err error) bool { } // ENOSPC means we've loaded the program before and the log buffer was too - // small. Make an educated guess how large the buffer should be by - // multiplying. Ensure the size doesn't overflow. + // small. + if attr.LogSize == maxVerifierLogSize { + // Maximum buffer size reached, don't grow or retry. + return false + } + + // Ensure the size doesn't overflow. const factor = 2 + if attr.LogSize >= maxVerifierLogSize/factor { + attr.LogSize = maxVerifierLogSize + return true + } + + // Make an educated guess how large the buffer should be by multiplying. Due + // to int division, this rounds down odd sizes. attr.LogSize = internal.Between(attr.LogSize, minVerifierLogSize, maxVerifierLogSize/factor) attr.LogSize *= factor diff --git a/prog_test.go b/prog_test.go index e1877b8a1..8b2b3bfaf 100644 --- a/prog_test.go +++ b/prog_test.go @@ -582,6 +582,30 @@ func TestProgramVerifierLogRetry(t *testing.T) { qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) qt.Assert(t, qt.Equals(attr.LogSize, 123)) }) + + t.Run("grow to maximum buffer size", func(t *testing.T) { + // Previous loads pushed the log size to (or above) half of the maximum, + // which would make it overflow on the next retry. Make sure the log size + // actually hits the maximum so we can bail out. + attr := &sys.ProgLoadAttr{LogLevel: LogLevelBranch, LogSize: maxVerifierLogSize / 2} + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) + qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize)) + + // Don't retry if the buffer is already at the maximum size. + qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC))) + }) + + t.Run("start at maximum buffer size", func(t *testing.T) { + // The user requested a log buffer exceeding the maximum size, but no log + // level. Retry with the maximum size and default log level. + attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} + qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, math.MaxUint32, unix.EINVAL))) + qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) + qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize)) + + // Log still doesn't fit maximum-size buffer. Don't retry. + qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC))) + }) } func TestProgramWithUnsatisfiedMap(t *testing.T) { From fbe9df5b62869e8f83d1fe81a07d4b6a1beb56ea Mon Sep 17 00:00:00 2001 From: Timo Beckers Date: Wed, 19 Feb 2025 17:11:04 +0100 Subject: [PATCH 3/3] prog: prevent prog_load loop of death To limit the damage of any future regressions, limit the attempts we'll take to load a program. Signed-off-by: Timo Beckers --- prog.go | 12 ++++++++++++ prog_test.go | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/prog.go b/prog.go index 0e8f2d41b..91ce7c90c 100644 --- a/prog.go +++ b/prog.go @@ -56,6 +56,12 @@ const minVerifierLogSize = 64 * 1024 // future, but avoid the unnecessary EINVAL for now. const maxVerifierLogSize = math.MaxUint32 >> 2 +// maxVerifierAttempts is the maximum number of times the verifier will retry +// loading a program with a growing log buffer before giving up. Since we double +// the log size on every attempt, this is the absolute maximum number of +// attempts before the buffer reaches [maxVerifierLogSize]. +const maxVerifierAttempts = 30 + // ProgramOptions control loading a program into the kernel. type ProgramOptions struct { // Bitmap controlling the detail emitted by the kernel's eBPF verifier log. @@ -427,6 +433,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er attr.LogSize = internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize) } + attempts := 1 for { if attr.LogLevel != 0 { logBuf = make([]byte, attr.LogSize) @@ -441,6 +448,11 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er if !retryLogAttrs(attr, opts.LogSizeStart, err) { break } + + if attempts >= maxVerifierAttempts { + return nil, fmt.Errorf("load program: %w (bug: hit %d verifier attempts)", err, maxVerifierAttempts) + } + attempts++ } } diff --git a/prog_test.go b/prog_test.go index 8b2b3bfaf..9ce7e83fe 100644 --- a/prog_test.go +++ b/prog_test.go @@ -606,6 +606,17 @@ func TestProgramVerifierLogRetry(t *testing.T) { // Log still doesn't fit maximum-size buffer. Don't retry. qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC))) }) + + t.Run("ensure growth terminates within max attempts", func(t *testing.T) { + attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} + var terminated bool + for i := 1; i <= maxVerifierAttempts; i++ { + if !retryLogAttrs(attr, 0, syscall.ENOSPC) { + terminated = true + } + } + qt.Assert(t, qt.IsTrue(terminated)) + }) } func TestProgramWithUnsatisfiedMap(t *testing.T) {