From 238996c8765c0a5e53602556613fb1e6981c8da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Carlos=20Ch=C3=A1vez?= Date: Thu, 16 Nov 2023 11:13:16 +0100 Subject: [PATCH] feat: uses memoization to decrease memory consumption. --- internal/memoize/README.md | 19 ++++ internal/memoize/noop.go | 10 ++ internal/memoize/nosync.go | 36 +++++++ internal/memoize/nosync_test.go | 167 +++++++++++++++++++++++++++++++ internal/memoize/sync.go | 47 +++++++++ internal/memoize/sync_test.go | 169 ++++++++++++++++++++++++++++++++ pm.go | 5 +- rx.go | 9 +- 8 files changed, 457 insertions(+), 5 deletions(-) create mode 100644 internal/memoize/README.md create mode 100644 internal/memoize/noop.go create mode 100644 internal/memoize/nosync.go create mode 100644 internal/memoize/nosync_test.go create mode 100644 internal/memoize/sync.go create mode 100644 internal/memoize/sync_test.go diff --git a/internal/memoize/README.md b/internal/memoize/README.md new file mode 100644 index 0000000..242d922 --- /dev/null +++ b/internal/memoize/README.md @@ -0,0 +1,19 @@ +# Memoize + +**IMPORTANT: This package is copied from ** + +Memoize allows to cache certain expensive function calls and +cache the result. The main advantage in Coraza is to memoize +the regexes and aho-corasick dictionaries when the connects +spins up more than one WAF in the same process and hence same +regexes are being compiled over and over. + +Currently it is opt-in under the `memoize_builders` build tag +as under a misuse (e.g. using after build time) it could lead +to a memory leak as currently the cache is global. + +**Important:** Connectors with *live reload* functionality (e.g. Caddy) +could lead to memory leaks which might or might not be negligible in +most of the cases as usually config changes in a WAF are about a few +rules, this is old objects will be still alive in memory until the program +stops. diff --git a/internal/memoize/noop.go b/internal/memoize/noop.go new file mode 100644 index 0000000..959408a --- /dev/null +++ b/internal/memoize/noop.go @@ -0,0 +1,10 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !memoize_builders + +package memoize + +func Do(_ string, fn func() (interface{}, error)) (interface{}, error) { + return fn() +} diff --git a/internal/memoize/nosync.go b/internal/memoize/nosync.go new file mode 100644 index 0000000..d238244 --- /dev/null +++ b/internal/memoize/nosync.go @@ -0,0 +1,36 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +package memoize + +import "sync" + +var doer = makeDoer(new(sync.Map)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + data, err := fn() + if err == nil { + cache.Store(key, data) + } + + return data, err, false + } +} diff --git a/internal/memoize/nosync_test.go b/internal/memoize/nosync_test.go new file mode 100644 index 0000000..c942a52 --- /dev/null +++ b/internal/memoize/nosync_test.go @@ -0,0 +1,167 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/internal/memoize/sync.go b/internal/memoize/sync.go new file mode 100644 index 0000000..a8d5c96 --- /dev/null +++ b/internal/memoize/sync.go @@ -0,0 +1,47 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "sync" + + "golang.org/x/sync/singleflight" +) + +var doer = makeDoer(new(sync.Map), new(singleflight.Group)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + // Combine memoized function with a cache store + value, err, _ := group.Do(key, func() (interface{}, error) { + data, innerErr := fn() + if innerErr == nil { + cache.Store(key, data) + } + + return data, innerErr + }) + + return value, err, false + } +} diff --git a/internal/memoize/sync_test.go b/internal/memoize/sync_test.go new file mode 100644 index 0000000..d995d1d --- /dev/null +++ b/internal/memoize/sync_test.go @@ -0,0 +1,169 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" + + "golang.org/x/sync/singleflight" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/pm.go b/pm.go index a8ba5c3..c2deade 100644 --- a/pm.go +++ b/pm.go @@ -12,6 +12,7 @@ import ( "path" "strings" + "github.com/corazawaf/coraza-wasilibs/internal/memoize" "github.com/corazawaf/coraza/v3/experimental/plugins" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" ahocorasick "github.com/wasilibs/go-aho-corasick" @@ -35,8 +36,10 @@ func newPM(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { DFA: true, }) + m, _ := memoize.Do(data, func() (interface{}, error) { return builder.Build(dict), nil }) + // TODO this operator is supposed to support snort data syntax: "@pm A|42|C|44|F" - return &pm{matcher: builder.Build(dict)}, nil + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func (o *pm) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/rx.go b/rx.go index c53a33c..bad99f5 100644 --- a/rx.go +++ b/rx.go @@ -8,6 +8,7 @@ import ( "strconv" "unicode/utf8" + "github.com/corazawaf/coraza-wasilibs/internal/memoize" "github.com/corazawaf/coraza/v3/experimental/plugins" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/wasilibs/go-re2" @@ -26,18 +27,18 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { // - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E data := fmt.Sprintf("(?sm)%s", options.Arguments) - var re *re2.Regexp + var re interface{} var err error if matchesArbitraryBytes(data) { - re, err = experimental.CompileLatin1(data) + re, err = memoize.Do(data, func() (interface{}, error) { return experimental.CompileLatin1(data) }) } else { - re, err = re2.Compile(data) + re, err = memoize.Do(data, func() (interface{}, error) { return re2.Compile(data) }) } if err != nil { return nil, err } - return &rx{re: re}, nil + return &rx{re: re.(*re2.Regexp)}, nil } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool {