Skip to content

Commit

Permalink
Merge branch 'main' into mutex_to_sync_map
Browse files Browse the repository at this point in the history
  • Loading branch information
piyushroshan authored Nov 21, 2024
2 parents a79c5ae + 244ba00 commit c0d3b05
Show file tree
Hide file tree
Showing 14 changed files with 220 additions and 31 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

- name: Initialize CodeQL
uses: github/codeql-action/init@396bb3e45325a47dd9ef434068033c6d5bb0d11a # v3
uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3
with:
languages: go

- name: Autobuild
uses: github/codeql-action/autobuild@396bb3e45325a47dd9ef434068033c6d5bb0d11a # v3
uses: github/codeql-action/autobuild@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@396bb3e45325a47dd9ef434068033c6d5bb0d11a # v3
uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3
8 changes: 4 additions & 4 deletions .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,28 +48,28 @@ jobs:
export BUILD_TAGS=${{ matrix.build-flag }}
go run mage.go coverage
- name: "Codecov: General"
uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5
if: ${{ matrix.go-version == '1.22.x' }}
with:
files: build/coverage.txt
flags: default,${{ matrix.build-flag }}
token: ${{ secrets.CODECOV_TOKEN }}
- name: "Codecov: Examples"
uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5
if: ${{ matrix.go-version == '1.22.x' }}
with:
files: build/coverage-examples.txt
flags: examples+${{ matrix.build-flag }}
token: ${{ secrets.CODECOV_TOKEN }}
- name: "Codecov: FTW"
uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5
if: ${{ matrix.go-version == '1.22.x' }}
with:
files: build/coverage-ftw.txt
flags: ftw,${{ matrix.build-flag }}
token: ${{ secrets.CODECOV_TOKEN }}
- name: "Codecov: Tinygo"
uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5
# only if coverage-tinygo.txt exists
if: ${{ matrix.go-version == '1.22.x' && hashFiles('build/coverage-tinygo.txt') != '' }}
with:
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ dictionaries to reduce memory consumption in deployments that launch several cor
instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76)
* `no_fs_access` - indicates that the target environment has no access to FS in order to not leverage OS' filesystem related functionality e.g. file body buffers.
* `coraza.rule.case_sensitive_args_keys` - enables case-sensitive matching for ARGS keys, aligning Coraza behavior with RFC 3986 specification. It will be enabled by default in the next major version.
* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876)

## E2E Testing

Expand Down
26 changes: 18 additions & 8 deletions internal/collections/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,15 @@ func (c *Map) Get(key string) []string {
if !c.isCaseSensitive {
key = strings.ToLower(key)
}
var values []string
for _, a := range c.data[key] {
values = append(values, a.value)
values := c.data[key]
if len(values) == 0 {
return nil
}
result := make([]string, len(values))
for i, v := range values {
result[i] = v.value
}
return values
return result
}

// FindRegex returns all map elements whose key matches the regular expression.
Expand Down Expand Up @@ -120,16 +124,22 @@ func (c *Map) Add(key string, value string) {
c.data[key] = append(c.data[key], aVal)
}

// Set sets the value of a key with the array of strings passed. If the key already exists, it will be overwritten.
// Sets the value of a key with the array of strings passed. If the key already exists, it will be overwritten.
func (c *Map) Set(key string, values []string) {
originalKey := key
if !c.isCaseSensitive {
key = strings.ToLower(key)
}
c.data[key] = make([]keyValue, 0, len(values))
for _, v := range values {
c.data[key] = append(c.data[key], keyValue{key: originalKey, value: v})
dataSlice, exists := c.data[key]
if !exists || cap(dataSlice) < len(values) {
dataSlice = make([]keyValue, len(values))
} else {
dataSlice = dataSlice[:len(values)] // Reuse existing slice with the same length
}
for i, v := range values {
dataSlice[i] = keyValue{key: originalKey, value: v}
}
c.data[key] = dataSlice
}

// SetIndex sets the value of a key at the specified index. If the key already exists, it will be overwritten.
Expand Down
22 changes: 22 additions & 0 deletions internal/collections/map_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,25 @@ func TestNewCaseSensitiveKeyMap(t *testing.T) {
}

}

func BenchmarkTxSetGet(b *testing.B) {
keys := make(map[int]string, b.N)
for i := 0; i < b.N; i++ {
keys[i] = fmt.Sprintf("key%d", i)
}
c := NewCaseSensitiveKeyMap(variables.RequestHeaders)

b.Run("Set", func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.Set(keys[i], []string{"value2"})
}
})
b.Run("Get", func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.Get(keys[i])
}
})
b.ReportAllocs()
}
8 changes: 4 additions & 4 deletions internal/collections/named.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ func (c *NamedCollection) Len() int {

// Data is an internal method used for serializing to JSON
func (c *NamedCollection) Data() map[string][]string {
result := map[string][]string{}
result := make(map[string][]string, len(c.data))
for k, v := range c.data {
result[k] = make([]string, 0, len(v))
for _, a := range v {
result[k] = append(result[k], a.value)
result[k] = make([]string, len(v))
for i, a := range v {
result[k][i] = a.value
}
}
return result
Expand Down
12 changes: 6 additions & 6 deletions internal/corazarules/rule_match.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,27 @@ type MatchData struct {

var _ types.MatchData = (*MatchData)(nil)

func (m *MatchData) Variable() variables.RuleVariable {
func (m MatchData) Variable() variables.RuleVariable {
return m.Variable_
}

func (m *MatchData) Key() string {
func (m MatchData) Key() string {
return m.Key_
}

func (m *MatchData) Value() string {
func (m MatchData) Value() string {
return m.Value_
}

func (m *MatchData) Message() string {
func (m MatchData) Message() string {
return m.Message_
}

func (m *MatchData) Data() string {
func (m MatchData) Data() string {
return m.Data_
}

func (m *MatchData) ChainLevel() int {
func (m MatchData) ChainLevel() int {
return m.ChainLevel_
}

Expand Down
8 changes: 8 additions & 0 deletions internal/operators/multilineregex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = true
8 changes: 8 additions & 0 deletions internal/operators/multilineregex_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = false
17 changes: 13 additions & 4 deletions internal/operators/rx.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ type rx struct {
var _ plugintypes.Operator = (*rx)(nil)

func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
// (?sm) enables multiline and dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://stackoverflow.com/a/27680233
// - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data := fmt.Sprintf("(?sm)%s", options.Arguments)
var data string
if shouldNotUseMultilineRegexesOperatorByDefault {
// (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://github.com/google/re2/wiki/Syntax
// - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data = fmt.Sprintf("(?s)%s", options.Arguments)
} else {
// TODO: deprecate multiline modifier set by default in Coraza v4
// CRS rules will explicitly set the multiline modifier when needed
// Having it enabled by default can lead to false positives and less performance
// See https://github.com/corazawaf/coraza/pull/876
data = fmt.Sprintf("(?sm)%s", options.Arguments)
}

if matchesArbitraryBytes(data) {
// Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does
Expand Down
119 changes: 119 additions & 0 deletions internal/operators/rx_no_multiline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

import (
"fmt"
"testing"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazawaf"
)

func TestRx(t *testing.T) {
tests := []struct {
pattern string
input string
want bool
}{
{
pattern: "som(.*)ta",
input: "somedata",
want: true,
},
{
pattern: "som(.*)ta",
input: "notdata",
want: false,
},
{
pattern: "ハロー",
input: "ハローワールド",
want: true,
},
{
pattern: "ハロー",
input: "グッバイワールド",
want: false,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00\x05t\x00\x04test",
want: true,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00t\x00\x04test",
want: false,
},
{
// Requires dotall
pattern: `hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Requires multiline disabled by default
pattern: `^hello.*world`,
input: "test\nhello\nworld",
want: false,
},
{
// Makes sure multiline can be enabled by the user
pattern: `(?m)^hello.*world`,
input: "test\nhello\nworld",
want: true,
},
{
// Makes sure, (?s) passed by the user does not
// break the regex.
pattern: `(?s)hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Make sure user flags are also applied
pattern: `(?i)hello.*world`,
input: "testHELLO\nworld",
want: true,
},
{
// The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$'
// so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6)
// It seems that re2 already behaves like that by default.
pattern: `123$`,
input: "123\n",
want: false,
},
{
// Dollar endonly match
pattern: `123$`,
input: "test123",
want: true,
},
}

for _, tc := range tests {
tt := tc
t.Run(fmt.Sprintf("%s/%s", tt.pattern, tt.input), func(t *testing.T) {

opts := plugintypes.OperatorOptions{
Arguments: tt.pattern,
}
rx, err := newRX(opts)
if err != nil {
t.Error(err)
}
waf := corazawaf.NewWAF()
tx := waf.NewTransaction()
tx.Capture = true
res := rx.Evaluate(tx, tt.input)
if res != tt.want {
t.Errorf("want %v, got %v", tt.want, res)
}
})
}
}
2 changes: 2 additions & 0 deletions internal/operators/rx_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

import (
Expand Down
12 changes: 11 additions & 1 deletion magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ func Test() error {
return err
}

// Execute FTW tests with coraza.rule.no_regex_multiline as well
if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "./testing/coreruleset"); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "-run=^TestRx", "./..."); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.case_sensitive_args_keys", "-run=^TestCaseSensitive", "./..."); err != nil {
return err
}
Expand Down Expand Up @@ -174,7 +183,7 @@ func Coverage() error {
if err := sh.RunV("go", "test", tagsCmd, "-coverprofile=build/coverage-ftw.txt", "-covermode=atomic", "-coverpkg=./...", "./testing/coreruleset"); err != nil {
return err
}
// we run tinygo tag only if memoize_builders is is not enabled
// we run tinygo tag only if memoize_builders is not enabled
if !strings.Contains(tags, "memoize_builders") {
if tagsCmd != "" {
tagsCmd += ",tinygo"
Expand Down Expand Up @@ -271,6 +280,7 @@ func combinations(tags []string) []string {
func TagsMatrix() error {
tags := []string{
"coraza.rule.case_sensitive_args_keys",
"coraza.rule.no_regex_multiline",
"memoize_builders",
"coraza.rule.multiphase_valuation",
"no_fs_access",
Expand Down
2 changes: 1 addition & 1 deletion testing/coreruleset/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.22.3
require (
github.com/bmatcuk/doublestar/v4 v4.7.1
github.com/corazawaf/coraza-coreruleset/v4 v4.7.0
github.com/corazawaf/coraza/v3 v3.2.1
github.com/corazawaf/coraza/v3 v3.2.2
github.com/coreruleset/albedo v0.0.16
github.com/coreruleset/go-ftw v1.1.1
github.com/rs/zerolog v1.33.0
Expand Down

0 comments on commit c0d3b05

Please sign in to comment.