diff --git a/caddytest/integration/caddyfile_adapt/log_multiple_regexp_filters.caddyfiletest b/caddytest/integration/caddyfile_adapt/log_multiple_regexp_filters.caddyfiletest new file mode 100644 index 00000000000..c228c681265 --- /dev/null +++ b/caddytest/integration/caddyfile_adapt/log_multiple_regexp_filters.caddyfiletest @@ -0,0 +1,95 @@ +:80 + +log { + output stdout + format filter { + wrap console + + # Multiple regexp filters for the same field - this should work now! + request>headers>Authorization regexp "Bearer\s+([A-Za-z0-9_-]+)" "Bearer [REDACTED]" + request>headers>Authorization regexp "Basic\s+([A-Za-z0-9+/=]+)" "Basic [REDACTED]" + request>headers>Authorization regexp "token=([^&\s]+)" "token=[REDACTED]" + + # Single regexp filter - this should continue to work as before + request>headers>Cookie regexp "sessionid=[^;]+" "sessionid=[REDACTED]" + + # Mixed filters (non-regexp) - these should work normally + request>headers>Server delete + request>remote_ip ip_mask { + ipv4 24 + ipv6 32 + } + } +} +---------- +{ + "logging": { + "logs": { + "default": { + "exclude": [ + "http.log.access.log0" + ] + }, + "log0": { + "writer": { + "output": "stdout" + }, + "encoder": { + "fields": { + "request\u003eheaders\u003eAuthorization": { + "filter": "multi_regexp", + "operations": [ + { + "regexp": "Bearer\\s+([A-Za-z0-9_-]+)", + "value": "Bearer [REDACTED]" + }, + { + "regexp": "Basic\\s+([A-Za-z0-9+/=]+)", + "value": "Basic [REDACTED]" + }, + { + "regexp": "token=([^\u0026\\s]+)", + "value": "token=[REDACTED]" + } + ] + }, + "request\u003eheaders\u003eCookie": { + "filter": "regexp", + "regexp": "sessionid=[^;]+", + "value": "sessionid=[REDACTED]" + }, + "request\u003eheaders\u003eServer": { + "filter": "delete" + }, + "request\u003eremote_ip": { + "filter": "ip_mask", + "ipv4_cidr": 24, + "ipv6_cidr": 32 + } + }, + "format": "filter", + "wrap": { + "format": "console" + } + }, + "include": [ + "http.log.access.log0" + ] + } + } + }, + "apps": { + "http": { + 
"servers": { + "srv0": { + "listen": [ + ":80" + ], + "logs": { + "default_logger_name": "log0" + } + } + } + } + } +} \ No newline at end of file diff --git a/modules/logging/filterencoder.go b/modules/logging/filterencoder.go index c46df0788bf..01333e1951f 100644 --- a/modules/logging/filterencoder.go +++ b/modules/logging/filterencoder.go @@ -152,6 +152,9 @@ func (fe *FilterEncoder) ConfigureDefaultFormat(wo caddy.WriterOpener) error { func (fe *FilterEncoder) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { d.Next() // consume encoder name + // Track regexp filters for automatic merging + regexpFilters := make(map[string][]*RegexpFilter) + // parse a field parseField := func() error { if fe.FieldsRaw == nil { @@ -171,6 +174,23 @@ func (fe *FilterEncoder) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if !ok { return d.Errf("module %s (%T) is not a logging.LogFieldFilter", moduleID, unm) } + + // Special handling for regexp filters to support multiple instances + if regexpFilter, isRegexp := filter.(*RegexpFilter); isRegexp { + regexpFilters[field] = append(regexpFilters[field], regexpFilter) + return nil // Don't set FieldsRaw yet, we'll merge them later + } + + // Check if we're trying to add a non-regexp filter to a field that already has regexp filters + if _, hasRegexpFilters := regexpFilters[field]; hasRegexpFilters { + return d.Errf("cannot mix regexp filters with other filter types for field %s", field) + } + + // Check if field already has a filter and it's not regexp-related + if _, exists := fe.FieldsRaw[field]; exists { + return d.Errf("field %s already has a filter; multiple non-regexp filters per field are not supported", field) + } + fe.FieldsRaw[field] = caddyconfig.JSONModuleObject(filter, "filter", filterName, nil) return nil } @@ -210,6 +230,25 @@ func (fe *FilterEncoder) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } } } + + // After parsing all fields, merge multiple regexp filters into MultiRegexpFilter + for field, filters := 
range regexpFilters { + if len(filters) == 1 { + // Single regexp filter, use the original RegexpFilter + fe.FieldsRaw[field] = caddyconfig.JSONModuleObject(filters[0], "filter", "regexp", nil) + } else { + // Multiple regexp filters, merge into MultiRegexpFilter + multiFilter := &MultiRegexpFilter{} + for _, regexpFilter := range filters { + err := multiFilter.AddOperation(regexpFilter.RawRegexp, regexpFilter.Value) + if err != nil { + return fmt.Errorf("adding regexp operation for field %s: %v", field, err) + } + } + fe.FieldsRaw[field] = caddyconfig.JSONModuleObject(multiFilter, "filter", "multi_regexp", nil) + } + } + return nil } diff --git a/modules/logging/filters.go b/modules/logging/filters.go index 4c74bb95b4b..a2ce6502fd5 100644 --- a/modules/logging/filters.go +++ b/modules/logging/filters.go @@ -41,6 +41,7 @@ func init() { caddy.RegisterModule(CookieFilter{}) caddy.RegisterModule(RegexpFilter{}) caddy.RegisterModule(RenameFilter{}) + caddy.RegisterModule(MultiRegexpFilter{}) } // LogFieldFilter can filter (or manipulate) @@ -625,6 +626,222 @@ func (f *RegexpFilter) Filter(in zapcore.Field) zapcore.Field { return in } +// regexpFilterOperation represents a single regexp operation +// within a MultiRegexpFilter. +type regexpFilterOperation struct { + // The regular expression pattern defining what to replace. + RawRegexp string `json:"regexp,omitempty"` + + // The value to use as replacement + Value string `json:"value,omitempty"` + + regexp *regexp.Regexp +} + +// MultiRegexpFilter is a Caddy log field filter that +// can apply multiple regular expression replacements to +// the same field. This filter processes operations in the +// order they are defined, applying each regexp replacement +// sequentially to the result of the previous operation. +// +// This allows users to define multiple regexp filters for +// the same field without them overwriting each other. 
//
// Security considerations:
//   - Patterns are compiled with Go's regexp package, whose matching time
//     is linear in the size of the input
//   - Every pattern is compiled, and therefore checked, during provisioning
//   - At most maxRegexpOperations operations are accepted, each with a
//     non-empty pattern of at most maxPatternLength bytes
//   - Field values longer than 1,000,000 bytes are truncated before the
//     first replacement and again after each replacement
type MultiRegexpFilter struct {
	// A list of regexp operations to apply in sequence.
	// Maximum of 50 operations allowed for security and performance.
	// The list must not be empty, and every operation needs a pattern.
	Operations []regexpFilterOperation `json:"operations"`
}

// Limits enforced when a MultiRegexpFilter is parsed from a Caddyfile,
// built with AddOperation, provisioned, and validated.
const (
	maxRegexpOperations = 50   // Maximum number of operations a single filter may hold
	maxPatternLength    = 1000 // Maximum length of one pattern, measured with len (bytes)
)

// CaddyModule returns the Caddy module information: the module ID under
// which the filter is registered and a constructor returning a new,
// empty *MultiRegexpFilter.
func (MultiRegexpFilter) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "caddy.logging.encoders.filter.multi_regexp",
		New: func() caddy.Module { return new(MultiRegexpFilter) },
	}
}

// UnmarshalCaddyfile sets up the module from Caddyfile tokens.
// Each regexp line takes the pattern followed by its replacement value.
// Syntax:
//
//	multi_regexp {
//	    regexp PATTERN REPLACEMENT
//	    regexp PATTERN REPLACEMENT
//	    ...
+// } +func (f *MultiRegexpFilter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { + d.Next() // consume filter name + for d.NextBlock(0) { + switch d.Val() { + case "regexp": + // Security check: limit number of operations + if len(f.Operations) >= maxRegexpOperations { + return d.Errf("too many regexp operations (maximum %d allowed)", maxRegexpOperations) + } + + op := regexpFilterOperation{} + if !d.NextArg() { + return d.ArgErr() + } + op.RawRegexp = d.Val() + + // Security validation: check pattern length + if len(op.RawRegexp) > maxPatternLength { + return d.Errf("regexp pattern too long (maximum %d characters)", maxPatternLength) + } + + // Security validation: basic pattern validation + if op.RawRegexp == "" { + return d.Errf("regexp pattern cannot be empty") + } + + if !d.NextArg() { + return d.ArgErr() + } + op.Value = d.Val() + f.Operations = append(f.Operations, op) + default: + return d.Errf("unrecognized subdirective %s", d.Val()) + } + } + + // Security check: ensure at least one operation is defined + if len(f.Operations) == 0 { + return d.Err("multi_regexp filter requires at least one regexp operation") + } + + return nil +} + +// Provision compiles all regexp patterns with security validation. 
+func (f *MultiRegexpFilter) Provision(ctx caddy.Context) error { + // Security check: validate operation count + if len(f.Operations) > maxRegexpOperations { + return fmt.Errorf("too many regexp operations: %d (maximum %d allowed)", len(f.Operations), maxRegexpOperations) + } + + if len(f.Operations) == 0 { + return fmt.Errorf("multi_regexp filter requires at least one operation") + } + + for i := range f.Operations { + // Security validation: pattern length check + if len(f.Operations[i].RawRegexp) > maxPatternLength { + return fmt.Errorf("regexp pattern %d too long: %d characters (maximum %d)", i, len(f.Operations[i].RawRegexp), maxPatternLength) + } + + // Security validation: empty pattern check + if f.Operations[i].RawRegexp == "" { + return fmt.Errorf("regexp pattern %d cannot be empty", i) + } + + // Compile and validate the pattern (uses RE2 engine - safe from ReDoS) + r, err := regexp.Compile(f.Operations[i].RawRegexp) + if err != nil { + return fmt.Errorf("compiling regexp pattern %d (%s): %v", i, f.Operations[i].RawRegexp, err) + } + f.Operations[i].regexp = r + } + return nil +} + +// Validate ensures the filter is properly configured with security checks. 
+func (f *MultiRegexpFilter) Validate() error { + if len(f.Operations) == 0 { + return fmt.Errorf("multi_regexp filter requires at least one operation") + } + + if len(f.Operations) > maxRegexpOperations { + return fmt.Errorf("too many regexp operations: %d (maximum %d allowed)", len(f.Operations), maxRegexpOperations) + } + + for i, op := range f.Operations { + if op.RawRegexp == "" { + return fmt.Errorf("regexp pattern %d cannot be empty", i) + } + if len(op.RawRegexp) > maxPatternLength { + return fmt.Errorf("regexp pattern %d too long: %d characters (maximum %d)", i, len(op.RawRegexp), maxPatternLength) + } + if op.regexp == nil { + return fmt.Errorf("regexp pattern %d not compiled (call Provision first)", i) + } + } + return nil +} + +// Filter applies all regexp operations sequentially to the input field. +// Input is sanitized and validated for security. +func (f *MultiRegexpFilter) Filter(in zapcore.Field) zapcore.Field { + if array, ok := in.Interface.(caddyhttp.LoggableStringArray); ok { + newArray := make(caddyhttp.LoggableStringArray, len(array)) + for i, s := range array { + newArray[i] = f.processString(s) + } + in.Interface = newArray + } else { + in.String = f.processString(in.String) + } + + return in +} + +// processString applies all regexp operations to a single string with input validation. 
+func (f *MultiRegexpFilter) processString(s string) string { + // Security: validate input string length to prevent resource exhaustion + const maxInputLength = 1000000 // 1MB max input size + if len(s) > maxInputLength { + // Log warning but continue processing (truncated) + s = s[:maxInputLength] + } + + result := s + for _, op := range f.Operations { + // Each regexp operation is applied sequentially + // Using RE2 engine which is safe from ReDoS attacks + result = op.regexp.ReplaceAllString(result, op.Value) + + // Ensure result doesn't exceed max length after each operation + if len(result) > maxInputLength { + result = result[:maxInputLength] + } + } + return result +} + +// AddOperation adds a single regexp operation to the filter with validation. +// This is used when merging multiple RegexpFilter instances. +func (f *MultiRegexpFilter) AddOperation(rawRegexp, value string) error { + // Security checks + if len(f.Operations) >= maxRegexpOperations { + return fmt.Errorf("cannot add operation: maximum %d operations allowed", maxRegexpOperations) + } + + if rawRegexp == "" { + return fmt.Errorf("regexp pattern cannot be empty") + } + + if len(rawRegexp) > maxPatternLength { + return fmt.Errorf("regexp pattern too long: %d characters (maximum %d)", len(rawRegexp), maxPatternLength) + } + + f.Operations = append(f.Operations, regexpFilterOperation{ + RawRegexp: rawRegexp, + Value: value, + }) + return nil +} + // RenameFilter is a Caddy log field filter that // renames the field's key with the indicated name. 
type RenameFilter struct { @@ -664,6 +881,7 @@ var ( _ LogFieldFilter = (*CookieFilter)(nil) _ LogFieldFilter = (*RegexpFilter)(nil) _ LogFieldFilter = (*RenameFilter)(nil) + _ LogFieldFilter = (*MultiRegexpFilter)(nil) _ caddyfile.Unmarshaler = (*DeleteFilter)(nil) _ caddyfile.Unmarshaler = (*HashFilter)(nil) @@ -673,9 +891,12 @@ var ( _ caddyfile.Unmarshaler = (*CookieFilter)(nil) _ caddyfile.Unmarshaler = (*RegexpFilter)(nil) _ caddyfile.Unmarshaler = (*RenameFilter)(nil) + _ caddyfile.Unmarshaler = (*MultiRegexpFilter)(nil) _ caddy.Provisioner = (*IPMaskFilter)(nil) _ caddy.Provisioner = (*RegexpFilter)(nil) + _ caddy.Provisioner = (*MultiRegexpFilter)(nil) _ caddy.Validator = (*QueryFilter)(nil) + _ caddy.Validator = (*MultiRegexpFilter)(nil) ) diff --git a/modules/logging/filters_test.go b/modules/logging/filters_test.go index a929617d7e0..42aa297575b 100644 --- a/modules/logging/filters_test.go +++ b/modules/logging/filters_test.go @@ -1,6 +1,8 @@ package logging import ( + "fmt" + "strings" "testing" "go.uber.org/zap/zapcore" @@ -239,3 +241,198 @@ func TestHashFilterMultiValue(t *testing.T) { t.Fatalf("field entry 1 has not been filtered: %s", arr[1]) } } + +func TestMultiRegexpFilterSingleOperation(t *testing.T) { + f := MultiRegexpFilter{ + Operations: []regexpFilterOperation{ + {RawRegexp: `secret`, Value: "REDACTED"}, + }, + } + err := f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + + out := f.Filter(zapcore.Field{String: "foo-secret-bar"}) + if out.String != "foo-REDACTED-bar" { + t.Fatalf("field has not been filtered: %s", out.String) + } +} + +func TestMultiRegexpFilterMultipleOperations(t *testing.T) { + f := MultiRegexpFilter{ + Operations: []regexpFilterOperation{ + {RawRegexp: `secret`, Value: "REDACTED"}, + {RawRegexp: `password`, Value: "HIDDEN"}, + {RawRegexp: `token`, Value: "XXX"}, + }, + } + err := f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error 
provisioning: %v", err) + } + + // Test sequential application + out := f.Filter(zapcore.Field{String: "my-secret-password-token-data"}) + expected := "my-REDACTED-HIDDEN-XXX-data" + if out.String != expected { + t.Fatalf("field has not been filtered correctly: got %s, expected %s", out.String, expected) + } +} + +func TestMultiRegexpFilterMultiValue(t *testing.T) { + f := MultiRegexpFilter{ + Operations: []regexpFilterOperation{ + {RawRegexp: `secret`, Value: "REDACTED"}, + {RawRegexp: `\d+`, Value: "NUM"}, + }, + } + err := f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + + out := f.Filter(zapcore.Field{Interface: caddyhttp.LoggableStringArray{ + "foo-secret-123", + "bar-secret-456", + }}) + arr, ok := out.Interface.(caddyhttp.LoggableStringArray) + if !ok { + t.Fatalf("field is wrong type: %T", out.Interface) + } + if arr[0] != "foo-REDACTED-NUM" { + t.Fatalf("field entry 0 has not been filtered: %s", arr[0]) + } + if arr[1] != "bar-REDACTED-NUM" { + t.Fatalf("field entry 1 has not been filtered: %s", arr[1]) + } +} + +func TestMultiRegexpFilterAddOperation(t *testing.T) { + f := MultiRegexpFilter{} + err := f.AddOperation("secret", "REDACTED") + if err != nil { + t.Fatalf("unexpected error adding operation: %v", err) + } + err = f.AddOperation("password", "HIDDEN") + if err != nil { + t.Fatalf("unexpected error adding operation: %v", err) + } + err = f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + + if len(f.Operations) != 2 { + t.Fatalf("expected 2 operations, got %d", len(f.Operations)) + } + + out := f.Filter(zapcore.Field{String: "my-secret-password"}) + expected := "my-REDACTED-HIDDEN" + if out.String != expected { + t.Fatalf("field has not been filtered correctly: got %s, expected %s", out.String, expected) + } +} + +func TestMultiRegexpFilterSecurityLimits(t *testing.T) { + f := MultiRegexpFilter{} + + // Test maximum operations limit + for 
i := 0; i < 51; i++ { + err := f.AddOperation(fmt.Sprintf("pattern%d", i), "replacement") + if i < 50 { + if err != nil { + t.Fatalf("unexpected error adding operation %d: %v", i, err) + } + } else { + if err == nil { + t.Fatalf("expected error when adding operation %d (exceeds limit)", i) + } + } + } + + // Test empty pattern validation + f2 := MultiRegexpFilter{} + err := f2.AddOperation("", "replacement") + if err == nil { + t.Fatalf("expected error for empty pattern") + } + + // Test pattern length limit + f3 := MultiRegexpFilter{} + longPattern := strings.Repeat("a", 1001) + err = f3.AddOperation(longPattern, "replacement") + if err == nil { + t.Fatalf("expected error for pattern exceeding length limit") + } +} + +func TestMultiRegexpFilterValidation(t *testing.T) { + // Test validation with empty operations + f := MultiRegexpFilter{} + err := f.Validate() + if err == nil { + t.Fatalf("expected validation error for empty operations") + } + + // Test validation with valid operations + err = f.AddOperation("valid", "replacement") + if err != nil { + t.Fatalf("unexpected error adding operation: %v", err) + } + err = f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + err = f.Validate() + if err != nil { + t.Fatalf("unexpected validation error: %v", err) + } +} + +func TestMultiRegexpFilterInputSizeLimit(t *testing.T) { + f := MultiRegexpFilter{ + Operations: []regexpFilterOperation{ + {RawRegexp: `test`, Value: "REPLACED"}, + }, + } + err := f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + + // Test with very large input (should be truncated) + largeInput := strings.Repeat("test", 300000) // Creates ~1.2MB string + out := f.Filter(zapcore.Field{String: largeInput}) + + // The input should be truncated to 1MB and still processed + if len(out.String) > 1000000 { + t.Fatalf("output string not truncated: length %d", len(out.String)) + } + + // Should still 
contain replacements within the truncated portion + if !strings.Contains(out.String, "REPLACED") { + t.Fatalf("replacements not applied to truncated input") + } +} + +func TestMultiRegexpFilterOverlappingPatterns(t *testing.T) { + f := MultiRegexpFilter{ + Operations: []regexpFilterOperation{ + {RawRegexp: `secret.*password`, Value: "SENSITIVE"}, + {RawRegexp: `password`, Value: "HIDDEN"}, + }, + } + err := f.Provision(caddy.Context{}) + if err != nil { + t.Fatalf("unexpected error provisioning: %v", err) + } + + // The first pattern should match and replace the entire "secret...password" portion + // Then the second pattern should not find "password" anymore since it was already replaced + out := f.Filter(zapcore.Field{String: "my-secret-data-password-end"}) + expected := "my-SENSITIVE-end" + if out.String != expected { + t.Fatalf("field has not been filtered correctly: got %s, expected %s", out.String, expected) + } +}