Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Main (unreleased)

- Schedule new path targets faster in `loki.source.file`. (@kalleep)

- Support delimiters in `stage.luhn`. (@dehaansa)

### Bugfixes

- Stop `loki.source.kubernetes` discarding log lines with duplicate timestamps. (@ciaranj)
Expand Down
34 changes: 29 additions & 5 deletions docs/sources/reference/components/loki/loki.process.md
Original file line number Diff line number Diff line change
Expand Up @@ -695,15 +695,20 @@ Many Payment Card Industry environments require these numbers to be redacted.

The following arguments are supported:

| Name | Type | Description | Default | Required |
| ------------- | -------- | ---------------------------------------------- | ---------------- | -------- |
| `min_length` | `int` | Minimum length of digits to consider | `13` | no |
| `replacement` | `string` | String to substitute the matched patterns with | `"**REDACTED**"` | no |
| `source` | `string` | Source of the data to parse. | `""` | no |
| Name | Type | Description | Default | Required |
| ------------- | -------- | -------------------------------------------------------------- | ---------------- | -------- |
| `delimiters` | `string` | A string containing the delimiter characters to accept as part of the number. | `""` | no |
| `min_length` | `int` | Minimum length of digits to consider. | `13` | no |
| `replacement` | `string` | String to substitute the matched patterns with. | `"**REDACTED**"` | no |
| `source` | `string` | Source of the data to parse. | `""` | no |

The `source` field defines the source of data to search.
When `source` is missing or empty, the stage parses the log line itself, but it can also be used to parse a previously extracted value.

If you want the stage to identify numbers whose digits are separated by delimiters, for example `4032-0325-1354-8443`, set the `delimiters` field to a string containing every delimiter character to accept, for example `"-"` or `" -"`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to have an example for this


#### Example

The following example log line contains an approved credit card number.

```alloy
Expand All @@ -720,6 +725,25 @@ The stage parses the log line, redacts the credit card number, and produces the
time=2012-11-01T22:08:41+00:00 app=loki level=INFO duration=125 message="credit card approved **DELETED**" extra="user=example_name"
```

#### Example with `delimiters`

The following example log line contains an approved credit card number, represented with dash characters between each group of four digits.

```alloy
time=2012-11-01T22:08:41+00:00 app=loki level=WARN duration=125 message="credit card approved 4032-0325-1354-8443" extra="user=example_name"

stage.luhn {
replacement = "**DELETED**"
delimiters = "-"
}
```

The stage parses the log line, redacts the credit card number, and produces the following updated log line:

```text
time=2012-11-01T22:08:41+00:00 app=loki level=WARN duration=125 message="credit card approved **DELETED**" extra="user=example_name"
```

### `stage.match`

The `stage.match` inner block configures a filtering stage that can conditionally either apply a nested set of processing stages or drop an entry when a log entry matches a configurable LogQL stream selector and filter expressions.
Expand Down
68 changes: 65 additions & 3 deletions internal/component/loki/process/stages/luhn.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type LuhnFilterConfig struct {
Replacement string `alloy:"replacement,attr,optional"`
Source *string `alloy:"source,attr,optional"`
MinLength int `alloy:"min_length,attr,optional"`
Delimiters string `alloy:"delimiters,attr,optional"`
}

// validateLuhnFilterConfig validates the LuhnFilterConfig.
Expand Down Expand Up @@ -65,12 +66,16 @@ func (r *luhnFilterStage) Process(labels model.LabelSet, extracted map[string]in
}

// Replace Luhn-valid numbers in the input.
updatedEntry := replaceLuhnValidNumbers(*input, r.config.Replacement, r.config.MinLength)
*entry = updatedEntry
if r.config.Delimiters != "" {
updatedEntry := replaceLuhnValidNumbersWithDelimiters(*input, r.config.Replacement, r.config.MinLength, r.config.Delimiters)
*entry = updatedEntry
} else {
updatedEntry := replaceLuhnValidNumbers(*input, r.config.Replacement, r.config.MinLength)
*entry = updatedEntry
}
}

// replaceLuhnValidNumbers scans the input for Luhn-valid numbers and replaces them.

func replaceLuhnValidNumbers(input, replacement string, minLength int) string {
var sb strings.Builder
var currentNumber strings.Builder
Expand Down Expand Up @@ -111,6 +116,63 @@ func replaceLuhnValidNumbers(input, replacement string, minLength int) string {
return sb.String()
}

// replaceLuhnValidNumbersWithDelimiters scans input for runs of digits that may
// be interleaved with any of the characters in delimiters, and replaces each
// run whose digits form a Luhn-valid number of at least minLength digits with
// replacement.
//
// A delimiter is only treated as part of a candidate once at least one digit
// has been seen, so leading delimiters are always copied through unchanged.
// Delimiters that follow the final digit of a candidate are not part of the
// number: when the candidate is redacted, the whole trailing run is written
// back verbatim after the replacement, so text such as a double space or
// " - " following a card number is preserved. Candidates that are too short,
// fail the Luhn check, or cannot be parsed by strconv.Atoi are written back
// exactly as they appeared in input.
//
// This is kept separate from replaceLuhnValidNumbers so that the
// delimiter-free path stays as fast as possible.
func replaceLuhnValidNumbersWithDelimiters(input, replacement string, minLength int, delimiters string) string {
	var sb strings.Builder
	// currentNumber holds only the digits of the current candidate.
	var currentNumber strings.Builder
	// currentString holds the candidate exactly as it appeared in input,
	// including interior and trailing delimiters.
	var currentString strings.Builder
	// trailingDelimiters holds every delimiter seen since the last digit.
	var trailingDelimiters strings.Builder

	flushNumber := func() {
		redact := false
		// Only candidates with at least minLength digits are Luhn-checked.
		if currentNumber.Len() >= minLength {
			number, err := strconv.Atoi(currentNumber.String())
			redact = err == nil && isLuhn(number)
		}
		if redact {
			sb.WriteString(replacement)
			// Restore all delimiters that followed the last digit; they were
			// buffered as part of the candidate but do not belong to the number.
			sb.WriteString(trailingDelimiters.String())
		} else {
			// Not redacted: emit the candidate untouched (empty if no digits
			// were buffered).
			sb.WriteString(currentString.String())
		}
		// Reset the current tracking.
		currentNumber.Reset()
		currentString.Reset()
		trailingDelimiters.Reset()
	}

	for _, char := range input {
		switch {
		case unicode.IsDigit(char):
			currentNumber.WriteRune(char)
			currentString.WriteRune(char)
			// Any delimiters buffered so far turned out to be interior ones.
			trailingDelimiters.Reset()
		case currentNumber.Len() > 0 && strings.ContainsRune(delimiters, char):
			// A delimiter after at least one digit: keep it with the candidate
			// so numbers such as "4032-0325-1354-8443" are checked as a whole.
			currentString.WriteRune(char)
			trailingDelimiters.WriteRune(char)
		default:
			// Any other character ends the candidate.
			flushNumber()
			sb.WriteRune(char)
		}
	}
	flushNumber() // Ensure any trailing number is processed.

	return sb.String()
}

// isLuhn check number is valid or not based on Luhn algorithm
func isLuhn(number int) bool {
// Luhn algorithm is a simple checksum formula used to validate a
Expand Down
26 changes: 20 additions & 6 deletions internal/component/loki/process/stages/luhn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,35 @@ func TestReplaceLuhnValidNumbers(t *testing.T) {
input string
replacement string
want string
delimiters string
}{
// Test case with a single Luhn-valid number
{"My credit card number is 3530111333300000.", "**REDACTED**", "My credit card number is **REDACTED**."},
{"My credit card number is 3530111333300000.", "**REDACTED**", "My credit card number is **REDACTED**.", ""},
// Test case with multiple Luhn-valid numbers
{"Cards 4532015112830366 and 6011111111111117 are valid.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid."},
{"Cards 4532015112830366 and 6011111111111117 are valid.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid.", ""},
// Test case with no Luhn-valid numbers
{"No valid numbers here.", "**REDACTED**", "No valid numbers here."},
{"No valid numbers here.", "**REDACTED**", "No valid numbers here.", ""},
// Test case with mixed content
{"Valid: 4556737586899855, invalid: 1234.", "**REDACTED**", "Valid: **REDACTED**, invalid: 1234."},
{"Valid: 4556737586899855, invalid: 1234.", "**REDACTED**", "Valid: **REDACTED**, invalid: 1234.", ""},
// Test case with edge cases
{"Edge cases: 0, 00, 000, 1.", "**REDACTED**", "Edge cases: 0, 00, 000, 1."},
{"Edge cases: 0, 00, 000, 1.", "**REDACTED**", "Edge cases: 0, 00, 000, 1.", ""},
// multiple luhns with different delimiters and trailing delimiter
{"Cards 4532-0151-1283-0366 and 6011 1111 1111 1117 are valid and 3530:1113:3330:0000 has unexpected delimiters.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid and 3530:1113:3330:0000 has unexpected delimiters.", " -"},
// luhn with delimiters but not valid
{"Card 4532-0151-1283-0367 is not valid.", "**REDACTED**", "Card 4532-0151-1283-0367 is not valid.", " -"},
// luhn with delimiters but below min length
{"Card 4532-0151-128 is too short.", "**REDACTED**", "Card 4532-0151-128 is too short.", "-"},
// luhn with delimiters but below min length with trailing delimiter
{"Card 4532-0151-128 is too short.", "**REDACTED**", "Card 4532-0151-128 is too short.", " -"},
}

for _, c := range cases {
got := replaceLuhnValidNumbers(c.input, c.replacement, 13)
var got string
if c.delimiters == "" {
got = replaceLuhnValidNumbers(c.input, c.replacement, 13)
} else {
got = replaceLuhnValidNumbersWithDelimiters(c.input, c.replacement, 13, c.delimiters)
}
if got != c.want {
t.Errorf("replaceLuhnValidNumbers(%q, %q) == %q, want %q", c.input, c.replacement, got, c.want)
}
Expand Down