Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Main (unreleased)

- `prometheus.exporter.postgres` dependency has been updated to v0.18.1. This includes new `stat_progress_vacuum` and `buffercache_summary` collectors, as well as other bugfixes and enhancements. (@cristiangreco)

- Support delimiters in `stage.luhn`. (@dehaansa)

### Bugfixes

- Fix direction of arrows for pyroscope components in UI graph. (@dehaansa)
Expand Down
13 changes: 8 additions & 5 deletions docs/sources/reference/components/loki/loki.process.md
Original file line number Diff line number Diff line change
Expand Up @@ -695,15 +695,18 @@ Many Payment Card Industry environments require these numbers to be redacted.

The following arguments are supported:

| Name | Type | Description | Default | Required |
| ------------- | -------- | ---------------------------------------------- | ---------------- | -------- |
| `min_length` | `int` | Minimum length of digits to consider | `13` | no |
| `replacement` | `string` | String to substitute the matched patterns with | `"**REDACTED**"` | no |
| `source` | `string` | Source of the data to parse. | `""` | no |
| Name | Type | Description | Default | Required |
| ------------- | -------- | -------------------------------------------------------------- | ---------------- | -------- |
| `delimiters` | `string` | A string containing the delimiter characters to accept as part of the number. | `""` | no |
| `min_length` | `int` | Minimum length of digits to consider. | `13` | no |
| `replacement` | `string` | String to substitute the matched patterns with. | `"**REDACTED**"` | no |
| `source` | `string` | Source of the data to parse. | `""` | no |

The `source` field defines the source of data to search.
When `source` is missing or empty, the stage parses the log line itself, but it can also be used to parse a previously extracted value.

If you want the Luhn algorithm to identify numbers with delimiters, for example `4032-0325-1354-8443`, you can configure the `delimiters` field with the expected delimiters. For example, `delimiters = " -"` accepts both spaces and dashes between the digits of a number.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to have an example for this


The following example log line contains an approved credit card number.

```alloy
Expand Down
68 changes: 65 additions & 3 deletions internal/component/loki/process/stages/luhn.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type LuhnFilterConfig struct {
Replacement string `alloy:"replacement,attr,optional"`
Source *string `alloy:"source,attr,optional"`
MinLength int `alloy:"min_length,attr,optional"`
Delimiters string `alloy:"delimiters,attr,optional"`
}

// validateLuhnFilterConfig validates the LuhnFilterConfig.
Expand Down Expand Up @@ -65,12 +66,16 @@ func (r *luhnFilterStage) Process(labels model.LabelSet, extracted map[string]in
}

// Replace Luhn-valid numbers in the input.
updatedEntry := replaceLuhnValidNumbers(*input, r.config.Replacement, r.config.MinLength)
*entry = updatedEntry
if r.config.Delimiters != "" {
updatedEntry := replaceLuhnValidNumbersWithDelimiters(*input, r.config.Replacement, r.config.MinLength, r.config.Delimiters)
*entry = updatedEntry
} else {
updatedEntry := replaceLuhnValidNumbers(*input, r.config.Replacement, r.config.MinLength)
*entry = updatedEntry
}
}

// replaceLuhnValidNumbers scans the input for Luhn-valid numbers and replaces them.

func replaceLuhnValidNumbers(input, replacement string, minLength int) string {
var sb strings.Builder
var currentNumber strings.Builder
Expand Down Expand Up @@ -111,6 +116,63 @@ func replaceLuhnValidNumbers(input, replacement string, minLength int) string {
return sb.String()
}

// replaceLuhnValidNumbersWithDelimiters scans input for digit runs that may be
// interrupted by any rune contained in delimiters, and substitutes replacement
// for every run whose digits (with the delimiters stripped) pass the Luhn check.
//
// A candidate is only checked when its digits occupy at least minLength bytes.
// Delimiters that appear before the first digit of a candidate are never part
// of it. Delimiters that follow the last digit are buffered with the candidate
// while scanning, but they are always written back to the output unchanged,
// whether or not the candidate is redacted.
//
// Candidates whose digits cannot be parsed by strconv.Atoi (for example because
// they overflow int) are written back unchanged.
//
// This is kept separate from replaceLuhnValidNumbers to keep the base case as
// fast as possible when no delimiters are needed.
func replaceLuhnValidNumbersWithDelimiters(input, replacement string, minLength int, delimiters string) string {
	var sb strings.Builder
	var currentNumber strings.Builder // digits only, used for the Luhn check
	var currentString strings.Builder // digits and delimiters exactly as they appeared
	// digitsEnd is the byte length of currentString right after the most recent
	// digit; everything past it is a run of trailing delimiters.
	digitsEnd := 0

	flushNumber := func() {
		// Delimiters are only buffered once a digit has been seen, so an empty
		// number means there is nothing pending at all.
		if currentNumber.Len() == 0 {
			return
		}

		candidate := currentString.String()
		redact := false
		if currentNumber.Len() >= minLength {
			number, err := strconv.Atoi(currentNumber.String())
			redact = err == nil && isLuhn(number)
		}

		if redact {
			sb.WriteString(replacement)
			// Keep every trailing delimiter, not just the last one: they are
			// not part of the number and must not be swallowed by the redaction.
			sb.WriteString(candidate[digitsEnd:])
		} else {
			// Too short or not Luhn-valid: emit the original text untouched.
			sb.WriteString(candidate)
		}

		// Reset the current tracking.
		currentNumber.Reset()
		currentString.Reset()
		digitsEnd = 0
	}

	// Iterate over the input, replacing Luhn-valid numbers.
	for _, char := range input {
		switch {
		case unicode.IsDigit(char):
			currentNumber.WriteRune(char)
			currentString.WriteRune(char)
			digitsEnd = currentString.Len()
		case currentNumber.Len() > 0 && strings.ContainsRune(delimiters, char):
			// A delimiter inside (or right after) a number: remember it in the
			// raw text but keep it out of the digits used for validation. This
			// is what lets numbers written with spaces or dashes be captured.
			currentString.WriteRune(char)
		default:
			// Any other character ends the candidate.
			flushNumber()
			sb.WriteRune(char)
		}
	}
	flushNumber() // Ensure any trailing number is processed

	return sb.String()
}

// isLuhn reports whether number is valid according to the Luhn algorithm.
func isLuhn(number int) bool {
// Luhn algorithm is a simple checksum formula used to validate a
Expand Down
26 changes: 20 additions & 6 deletions internal/component/loki/process/stages/luhn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,35 @@ func TestReplaceLuhnValidNumbers(t *testing.T) {
input string
replacement string
want string
delimiters string
}{
// Test case with a single Luhn-valid number
{"My credit card number is 3530111333300000.", "**REDACTED**", "My credit card number is **REDACTED**."},
{"My credit card number is 3530111333300000.", "**REDACTED**", "My credit card number is **REDACTED**.", ""},
// Test case with multiple Luhn-valid numbers
{"Cards 4532015112830366 and 6011111111111117 are valid.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid."},
{"Cards 4532015112830366 and 6011111111111117 are valid.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid.", ""},
// Test case with no Luhn-valid numbers
{"No valid numbers here.", "**REDACTED**", "No valid numbers here."},
{"No valid numbers here.", "**REDACTED**", "No valid numbers here.", ""},
// Test case with mixed content
{"Valid: 4556737586899855, invalid: 1234.", "**REDACTED**", "Valid: **REDACTED**, invalid: 1234."},
{"Valid: 4556737586899855, invalid: 1234.", "**REDACTED**", "Valid: **REDACTED**, invalid: 1234.", ""},
// Test case with edge cases
{"Edge cases: 0, 00, 000, 1.", "**REDACTED**", "Edge cases: 0, 00, 000, 1."},
{"Edge cases: 0, 00, 000, 1.", "**REDACTED**", "Edge cases: 0, 00, 000, 1.", ""},
// multiple luhns with different delimiters and trailing delimiter
{"Cards 4532-0151-1283-0366 and 6011 1111 1111 1117 are valid and 3530:1113:3330:0000 has unexpected delimiters.", "**REDACTED**", "Cards **REDACTED** and **REDACTED** are valid and 3530:1113:3330:0000 has unexpected delimiters.", " -"},
// luhn with delimiters but not valid
{"Card 4532-0151-1283-0367 is not valid.", "**REDACTED**", "Card 4532-0151-1283-0367 is not valid.", " -"},
// luhn with delimiters but below min length
{"Card 4532-0151-128 is too short.", "**REDACTED**", "Card 4532-0151-128 is too short.", "-"},
// luhn with delimiters but below min length with trailing delimiter
{"Card 4532-0151-128 is too short.", "**REDACTED**", "Card 4532-0151-128 is too short.", " -"},
}

for _, c := range cases {
got := replaceLuhnValidNumbers(c.input, c.replacement, 13)
var got string
if c.delimiters == "" {
got = replaceLuhnValidNumbers(c.input, c.replacement, 13)
} else {
got = replaceLuhnValidNumbersWithDelimiters(c.input, c.replacement, 13, c.delimiters)
}
if got != c.want {
t.Errorf("replaceLuhnValidNumbers(%q, %q) == %q, want %q", c.input, c.replacement, got, c.want)
}
Expand Down
Loading