Strip values #43

Merged · 8 commits · Jan 13, 2025
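
For context, here is a minimal usage sketch of the new redaction options (illustrative only, not part of this diff; the module import path and the print-only BatchCallback are assumptions):

package main

import (
	"fmt"
	"time"

	// Assumed import path, for illustration only.
	firetail "github.com/FireTail-io/firetail-go-lib/logging"
)

func main() {
	logger := firetail.NewBatchLogger(firetail.BatchLoggerOptions{
		MaxBatchSize:                    1024 * 512,
		MaxLogAge:                       time.Minute,
		RedactRequestJSONPayloadValues:  true, // strip values from JSON request bodies before batching
		RedactResponseJSONPayloadValues: true, // strip values from JSON response bodies before batching
		BatchCallback: func(batch [][]byte) {
			// Stand-in for the default callback (which sends batches to the
			// Firetail logging API): just print each redacted log entry.
			for _, entry := range batch {
				fmt.Println(string(entry))
			}
		},
	})
	logger.Enqueue(&firetail.LogEntry{
		Request:     firetail.Request{Body: `{"email":"jane@example.com"}`},
		Response:    firetail.Response{Body: `{"token":"abc123"}`},
		DateCreated: time.Now().UnixMilli(),
		// Other LogEntry fields omitted for brevity.
	})
}

With a redaction flag enabled, JSON string values in the corresponding body are replaced with empty strings, numbers with 0, and booleans with true before the entry is batched, as implemented by redactJson in the diff below.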
87 changes: 74 additions & 13 deletions logging/batch_logger.go
@@ -7,28 +7,34 @@

// A batchLogger receives log entries via its Enqueue method & arranges them into batches that it then passes to its batchHandler
type batchLogger struct {
- queue chan *LogEntry // A channel down which LogEntrys will be queued to be sent to Firetail
- maxBatchSize int // The maximum size of a batch in bytes
- maxLogAge time.Duration // The maximum age of a log item to hold onto
- batchCallback func([][]byte) // A handler that takes a batch of log entries as a slice of slices of bytes & sends them to Firetail
+ queue chan *LogEntry // A channel down which LogEntrys will be queued to be sent to Firetail
+ maxBatchSize int // The maximum size of a batch in bytes
+ maxLogAge time.Duration // The maximum age of a log item to hold onto
+ batchCallback func([][]byte) // A handler that takes a batch of log entries as a slice of slices of bytes & sends them to Firetail
+ redactRequestBodies bool // Whether or not to redact request bodies
+ redactResponseBodies bool // Whether or not to redact response bodies
}

// BatchLoggerOptions is an options struct used by the NewBatchLogger constructor
type BatchLoggerOptions struct {
- MaxBatchSize int // The maximum size of a batch in bytes
- MaxLogAge time.Duration // The maximum age of a log item in a batch - once an item is older than this, the batch is passed to the callback
- LogApiKey string // The API key used by the default BatchCallback used to send logs to the Firetail logging API
- LogApiUrl string // The URL of the Firetail logging API endpoint to send log entries to
- BatchCallback func([][]byte) // An optional callback to which batches will be passed; the default callback sends logs to the Firetail logging API
+ MaxBatchSize int // The maximum size of a batch in bytes
+ MaxLogAge time.Duration // The maximum age of a log item in a batch - once an item is older than this, the batch is passed to the callback
+ LogApiKey string // The API key used by the default BatchCallback used to send logs to the Firetail logging API
+ LogApiUrl string // The URL of the Firetail logging API endpoint to send log entries to
+ RedactRequestJSONPayloadValues bool // Whether or not values in the request payloads should be redacted, assuming they're JSON
+ RedactResponseJSONPayloadValues bool // Whether or not values in the response payloads should be redacted, assuming they're JSON
+ BatchCallback func([][]byte) // An optional callback to which batches will be passed; the default callback sends logs to the Firetail logging API
}

// NewBatchLogger creates a new batchLogger with the provided options
func NewBatchLogger(options BatchLoggerOptions) *batchLogger {
newLogger := &batchLogger{
- queue: make(chan *LogEntry),
- maxBatchSize: options.MaxBatchSize,
- maxLogAge: options.MaxLogAge,
- batchCallback: options.BatchCallback,
+ queue: make(chan *LogEntry),
+ maxBatchSize: options.MaxBatchSize,
+ maxLogAge: options.MaxLogAge,
+ batchCallback: options.BatchCallback,
+ redactRequestBodies: options.RedactRequestJSONPayloadValues,
+ redactResponseBodies: options.RedactResponseJSONPayloadValues,
}

if options.BatchCallback == nil {
@@ -61,6 +67,24 @@
// Read a new entry from the queue if there's one available
select {
case newEntry := <-l.queue:
// Redact request/response bodies if this feature is enabled
if l.redactRequestBodies {
redactedRequestBody, err := redactJson([]byte(newEntry.Request.Body))
if err != nil {
newEntry.Request.Body = ""

[Codecov: added line #L74 was not covered by tests]
} else {
newEntry.Request.Body = string(redactedRequestBody)
}
}
if l.redactResponseBodies {
redactedResponseBody, err := redactJson([]byte(newEntry.Response.Body))
if err != nil {
newEntry.Response.Body = ""

[Codecov: added line #L82 was not covered by tests]
} else {
newEntry.Response.Body = string(redactedResponseBody)
}
}

// Marshal the entry to bytes...
entryBytes, err := json.Marshal(newEntry)
if err != nil {
@@ -126,3 +150,40 @@
time.Sleep(1)
}
}

func redactJson(v []byte) ([]byte, error) {
var unmarshalledJSON interface{}
err := json.Unmarshal(v, &unmarshalledJSON)
if err != nil {
return nil, err
}

[Codecov: added lines #L158-L159 were not covered by tests]
var redact func(interface{}) interface{}
redact = func(v interface{}) interface{} {
switch u := v.(type) {
case map[string]interface{}:
for k, w := range u {
u[k] = redact(w)
}
return u
case []interface{}:
for i, w := range u {
u[i] = redact(w)
}
return u
case string:
return ""
case float64:
return 0
case bool:
return true
default:
return nil
}
}
redactedJSON := redact(unmarshalledJSON)
redactedBytes, err := json.Marshal(redactedJSON)
if err != nil {
return nil, err
}

[Codecov: added lines #L186-L187 were not covered by tests]
return redactedBytes, nil
}
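
For illustration, a sketch of the mapping redactJson applies, written as a testable example in a _test.go file in the logging package since redactJson is unexported (not part of this diff; TestRedactJson in the test file below covers the same behaviour exhaustively):

package logging

import "fmt"

// Illustrative sketch only: strings become "", numbers become 0, booleans
// become true, and nulls plus the object/array structure are preserved.
// Keys come back alphabetically ordered because the value is re-marshalled
// from a Go map.
func Example_redactJson() {
	out, _ := redactJson([]byte(`{"user":"jane","age":42,"admin":false,"note":null}`))
	fmt.Println(string(out))
	// Output: {"admin":true,"age":0,"note":null,"user":""}
}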
161 changes: 155 additions & 6 deletions logging/batch_logger_test.go
@@ -3,6 +3,7 @@ package logging
import (
"encoding/json"
"math/rand"
"os"
"strings"
"testing"
"time"
@@ -11,10 +12,12 @@ import (
"github.com/stretchr/testify/require"
)

- func SetupLogger(batchChannel chan *[][]byte, maxBatchSize int, maxLogAge time.Duration) *batchLogger {
+ func SetupLogger(batchChannel chan *[][]byte, maxBatchSize int, maxLogAge time.Duration, redactRequests bool, redactResponses bool) *batchLogger {
batchLogger := NewBatchLogger(BatchLoggerOptions{
- MaxBatchSize: maxBatchSize,
- MaxLogAge: maxLogAge,
+ MaxBatchSize: maxBatchSize,
+ MaxLogAge: maxLogAge,
+ RedactRequestJSONPayloadValues: redactRequests,
+ RedactResponseJSONPayloadValues: redactResponses,
})

// Replace the batchHandler with a custom one to throw the batches into a queue that we can receive from for testing
@@ -29,7 +32,7 @@ func TestOldLogIsSentImmediately(t *testing.T) {
const MaxLogAge = time.Minute

batchChannel := make(chan *[][]byte, 2)
- batchLogger := SetupLogger(batchChannel, 1024*512, MaxLogAge)
+ batchLogger := SetupLogger(batchChannel, 1024*512, MaxLogAge, false, false)

// Create a test log entry & enqueue it
testLogEntry := LogEntry{
@@ -58,7 +61,7 @@ func TestBatchesDoNotExceedMaxSize(t *testing.T) {

// Buffer our batchChannel with TestLogEntryCount spaces (worst case, each entry ends up in its own batch)
batchChannel := make(chan *[][]byte, TestLogEntryCount)
- batchLogger := SetupLogger(batchChannel, MaxBatchSize, time.Second)
+ batchLogger := SetupLogger(batchChannel, MaxBatchSize, time.Second, false, false)

// Create a bunch of test entries
testLogEntries := []*LogEntry{}
@@ -124,7 +127,7 @@ func TestOldLogTriggersBatch(t *testing.T) {
const MaxLogAge = time.Minute

batchChannel := make(chan *[][]byte, 2)
- batchLogger := SetupLogger(batchChannel, 1024*512, MaxLogAge)
+ batchLogger := SetupLogger(batchChannel, 1024*512, MaxLogAge, false, false)

// Create ExpectedLogEntryCount-1 test log entries (the last one will trigger a batch)
testLogEntries := []*LogEntry{}
@@ -172,3 +175,149 @@ func TestOldLogTriggersBatch(t *testing.T) {
// Assert that the batch has all the same byte slices as the expected batch
require.ElementsMatch(t, expectedBatch, *batch)
}

func TestRedaction(t *testing.T) {
testCases := []struct {
enableRequestRedaction bool
enableResponseRedaction bool
}{
{false, false},
{false, true},
{true, false},
{true, true},
}
for _, testCase := range testCases {
const MaxLogAge = time.Minute

batchChannel := make(chan *[][]byte, 2)
batchLogger := SetupLogger(batchChannel, 1024*512, MaxLogAge, testCase.enableRequestRedaction, testCase.enableResponseRedaction)

const testRequestBody = "{\"foo\":\"bar\"}"
const expectedRedactedRequestBody = "{\"foo\":\"\"}"
const testResponseBody = "{\"baz\":\"qux\"}"
const expectedRedactedResponseBody = "{\"baz\":\"\"}"

// Create a test log entry & enqueue it
testLogEntry := LogEntry{
Request: Request{
Body: testRequestBody,
},
Response: Response{
Body: testResponseBody,
},
DateCreated: time.Now().UnixMilli() - MaxLogAge.Milliseconds()*2,
}

// Enqueue the test log entry
batchLogger.Enqueue(&testLogEntry)

// There should then be a batch in the channel for us to receive
batch := <-batchChannel

// Channel should be empty now, as it should only have had one batch in it
assert.Equal(t, 0, len(batchChannel))

// Assert the batch had one log entry in it
require.Equal(t, 1, len(*batch))

// Unmarshal the log and assert its request/response bodies were redacted as expected
var logEntry LogEntry
err := json.Unmarshal((*batch)[0], &logEntry)
require.Nil(t, err)
if testCase.enableRequestRedaction {
assert.Equal(t, expectedRedactedRequestBody, logEntry.Request.Body)
} else {
assert.Equal(t, testRequestBody, logEntry.Request.Body)
}
if testCase.enableResponseRedaction {
assert.Equal(t, expectedRedactedResponseBody, logEntry.Response.Body)
} else {
assert.Equal(t, testResponseBody, logEntry.Response.Body)
}
}
}

func BenchmarkRedactFiretailJSLibPackageLockJson1536B(b *testing.B) {
benchmarkRedactJson(b, "testdata/package-lock-firetail-js-lib-1536B.json")
}

func BenchmarkRedactFiretailJSLibPackageLockJson478KiB(b *testing.B) {
benchmarkRedactJson(b, "testdata/package-lock-firetail-js-lib-478KiB.json")
}

func benchmarkRedactJson(b *testing.B, testFile string) {
testJson, err := os.ReadFile(testFile)
if err != nil {
b.Error(err.Error())
}
b.ResetTimer()
b.StopTimer()
for n := 0; n < b.N; n++ {
// redactJson mutates the input, so we need to copy it each time
testJsonCopy := make([]byte, len(testJson))
copy(testJsonCopy, testJson)
b.StartTimer()
_, err = redactJson(testJsonCopy)
b.StopTimer()
if err != nil {
b.Error(err.Error())
}
}
}

func TestRedactJson(t *testing.T) {
testCases := []struct {
testJson string
expectedJson string
}{
{"\"foo\"", "\"\""},
{"3.14159", "0"},
{"true", "true"},
{"false", "true"},
{"null", "null"},
{"{}", "{}"},
{"[]", "[]"},
// Objects with values of different types
{"{\"foo\":\"bar\"}", "{\"foo\":\"\"}"},
{"{\"foo\":3.14159}", "{\"foo\":0}"},
{"{\"foo\":true}", "{\"foo\":true}"},
{"{\"foo\":false}", "{\"foo\":true}"},
{"{\"foo\":null}", "{\"foo\":null}"},
// Lists with values of different types
{"[\"foo\"]", "[\"\"]"},
{"[3.14159]", "[0]"},
{"[false]", "[true]"},
{"[true]", "[true]"},
{"[null]", "[null]"},
// Lists with two values of the same type
{"[\"foo\",\"bar\"]", "[\"\",\"\"]"},
{"[3.14159,2.71828]", "[0,0]"},
{"[false,false]", "[true,true]"},
{"[false,true]", "[true,true]"},
{"[true,false]", "[true,true]"},
{"[true,true]", "[true,true]"},
{"[null,null]", "[null,null]"},
// Lists with three values of the same type
{"[\"foo\",\"bar\",\"baz\"]", "[\"\",\"\",\"\"]"},
{"[3.14159,2.71828,1.61803]", "[0,0,0]"},
{"[false,false,false]", "[true,true,true]"},
{"[false,false,true]", "[true,true,true]"},
{"[false,true,false]", "[true,true,true]"},
{"[false,true,true]", "[true,true,true]"},
{"[true,false,false]", "[true,true,true]"},
{"[true,false,true]", "[true,true,true]"},
{"[true,true,false]", "[true,true,true]"},
{"[true,true,true]", "[true,true,true]"},
{"[null,null,null]", "[null,null,null]"},
// Lists with mixed types
{"[\"foo\",3.14159,true,false,null]", "[\"\",0,true,true,null]"},
// Some more complex examples
{"{\"foo\":[\"bar\",3.14159,true,false,null]}", "{\"foo\":[\"\",0,true,true,null]}"},
{"[{\"foo\":\"bar\"},{\"baz\":3.14159},{\"qux\":true},{\"quux\":false},{\"corge\":null}]", "[{\"foo\":\"\"},{\"baz\":0},{\"qux\":true},{\"quux\":true},{\"corge\":null}]"},
}
for _, testCase := range testCases {
result, err := redactJson([]byte(testCase.testJson))
require.Nil(t, err)
assert.Equal(t, testCase.expectedJson, string(result))
}
}
50 changes: 50 additions & 0 deletions logging/testdata/package-lock-firetail-js-lib-1536B.json
@@ -0,0 +1,50 @@
{
"name": "@public.firetail.io/firetail-api",
"version": "0.5.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@public.firetail.io/firetail-api",
"version": "0.5.2",
"license": "LGPL-3.0",
"dependencies": {
"@apidevtools/swagger-parser": "9.1.0",
"ajv": "8.12.0",
"lodash": "4.17.21",
"node-fetch": "2.6.9",
"openapi-request-validator": "12.1.0",
"openapi-response-validator": "12.1.0",
"openapi-security-handler": "12.1.0",
"type-is": "1.6.18"
},
"devDependencies": {
"@types/aws-lambda": "8.10.109",
"@types/express-serve-static-core": "4.17.33",
"@types/http-server": "0.12.1",
"@types/jest": "29.4.0",
"@types/node": "18.11.18",
"@typescript-eslint/eslint-plugin": "5.51.0",
"@typescript-eslint/parser": "5.51.0",
"eslint": "8.34.0",
"eslint-config-standard": "17.0.0",
"eslint-plugin-import": "2.27.5"
},
"engines": {
"node": ">=14.0.0"
}
}
},
"dependencies": {
"@ampproject/remapping": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.2.0.tgz",
"integrity": "sha512-qRmjj8nj9qmLTQXXmaR1cck3UXSRMPrbsLJAasZpF+t3riI71BXed5ebIOYwQntykeZuhjsdweEc9BxH5Jc26w==",
"dev": true,
"requires": {
"@jridgewell/gen-mapping": "^0.1.0",
"@jridgewell/trace-mapping": "^0.3.9"
}
}
}
}
12,709 changes: 12,709 additions & 0 deletions logging/testdata/package-lock-firetail-js-lib-478KiB.json

Large diffs are not rendered by default.