From 5b768a9097c779fa96b136b06d013e0ae98b9b28 Mon Sep 17 00:00:00 2001 From: Marius Jensen Date: Mon, 11 Nov 2024 14:00:17 +0100 Subject: [PATCH 1/5] Add support for negative lookups to constraints. --- README.md | 4 +++- internal/anonymize/anonymize.go | 20 ++++++++++++++++---- internal/config/config.go | 1 + internal/embed/files/config.default.json | 18 ++++++++++++++++-- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1c3a073..0989ecd 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,7 @@ The config is composed of many objects in the `patterns` array: - `field`: a string representing the name of the field. - `position`: the 1-based index of what number column this field represents. For instance, assuming a table with 3 columns `foo`, `bar`, and `baz`, and you wished to modify the `bar` column, this value would be `2`. - `value`: string value to match against. + - `compare`: An optional string stating how to treat the constraints. Passing `not like` will make it a negative lookup, and if the `value` matches, this line will not be processed. ### Constraints @@ -126,7 +127,8 @@ Supposing you have a WordPress database and you need to modify certain meta, be { "field": "meta_key", "position": 3, - "value": "last_ip_address" + "value": "last_ip_address", + "compare": "like" } ] } diff --git a/internal/anonymize/anonymize.go b/internal/anonymize/anonymize.go index fbe97ae..48d48a3 100644 --- a/internal/anonymize/anonymize.go +++ b/internal/anonymize/anonymize.go @@ -357,10 +357,22 @@ func rowObeysConstraints(constraints []config.PatternFieldConstraint, row sqlpar parsedValue := convertSQLValToString(value) // TODO: Add behing a flag for debugging. 
- //log.Printf("Error: Constraint obediance, parsed value: %s, constraint value: %s.", parsedValue, constraint.Value) - - if parsedValue != constraint.Value { - return false + //log.Printf("Error: Constraint obediance, parsed value: %s, constraint value: %s, and comparator: %s.", parsedValue, constraint.Value, constraint.Compare) + + switch constraint.Compare { + case "not like", + "<>", + "!=": + if parsedValue == constraint.Value { + return false + } + case "like", + "==", + "=": + default: + if parsedValue != constraint.Value { + return false + } } } return true diff --git a/internal/config/config.go b/internal/config/config.go index 96a7b37..7f550da 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -38,6 +38,7 @@ type PatternFieldConstraint struct { Field string `json:"field"` Position int `json:"position"` Value string `json:"value"` + Compare string `json:"compare"` } // New creates a new Config from flags and environment variables diff --git a/internal/embed/files/config.default.json b/internal/embed/files/config.default.json index c78cde9..a159bd7 100644 --- a/internal/embed/files/config.default.json +++ b/internal/embed/files/config.default.json @@ -7,13 +7,27 @@ "field": "user_login", "position": 2, "type": "username", - "constraints": null + "constraints": [ + { + "field": "user_login", + "position": 2, + "value": "myaccount", + "compare": "not like" + } + ] }, { "field": "user_pass", "position": 3, "type": "password", - "constraints": null + "constraints": [ + { + "field": "user_login", + "position": 2, + "value": "myaccount", + "compare": "not like" + } + ] }, { "field": "user_nicename", From 81c2e412c3ab265b337ba557341eba35f749f4c2 Mon Sep 17 00:00:00 2001 From: Marius Jensen Date: Mon, 11 Nov 2024 14:27:05 +0100 Subject: [PATCH 2/5] Add support for negative lookups to constraints. 
--- internal/anonymize/anonymize.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/anonymize/anonymize.go b/internal/anonymize/anonymize.go index 48d48a3..c5bbfbe 100644 --- a/internal/anonymize/anonymize.go +++ b/internal/anonymize/anonymize.go @@ -369,6 +369,9 @@ func rowObeysConstraints(constraints []config.PatternFieldConstraint, row sqlpar case "like", "==", "=": + if parsedValue != constraint.Value { + return false + } default: if parsedValue != constraint.Value { return false From 611ff2fe5b59da7f64cc2f816212efb0a832c3e6 Mon Sep 17 00:00:00 2001 From: Marius Jensen Date: Mon, 11 Nov 2024 15:09:26 +0100 Subject: [PATCH 3/5] Add regex constraint rules --- README.md | 13 +++++++++++-- internal/anonymize/anonymize.go | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0989ecd..5d927ec 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The config is composed of many objects in the `patterns` array: - `field`: a string representing the name of the field. - `position`: the 1-based index of what number column this field represents. For instance, assuming a table with 3 columns `foo`, `bar`, and `baz`, and you wished to modify the `bar` column, this value would be `2`. - `value`: string value to match against. - - `compare`: An optional string stating how to treat the constraints. Passing `not like` will make it a negative lookup, and if the `value` matches, this line will not be processed. + - `compare`: An optional string stating how to treat the constraints. ### Constraints @@ -132,9 +132,18 @@ Supposing you have a WordPress database and you need to modify certain meta, be } ] } - ``` +#### Compare rules +Constraints allow the user to define rules for how to treat the comparison value. 
The following rules are supported: + +**PS: Remember that comparison rules are first come first serve, so as soon as a rule that would negate the anonymization of a field is found, it will short-circuit any further rules.** + +- `like`: The default behavior. The SQL value must be equal to the constraint `value` field. +- `not like`: The SQL value must not be equal to the constraint `value` field. +- `regex`: The SQL value must match the regex string given in the `value` field. +- `regex not like`: The inverse of `regex`, and requires the regex patter to not match the SQL value. +- ### Field Types Each column stores a certain type of data, be it a name, username, email, etc. The `type` property in the config is used to define the type of data stored, and ultimately the type of random data to be inserted into the field. [https://github.com/dmgk/faker](https://github.com/dmgk/faker) is used for generating the fake data. These are the types currently supported: diff --git a/internal/anonymize/anonymize.go b/internal/anonymize/anonymize.go index c5bbfbe..a06cdbc 100644 --- a/internal/anonymize/anonymize.go +++ b/internal/anonymize/anonymize.go @@ -366,6 +366,18 @@ func rowObeysConstraints(constraints []config.PatternFieldConstraint, row sqlpar if parsedValue == constraint.Value { return false } + case "regex not like": + re := regexp.MustCompile(constraint.Value) + match := re.MatchString(parsedValue) + if match { + return false + } + case "regex": + re := regexp.MustCompile(constraint.Value) + match := re.MatchString(parsedValue) + if !match { + return false + } case "like", "==", "=": From 6bfca8ffc795e45b0666d4f26b352477449721cc Mon Sep 17 00:00:00 2001 From: Marius Jensen Date: Mon, 11 Nov 2024 15:12:11 +0100 Subject: [PATCH 4/5] Be explicit in our readme that previous columns that are already processed are not safe comparators if they are also anonymized. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d927ec..d54f955 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,7 @@ Supposing you have a WordPress database and you need to modify certain meta, be #### Compare rules Constraints allow the user to define rules for how to treat the comparison value. The following rules are supported: -**PS: Remember that comparison rules are first come first serve, so as soon as a rule that would negate the anonymization of a field is found, it will short-circuit any further rules.** +**PS: Remember that comparison rules are first come first serve, so as soon as a rule that would negate the anonymization of a field is found, it will short-circuit any further rules. You should also try to avoid comparing against other fields; remember that a field you may wish to compare against may already have been modified and no longer give the expected value!** - `like`: The default behavior. The SQL value must be equal to the constraint `value` field. - `not like`: The SQL value must not be equal to the constraint `value` field. From 91f6dc0eca913f31b81a3d9f7243b4ed891f0093 Mon Sep 17 00:00:00 2001 From: Marius Jensen Date: Mon, 11 Nov 2024 15:14:28 +0100 Subject: [PATCH 5/5] Remove errant empty bullet point --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d54f955..88f7562 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ Constraints allow the user to define rules for how to treat the comparison value - `not like`: The SQL value must not be equal to the constraint `value` field. - `regex`: The SQL value must match the regex string given in the `value` field. - `regex not like`: The inverse of `regex`, and requires the regex patter to not match the SQL value. -- + ### Field Types Each column stores a certain type of data, be it a name, username, email, etc.
The `type` property in the config is used to define the type of data stored, and ultimately the type of random data to be inserted into the field. [https://github.com/dmgk/faker](https://github.com/dmgk/faker) is used for generating the fake data. These are the types currently supported: