diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..38196f5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+Copyright (c) 2018- Kanmu, Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a9000a0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,67 @@
+# fluent-plugin-pan-anonymizer
+
+A Fluentd filter plugin that anonymizes records containing a PAN (Primary Account Number, i.e. a credit card number). After a configured format matches, the plugin validates the matched PAN using the [Luhn algorithm](https://en.wikipedia.org/wiki/Luhn_algorithm) before masking it.
+
+Inspired by [fluent-plugin-anonymizer](https://github.com/y-ken/fluent-plugin-anonymizer).
+
+# Requirements
+
+- fluentd: v0.14.x
+- Ruby: 2.4
+
+# Installation
+
+```
+gem install fluent-plugin-pan-anonymizer
+```
+
+# Configuration
+
+NOTE: The card numbers used in this example are not real card numbers.
+
+```
+
+
+<filter **>
+  @type pan_anonymizer
+  ignore_keys time
+  <pan>
+    formats /4\d{15}/, /4[0-9]{15}/
+    checksum_algorithm luhn
+    mask 9999999999999999
+  </pan>
+  <pan>
+    formats /4\d{3}-\d{4}-\d{4}-\d{4}/, /4\d{3}\s*\d{4}\s*\d{4}\s*\d{4}/
+    checksum_algorithm luhn
+    mask xxxx-xxxx-xxxx-xxxx
+  </pan>
+</filter>
+
+<match **>
+  @type stdout
+</match>
+```
+
+## The result of the example given above
+
+```
+2018-11-13 22:01:35.074963000 +0900 dummy: {"time":12345678901234567,"subject":"xxxxxx","user_inquiry":"hi, my card number is 9999999999999999 !"}
+2018-11-13 22:01:36.001053000 +0900 dummy: {"time":12345678901234568,"subject":"xxxxxx","user_inquiry":"hello inquiry code is 4567890123456789"}
+2018-11-13 22:01:37.021032000 +0900 dummy: {"time":12345678901234569,"subject":"I am xxxx-xxxx-xxxx-xxxx","user_inquiry":"xxxx-xxxx-xxxx-xxxx is my number"}
+2018-11-13 22:01:38.050578000 +0900 dummy: {"time":14019249331712145,"subject":"ユーザーです","user_inquiry":"9999999999999999 のカードを使っています"}
+```
+
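+With the configuration above, card numbers are masked everywhere except in two places: the `time` key, which is listed in `ignore_keys`, and `4567890123456789` in "hello inquiry code is 4567890123456789". The latter is left untouched because `4567890123456789` fails the Luhn check and is therefore not a valid card number.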
+
+# License
+
+Apache License, Version 2.0
diff --git a/lib/fluent/plugin/filter_pan_anonymizer.rb b/lib/fluent/plugin/filter_pan_anonymizer.rb
index 627721f..be83696 100644
--- a/lib/fluent/plugin/filter_pan_anonymizer.rb
+++ b/lib/fluent/plugin/filter_pan_anonymizer.rb
@@ -9,6 +9,7 @@ class PANAnonymizerFilter < Filter
config_param :formats, :array, value_type: :regexp, default: []
config_param :checksum_algorithm, :enum, list: Fluent::PAN::Masker::CHECKSUM_FUNC.keys, default: :luhn
config_param :mask, :string, default: "****"
+ config_param :force, :bool, default: false
end
config_param :ignore_keys, :array, default: []
@@ -21,14 +22,14 @@ def configure(conf)
@pan_masker = @pan_configs.map do |i|
i[:formats].map do |format|
- Fluent::PAN::Masker.new(format, i[:checksum_algorithm], i[:mask])
+ Fluent::PAN::Masker.new(format, i[:checksum_algorithm], i[:mask], i[:force])
end
end.flatten
end
def filter(tag, time, record)
record.map do |key, value|
- if @ignore_keys.include? key
+ if @ignore_keys.include? key.to_s
[key, value]
else
_value = value
diff --git a/lib/fluent/plugin/pan/masker.rb b/lib/fluent/plugin/pan/masker.rb
index 67c7513..688c6cd 100644
--- a/lib/fluent/plugin/pan/masker.rb
+++ b/lib/fluent/plugin/pan/masker.rb
@@ -32,14 +32,12 @@ def initialize(regexp, checksum_algorithm, mask, force=false)
def mask_if_found_pan(orgval)
filtered = orgval.to_s.gsub(@regexp) do |match|
- digits = match.split("").select do |i|
- i =~ /\d/
- end.map do |j|
- j.to_i
- end
+ pan = match.split("").select { |i| i =~ /\d/ }.map { |j| j.to_i }
- if valid?(digits)
+ if valid?(pan)
match = @mask
+ else
+ match
end
end
@@ -58,8 +56,8 @@ def mask_if_found_pan(orgval)
retval
end
- def valid?(digits)
- @checksum_func.call(digits)
+ def valid?(pan)
+ @checksum_func.call(pan)
end
def numerals_mask?
diff --git a/test/plugin/test_filter_pan_anonymizer.rb b/test/plugin/test_filter_pan_anonymizer.rb
index 144f007..0b4ede8 100644
--- a/test/plugin/test_filter_pan_anonymizer.rb
+++ b/test/plugin/test_filter_pan_anonymizer.rb
@@ -3,6 +3,8 @@
require 'fluent/plugin/filter_pan_anonymizer'
+# NOTE: The card numbers used in these tests are not real card numbers.
+
class PANAnonymizerFilterTest < Test::Unit::TestCase
def setup
Fluent::Test::setup
@@ -11,24 +13,24 @@ def setup
CONFIG = %[
- formats /4\d{15}/
+ formats /4\\d{15}/
checksum_algorithm luhn
- mask ***
+ mask xxxx
- formats /4\d{15}/
+ formats /4\\d{15}/
checksum_algorithm none
- mask ***
+ mask xxxx
- formats /4019-\d{4}-\d{4}-\d{4}/
+ formats /4019-\\d{4}-\\d{4}-\\d{4}/
checksum_algorithm luhn
- mask ***
+ mask xxxx
- formats /4019\d{10}/, /4019-\d{4}-\d{4}-\d{4}/
+ formats /4019\\d{10}/, /4019-\\d{4}-\\d{4}-\\d{4}/
checksum_algorithm luhn
- mask ***
+ mask xxxx
ignore_keys ignore1, ignore2
]
@@ -120,25 +122,187 @@ def filter(conf, messages)
end
end
- test "aaaaaaaa" do
- conf = %[
-
- formats /4\\d{15}/
- checksum_algorithm luhn
- mask xxxx
-
- ]
- messages = [
- {
- "key": "9994019249331712145999"
- }
- ]
- expected = [
- {
- "key": "999xxxx999"
- }
- ]
- filtered = filter(conf, messages)
- assert_equal(expected, filtered)
+ sub_test_case 'normal case' do
+ test "in case of nnnnnnnnnnnnnnnn" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask xxxx
+
+ ]
+ messages = [
+ {
+ "key": "9994019249331712145999"
+ }
+ ]
+ expected = [
+ {
+ "key": "999xxxx999"
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ test "in case of nnnn-nnnn-nnnn-nnnn" do
+ conf = %[
+
+ formats /4\\d{3}-\\d{4}-\\d{4}-\\d{4}/
+ checksum_algorithm luhn
+ mask xxxx
+
+ ]
+ messages = [
+ {
+ "key": "9994019-2493-3171-2145999"
+ }
+ ]
+ expected = [
+ {
+ "key": "999xxxx999"
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ end
+
+ sub_test_case 'checksum_algorithm' do
+ test "not be masked if PAN is not satisfied luhn" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask xxxx
+
+ ]
+ messages = [
+ {
+ "key": "9994019111122223333999"
+ }
+ ]
+ expected = [
+ {
+ "key": "9994019111122223333999"
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ test "be masked if checksum_algorithm is none" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm none
+ mask xxxx
+
+ ]
+ messages = [
+ {
+ "key": "9994019111122223333999"
+ }
+ ]
+ expected = [
+ {
+ "key": "999xxxx999"
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ end
+
+ sub_test_case 'integer value' do
+ test "not be masked if mask is string" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask xxxx
+
+ ]
+ messages = [
+ {
+ "key": 9994019249331712145999
+ }
+ ]
+ expected = [
+ {
+ "key": 9994019249331712145999
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ test "be masked if force flag exists" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask xxxx
+ force true
+
+ ]
+ messages = [
+ {
+ "key": 9994019249331712145999
+ }
+ ]
+ expected = [
+ {
+ "key": "999xxxx999"
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ test "be masked if mask is integer value" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask 1111111111111111
+
+ ]
+ messages = [
+ {
+ "key": 9994019249331712145999
+ }
+ ]
+ expected = [
+ {
+ "key": 9991111111111111111999
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
+ end
+
+ sub_test_case 'ignore keys' do
+ test "not be masked" do
+ conf = %[
+
+ formats /4\\d{15}/
+ checksum_algorithm luhn
+ mask 9999999999999999
+
+ ignore_keys time
+ ]
+ messages = [
+ {
+ "time": 40192493317121459,
+ "key": 40192493317121459
+ }
+ ]
+ expected = [
+ {
+ "time": 40192493317121459,
+ "key": 99999999999999999
+ }
+ ]
+ filtered = filter(conf, messages)
+ assert_equal(expected, filtered)
+ end
end
end