From 4e3352a3040ecc7dd9fea527c4f4a4191d2abe4d Mon Sep 17 00:00:00 2001 From: Hiroaki Sano Date: Tue, 13 Nov 2018 22:25:27 +0900 Subject: [PATCH] Ready to release --- LICENSE | 13 ++ README.md | 67 +++++++ lib/fluent/plugin/filter_pan_anonymizer.rb | 5 +- lib/fluent/plugin/pan/masker.rb | 14 +- test/plugin/test_filter_pan_anonymizer.rb | 220 ++++++++++++++++++--- 5 files changed, 281 insertions(+), 38 deletions(-) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..38196f5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2018- Kanmu, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a9000a0 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# fluent-plugin-pan-anonymizer + +A Fluentd filter plugin that anonymizes records containing a PAN (Primary Account Number, i.e. a credit card number). After a pattern matches, the plugin validates the candidate PAN with the [Luhn algorithm](https://en.wikipedia.org/wiki/Luhn_algorithm). + +Inspired by [fluent-plugin-anonymizer](https://github.com/y-ken/fluent-plugin-anonymizer). + +# Requirements + +- fluentd: v0.14.x +- Ruby: 2.4 + +# Installation + +``` +gem install fluent-plugin-pan-anonymizer +``` + +# Configuration + +NOTE: The card numbers in the example below are fictitious and do not belong to any real card. 
+ +``` + + @type dummy + tag dummy + dummy [ + {"time": 12345678901234567, "subject": "xxxxxx", "user_inquiry": "hi, my card number is 4019249331712145 !"}, + {"time": 12345678901234568, "subject": "xxxxxx", "user_inquiry": "hello inquiry code is 4567890123456789"}, + {"time": 12345678901234569, "subject": "I am 4019 2493 3171 2145", "user_inquiry": "4019-2493-3171-2145 is my number"}, + {"time": 14019249331712145, "subject": "ユーザーです", "user_inquiry": "4019249331712145 のカードを使っています"} + ] + + + + @type pan_anonymizer + ignore_keys time + + formats /4\d{15}/, /4[0-9]{15}/ + checksum_algorithm luhn + mask 9999999999999999 + + + formats /4\d{3}-\d{4}-\d{4}-\d{4}/, /4\d{3}\s*\d{4}\s*\d{4}\s*\d{4}/ + checksum_algorithm luhn + mask xxxx-xxxx-xxxx-xxxx + + + + + @type stdout + +``` + +## The result of the example given above + +``` +2018-11-13 22:01:35.074963000 +0900 dummy: {"time":12345678901234567,"subject":"xxxxxx","user_inquiry":"hi, my card number is 9999999999999999 !"} +2018-11-13 22:01:36.001053000 +0900 dummy: {"time":12345678901234568,"subject":"xxxxxx","user_inquiry":"hello inquiry code is 4567890123456789"} +2018-11-13 22:01:37.021032000 +0900 dummy: {"time":12345678901234569,"subject":"I am xxxx-xxxx-xxxx-xxxx","user_inquiry":"xxxx-xxxx-xxxx-xxxx is my number"} +2018-11-13 22:01:38.050578000 +0900 dummy: {"time":14019249331712145,"subject":"ユーザーです","user_inquiry":"9999999999999999 のカードを使っています"} +``` + +Card numbers were masked according to the given configuration, with two exceptions: the `time` key, which is listed in `ignore_keys`, and `4567890123456789` in "hello inquiry code is 4567890123456789", which is left untouched because it fails the Luhn check and is therefore not a valid card number. 
+ +# License + +Apache License, Version 2.0 diff --git a/lib/fluent/plugin/filter_pan_anonymizer.rb b/lib/fluent/plugin/filter_pan_anonymizer.rb index 627721f..be83696 100644 --- a/lib/fluent/plugin/filter_pan_anonymizer.rb +++ b/lib/fluent/plugin/filter_pan_anonymizer.rb @@ -9,6 +9,7 @@ class PANAnonymizerFilter < Filter config_param :formats, :array, value_type: :regexp, default: [] config_param :checksum_algorithm, :enum, list: Fluent::PAN::Masker::CHECKSUM_FUNC.keys, default: :luhn config_param :mask, :string, default: "****" + config_param :force, :bool, default: false end config_param :ignore_keys, :array, default: [] @@ -21,14 +22,14 @@ def configure(conf) @pan_masker = @pan_configs.map do |i| i[:formats].map do |format| - Fluent::PAN::Masker.new(format, i[:checksum_algorithm], i[:mask]) + Fluent::PAN::Masker.new(format, i[:checksum_algorithm], i[:mask], i[:force]) end end.flatten end def filter(tag, time, record) record.map do |key, value| - if @ignore_keys.include? key + if @ignore_keys.include? key.to_s [key, value] else _value = value diff --git a/lib/fluent/plugin/pan/masker.rb b/lib/fluent/plugin/pan/masker.rb index 67c7513..688c6cd 100644 --- a/lib/fluent/plugin/pan/masker.rb +++ b/lib/fluent/plugin/pan/masker.rb @@ -32,14 +32,12 @@ def initialize(regexp, checksum_algorithm, mask, force=false) def mask_if_found_pan(orgval) filtered = orgval.to_s.gsub(@regexp) do |match| - digits = match.split("").select do |i| - i =~ /\d/ - end.map do |j| - j.to_i - end + pan = match.split("").select { |i| i =~ /\d/ }.map { |j| j.to_i } - if valid?(digits) + if valid?(pan) match = @mask + else + match end end @@ -58,8 +56,8 @@ def mask_if_found_pan(orgval) retval end - def valid?(digits) - @checksum_func.call(digits) + def valid?(pan) + @checksum_func.call(pan) end def numerals_mask? 
diff --git a/test/plugin/test_filter_pan_anonymizer.rb b/test/plugin/test_filter_pan_anonymizer.rb index 144f007..0b4ede8 100644 --- a/test/plugin/test_filter_pan_anonymizer.rb +++ b/test/plugin/test_filter_pan_anonymizer.rb @@ -3,6 +3,8 @@ require 'fluent/plugin/filter_pan_anonymizer' +# NOTE: The card number in the test doesn't exist in the world! + class PANAnonymizerFilterTest < Test::Unit::TestCase def setup Fluent::Test::setup @@ -11,24 +13,24 @@ def setup CONFIG = %[ - formats /4\d{15}/ + formats /4\\d{15}/ checksum_algorithm luhn - mask *** + mask xxxx - formats /4\d{15}/ + formats /4\\d{15}/ checksum_algorithm none - mask *** + mask xxxx - formats /4019-\d{4}-\d{4}-\d{4}/ + formats /4019-\\d{4}-\\d{4}-\\d{4}/ checksum_algorithm luhn - mask *** + mask xxxx - formats /4019\d{10}/, /4019-\d{4}-\d{4}-\d{4}/ + formats /4019\\d{10}/, /4019-\\d{4}-\\d{4}-\\d{4}/ checksum_algorithm luhn - mask *** + mask xxxx ignore_keys ignore1, ignore2 ] @@ -120,25 +122,187 @@ def filter(conf, messages) end end - test "aaaaaaaa" do - conf = %[ - - formats /4\\d{15}/ - checksum_algorithm luhn - mask xxxx - - ] - messages = [ - { - "key": "9994019249331712145999" - } - ] - expected = [ - { - "key": "999xxxx999" - } - ] - filtered = filter(conf, messages) - assert_equal(expected, filtered) + sub_test_case 'normal case' do + test "in case of nnnnnnnnnnnnnnnn" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask xxxx + + ] + messages = [ + { + "key": "9994019249331712145999" + } + ] + expected = [ + { + "key": "999xxxx999" + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + test "in case of nnnn-nnnn-nnnn-nnnn" do + conf = %[ + + formats /4\\d{3}-\\d{4}-\\d{4}-\\d{4}/ + checksum_algorithm luhn + mask xxxx + + ] + messages = [ + { + "key": "9994019-2493-3171-2145999" + } + ] + expected = [ + { + "key": "999xxxx999" + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + end + + sub_test_case 
'checksum_algorithm' do + test "not be masked if PAN is not satisfied luhn" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask xxxx + + ] + messages = [ + { + "key": "9994019111122223333999" + } + ] + expected = [ + { + "key": "9994019111122223333999" + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + test "be masked if checksum_algorithm is none" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm none + mask xxxx + + ] + messages = [ + { + "key": "9994019111122223333999" + } + ] + expected = [ + { + "key": "999xxxx999" + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + end + + sub_test_case 'integer value' do + test "not be masked if mask is string" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask xxxx + + ] + messages = [ + { + "key": 9994019249331712145999 + } + ] + expected = [ + { + "key": 9994019249331712145999 + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + test "be masked if force flag exists" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask xxxx + force true + + ] + messages = [ + { + "key": 9994019249331712145999 + } + ] + expected = [ + { + "key": "999xxxx999" + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + test "be masked if mask is integer value" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask 1111111111111111 + + ] + messages = [ + { + "key": 9994019249331712145999 + } + ] + expected = [ + { + "key": 9991111111111111111999 + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end + end + + sub_test_case 'ignore keys' do + test "not be masked" do + conf = %[ + + formats /4\\d{15}/ + checksum_algorithm luhn + mask 9999999999999999 + + ignore_keys time + ] + messages = [ + { + "time": 40192493317121459, + "key": 40192493317121459 + } + ] + expected = [ + { + "time": 40192493317121459, + 
"key": 99999999999999999 + } + ] + filtered = filter(conf, messages) + assert_equal(expected, filtered) + end end end