From 57363b3eb9d01d435cae841353243638c7481164 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Fri, 7 Mar 2025 10:00:50 +0200 Subject: [PATCH] Sentry Auth Token patterns (#693) * Sentry Organization Auth Token * Duplicate rules name check * Sentry User Auth Token * assertRaisesRegex * ssws && ntlm authentication schemes in keyword * sample added * test added --- credsweeper/common/keyword_pattern.py | 2 +- credsweeper/rules/config.yaml | 31 +++++++++++ credsweeper/scanner/scanner.py | 7 ++- tests/__init__.py | 6 +-- tests/data/depth_3.json | 75 +++++++++++++++++++++++++++ tests/data/doc.json | 50 ++++++++++++++++++ tests/data/ml_threshold.json | 75 +++++++++++++++++++++++++++ tests/data/output.json | 75 +++++++++++++++++++++++++++ tests/samples/auth.hs | 1 + tests/samples/sentry | 2 + tests/test_main.py | 59 ++++++++++++++++++++- 11 files changed, 376 insertions(+), 7 deletions(-) create mode 100644 tests/samples/sentry diff --git a/credsweeper/common/keyword_pattern.py b/credsweeper/common/keyword_pattern.py index 8237f7fc1..c03aba24d 100644 --- a/credsweeper/common/keyword_pattern.py +++ b/credsweeper/common/keyword_pattern.py @@ -26,7 +26,7 @@ class KeywordPattern: string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?" left_quote = r"(?P((?P\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?" # Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential - auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey)\s)?" + auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey|ssws|ntlm)\s)?" value = r"(?P" \ r"(?(value_leftquote)" \ r"(" \ diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 030b90303..12d38347e 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -1407,6 +1407,37 @@ - code - doc +- name: Sentry Organization Auth Token + severity: high + confidence: strong + type: pattern + values: + - (?:(?sntrys_eyJ[0-9A-Za-z_-]{80,8000}=*([0-9A-Za-z_-]{32,256})?)(?![0-9A-Za-z_-]) + min_line_len: 37 + filter_type: + - ValuePatternCheck(5) + - ValueEntropyBase64Check + required_substrings: + - sntrys_eyJ + target: + - code + - doc + +- name: Sentry User Auth Token + severity: high + confidence: strong + type: pattern + values: + - (?:(?sntryu_[0-9a-f]{64})(?![0-9A-Za-z_-]) + min_line_len: 37 + filter_type: + - ValuePatternCheck(5) + required_substrings: + - sntryu_ + target: + - code + - doc + - name: Discord Bot Token severity: high confidence: strong diff --git a/credsweeper/scanner/scanner.py b/credsweeper/scanner/scanner.py index 1f69cfb95..3008c4241 100644 --- a/credsweeper/scanner/scanner.py +++ b/credsweeper/scanner/scanner.py @@ -69,6 +69,7 @@ def _set_rules_scanners(self, rule_path: Union[None, str, Path]) -> None: rule_path = APP_PATH / "rules" / "config.yaml" rule_templates = Util.yaml_load(rule_path) if rule_templates and isinstance(rule_templates, list): + rule_names = set() for rule_template in rule_templates: try: rule = Rule(self.config, rule_template) @@ -77,6 +78,10 @@ def _set_rules_scanners(self, rule_path: Union[None, str, Path]) -> None: raise exc if not self._is_available(rule): continue + if rule.rule_name in rule_names: + raise RuntimeError(f"Duplicated rule name {rule.rule_name}") + else: + rule_names.add(rule.rule_name) if 0 < rule.min_line_len: if rule.rule_type == RuleType.KEYWORD: self.min_keyword_len = min(self.min_keyword_len, rule.min_line_len) @@ -141,7 +146,7 @@ def scan(self, provider: ContentProvider) -> List[Candidate]: # "cache" - YAPF and pycharm formatters ... matched_keyword = \ target_line_stripped_len >= self.min_keyword_len and ( # - '=' in target_line_stripped or ':' in target_line_stripped) # + '=' in target_line_stripped or ':' in target_line_stripped) # matched_pem_key = \ target_line_stripped_len >= self.min_pem_key_len \ and PEM_BEGIN_PATTERN in target_line_stripped and "PRIVATE" in target_line_stripped diff --git a/tests/__init__.py b/tests/__init__.py index 8e9bb2677..87b54feb0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,13 +1,13 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 148 +SAMPLES_FILES_COUNT = 149 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 472 +SAMPLES_CRED_COUNT = 475 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19 # Number of filtered credentials with ML @@ -17,7 +17,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 654 +SAMPLES_IN_DOC = 656 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 89 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index b58638d04..3144bdc92 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -342,6 +342,31 @@ } ] }, + { + "rule": "Auth", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 1.0, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "FILE|RAW", + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "variable": "Authorization", + "variable_start": 0, + "variable_end": 13, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.002348372264613, + "valid": false + } + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -10056,6 +10081,56 @@ } ] }, + { + "rule": "Sentry Organization Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "line_num": 1, + "path": "./tests/samples/sentry", + "info": "FILE|RAW", + "value": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "value_start": 0, + "value_end": 179, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.424476232986917, + "valid": true + } + } + ] + }, + { + "rule": "Sentry User Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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", + "line_num": 2, + "path": "./tests/samples/sentry", + "info": "FILE|RAW", + "value": "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", + "value_start": 0, + "value_end": 71, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 4.0572205343720595, + "valid": true + } + } + ] + }, { "rule": "Shopify Token", "severity": "high", diff --git a/tests/data/doc.json b/tests/data/doc.json index df3eeb786..d00064153 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -16413,6 +16413,56 @@ } ] }, + { + "rule": "Sentry Organization Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "line_num": 1, + "path": "./tests/samples/sentry", + "info": "FILE|RAW", + "value": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "value_start": 0, + "value_end": 179, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.424476232986917, + "valid": true + } + } + ] + }, + { + "rule": "Sentry User Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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", + "line_num": 2, + "path": "./tests/samples/sentry", + "info": "FILE|RAW", + "value": "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", + "value_start": 0, + "value_end": 71, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 4.0572205343720595, + "valid": true + } + } + ] + }, { "rule": "Shopify Token", "severity": "high", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index f81a0088a..596ca09c6 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -249,6 +249,31 @@ } ] }, + { + "rule": "Auth", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 1.0, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "", + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "variable": "Authorization", + "variable_start": 0, + "variable_end": 13, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.002348372264613, + "valid": false + } + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -10197,6 +10222,56 @@ } ] }, + { + "rule": "Sentry Organization Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "line_num": 1, + "path": "./tests/samples/sentry", + "info": "", + "value": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "value_start": 0, + "value_end": 179, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.424476232986917, + "valid": true + } + } + ] + }, + { + "rule": "Sentry User Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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", + "line_num": 2, + "path": "./tests/samples/sentry", + "info": "", + "value": "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", + "value_start": 0, + "value_end": 71, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 4.0572205343720595, + "valid": true + } + } + ] + }, { "rule": "Shopify Token", "severity": "high", diff --git a/tests/data/output.json b/tests/data/output.json index a2b3c09eb..f7b27df55 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -249,6 +249,31 @@ } ] }, + { + "rule": "Auth", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 1.0, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "", + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "variable": "Authorization", + "variable_start": 0, + "variable_end": 13, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.002348372264613, + "valid": false + } + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -8022,6 +8047,56 @@ } ] }, + { + "rule": "Sentry Organization Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "line_num": 1, + "path": "./tests/samples/sentry", + "info": "", + "value": "sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv", + "value_start": 0, + "value_end": 179, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.424476232986917, + "valid": true + } + } + ] + }, + { + "rule": "Sentry User Auth Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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", + "line_num": 2, + "path": "./tests/samples/sentry", + "info": "", + "value": "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", + "value_start": 0, + "value_end": 71, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 4.0572205343720595, + "valid": true + } + } + ] + }, { "rule": "Shopify Token", "severity": "high", diff --git a/tests/samples/auth.hs b/tests/samples/auth.hs index 40bef8b18..b7830e5f1 100644 --- a/tests/samples/auth.hs +++ b/tests/samples/auth.hs @@ -1,3 +1,4 @@ "kerberos_authentication": "YI7IB6wYJgaMgHAgIKoZI2AQBuIh2cSA0IB1qA" "authorization": "aMgHAgIKhwLgGq02iQoZI1AQBuOh4cSAQ8B1qA" headers = {authorization: /oauth_signature="JgEWaL6V6eM%2FFb9wuXG4I3IB6wY%3D"/, content_type: 'application/json; charset=utf-8'} +Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A= diff --git a/tests/samples/sentry b/tests/samples/sentry new file mode 100644 index 000000000..d1915efb1 --- /dev/null +++ b/tests/samples/sentry @@ -0,0 +1,2 @@ +sntrys_eyJpYXQiOjE3NDEyNjQzNTYuMDAwMCwidXJsIjoiaHR0cHM6Ly9zZW50cnkuaW8iLCJyZWdpb25fdXJsIjoiaHR0cHM6Ly91YS5zZW50cnkuaW8iLCJvcmciOiIifQ==v8D-whr2cUQK91Civi4yNoLRjC3MDZH5I2aMcs_j5GDv +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 diff --git a/tests/test_main.py b/tests/test_main.py index 6c5e504cd..c9ba383c3 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -69,6 +69,59 @@ def test_use_filters_n(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_rules_dub_n(self) -> None: + with self.assertRaisesRegex(RuntimeError, r"Wrong rules 'None' were read from 'NotExistedPath'"): + CredSweeper(rule_path="NotExistedPath") + with tempfile.TemporaryDirectory() as tmp_dir: + test_rules_file = os.path.join(tmp_dir, "test_rules.yaml") + dub_rules = [{ + "name": "TestRuleNameDub", + "severity": "high", + "confidence": "moderate", + "type": "pattern", + "min_line_len": 42, + "values": ["(?P.*)"], + "target": ["code"], + }, { + "name": "TestRuleNameDub", + "severity": "high", + "confidence": "moderate", + "type": "pattern", + "min_line_len": 42, + "values": ["(?P.*)"], + "target": ["code", "doc"], + }] + Util.yaml_dump(dub_rules, test_rules_file) + with self.assertRaisesRegex(RuntimeError, r"Duplicated rule name TestRuleNameDub"): + CredSweeper(rule_path=test_rules_file) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_rules_dub_p(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + test_rules_file = os.path.join(tmp_dir, "test_rules.yaml") + dub_rules = [{ + "name": "TestRuleNameDub", + "severity": "high", + "confidence": "moderate", + "type": "pattern", + "min_line_len": 42, + "values": ["(?P.*)"], + "target": ["code"], + }, { + "name": "TestRuleNameDub", + "severity": "high", + "confidence": "moderate", + "type": "pattern", + "min_line_len": 42, + "values": ["(?P.*)"], + "target": ["doc"], + }] + Util.yaml_dump(dub_rules, test_rules_file) + self.assertIsNotNone(CredSweeper(rule_path=test_rules_file)) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + @mock.patch("credsweeper.__main__.scan", return_value=1) @mock.patch("credsweeper.__main__.get_arguments") def test_main_n(self, mock_get_arguments, mock_scan) -> None: @@ -108,7 +161,7 @@ def test_main_path_p(self, mock_get_arguments) -> None: ml_batch_size=1, depth=0, doc=False, - severity="info", + severity=Severity.INFO.value, size_limit="1G", denylist_path=None) mock_get_arguments.return_value = args_mock @@ -144,7 +197,7 @@ def test_binary_patch_p(self, mock_get_arguments) -> None: ml_batch_size=1, depth=9, doc=False, - severity="info", + severity=Severity.INFO.value, size_limit="1G", denylist_path=None) mock_get_arguments.return_value = args_mock @@ -800,6 +853,8 @@ def test_param_n(self) -> None: def test_param_p(self) -> None: # internal parametrized tests for quick debug items = [ # + ("log.txt", b'Authorization: SSWS 00QEi8-WW0HmCjAl4MlVjFx-vbGPXMD8sWXsua', "Authorization", + "00QEi8-WW0HmCjAl4MlVjFx-vbGPXMD8sWXsua"), ('test.yaml', b'password: "Fd[q#pX+@4*r`1]Io"', 'password', 'Fd[q#pX+@4*r`1]Io'), ("any", b'docker swarm join --token qii7t1m6423127xto389xc914l34451qz5135865564sg', 'token', 'qii7t1m6423127xto389xc914l34451qz5135865564sg'),