diff --git a/README.md b/README.md index 69a687094..d1c3849c3 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Full documentation can be found here: ### Main Requirements -- Python 3.8, 3.9, 3.10, 3.11, 3.12 +- Python 3.9, 3.10, 3.11, 3.12 ### Installation diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py index 3796c7676..82054bb12 100644 --- a/credsweeper/__main__.py +++ b/credsweeper/__main__.py @@ -205,8 +205,8 @@ def get_arguments() -> Namespace: metavar="POSITIVE_INT") parser.add_argument("--thrifty", help="clear objects after scan to reduce memory consumption", - action="store_const", - const=True) + action=BooleanOptionalAction, + default=True) parser.add_argument("--skip_ignored", help="parse .gitignore files and skip credentials from ignored objects", dest="skip_ignored", diff --git a/credsweeper/common/keyword_pattern.py b/credsweeper/common/keyword_pattern.py index 219e99325..8237f7fc1 100644 --- a/credsweeper/common/keyword_pattern.py +++ b/credsweeper/common/keyword_pattern.py @@ -13,7 +13,7 @@ class KeywordPattern: r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \ r")" # separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \ - r"(?P:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|>|\\u0026gt;)|!=|===|==|=|%3d)" \ + r"(?P:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|>|\\u0026gt;)|!==|!=|===|==|=|%3d)" \ r"(\s|\\{1,8}[tnr])*" # might be curly, square or parenthesis with words before wrap = r"(?P(" \ diff --git a/credsweeper/filters/value_allowlist_check.py b/credsweeper/filters/value_allowlist_check.py index 1759d7c8a..18bc6dba3 100644 --- a/credsweeper/filters/value_allowlist_check.py +++ b/credsweeper/filters/value_allowlist_check.py @@ -16,7 +16,7 @@ class ValueAllowlistCheck(Filter): r"\$\{(\*|[0-9]+|[a-z_].*)\}", # r"\$[0-9]+(\s|$)", # r"\$\$[a-z_]+(\^%[0-9a-z_]+)?", # - r"#\{.*\}", # + r"#\{.+\}", # Ruby: String Interpolation r"\{\{.+\}\}", # r".*@@@hl@@@(암호|비번|PW|PASS)@@@endhl@@@", # ] diff --git a/credsweeper/filters/value_blocklist_check.py b/credsweeper/filters/value_blocklist_check.py index 3f85f25cc..32c0ff386 100644 --- a/credsweeper/filters/value_blocklist_check.py +++ b/credsweeper/filters/value_blocklist_check.py @@ -11,8 +11,11 @@ class ValueBlocklistCheck(Filter): "true", "false", "null", + "none", "bearer", "string", + "value", + "undefined", ] def __init__(self, config: Config = None) -> None: diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index e5725688e..030b90303 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -1391,6 +1391,22 @@ - code - doc +- name: Tavily API Key + severity: high + confidence: strong + type: pattern + values: + - (?:(?tvly-[0-9A-Za-z_-]{32,40})(?![0-9A-Za-z_-]) + min_line_len: 37 + filter_type: + - ValuePatternCheck(5) + - ValueEntropyBase64Check + required_substrings: + - tvly- + target: + - code + - doc + - name: Discord Bot Token severity: high confidence: strong diff --git a/credsweeper/utils/hop_stat.py b/credsweeper/utils/hop_stat.py index 031e3adb0..6897bb703 100644 --- a/credsweeper/utils/hop_stat.py +++ b/credsweeper/utils/hop_stat.py @@ -25,11 +25,37 @@ class HopStat: ')': '0', '_': '-', '+': '=', + 'Q': 'q', + 'W': 'w', + 'E': 'e', + 'R': 'r', + 'T': 't', + 'Y': 'y', + 'U': 'u', + 'I': 'i', + 'O': 'o', + 'P': 'p', '{': '[', '}': ']', '|': '\\', + 'A': 'a', + 'S': 's', + 'D': 'd', + 'F': 'f', + 'G': 'g', + 'H': 'h', + 'J': 'j', + 'K': 'k', + 'L': 'l', ':': ';', '"': "'", + 'Z': 'z', + 'X': 'x', + 'C': 'c', + 'V': 'v', + 'B': 'b', + 'N': 'n', + 'M': 'm', '<': ',', '>': '.', '?': '/', @@ -75,7 +101,7 @@ def stat(self, value: str) -> Tuple[float, float]: """ hops = [] - value = value.lower().translate(HopStat.TRANSLATION) + value = value.translate(HopStat.TRANSLATION) for a, b in zip(value[:-1], value[1:]): hop = self.__hop_dict.get((a, b)) if hop is None: diff --git a/docs/source/guide.rst b/docs/source/guide.rst index 86bcca388..fd706664c 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -22,7 +22,7 @@ Get all argument list: [--ml_threshold FLOAT_OR_STR] [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR] - [--jobs POSITIVE_INT] [--thrifty] + [--jobs POSITIVE_INT] [--thrifty | --no-thrifty] [--skip_ignored] [--error | --no-error] [--save-json [PATH]] [--save-xlsx [PATH]] [--stdout | --no-stdout] [--color | --no-color] @@ -72,7 +72,9 @@ Get all argument list: (CPUExecutionProvider is used by default) --jobs POSITIVE_INT, -j POSITIVE_INT number of parallel processes to use (default: 1) - --thrifty clear objects after scan to reduce memory consumption + --thrifty, --no-thrifty + clear objects after scan to reduce memory consumption + (default: True) --skip_ignored parse .gitignore files and skip credentials from ignored objects --error, --no-error produce error code if credentials are found (default: diff --git a/tests/__init__.py b/tests/__init__.py index e7add9dcc..9f57fa9d5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,13 +1,13 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 147 +SAMPLES_FILES_COUNT = 148 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 470 +SAMPLES_CRED_COUNT = 472 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19 # Number of filtered credentials with ML @@ -17,7 +17,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 656 +SAMPLES_IN_DOC = 657 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 87 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index f8fffa535..cfd97ac95 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -12673,6 +12673,31 @@ } ] }, + { + "rule": "Tavily API Key", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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 # TP", + "line_num": 1, + "path": "./tests/samples/tvly", + "info": "FILE|RAW", + "value": "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", + "value_start": 0, + "value_end": 37, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.703528274549062, + "valid": true + } + } + ] + }, { "rule": "Twilio Credentials", "severity": "high", @@ -12898,6 +12923,31 @@ } ] }, + { + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 0.857, + "line_data_list": [ + { + "line": "if (password !== \"PaS5w0rD2#\"){", + "line_num": 21, + "path": "./tests/samples/url_cred.js", + "info": "FILE|RAW", + "value": "PaS5w0rD2#", + "value_start": 18, + "value_end": 28, + "variable": "password", + "variable_start": 4, + "variable_end": 12, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 2.989735285398626, + "valid": false + } + } + ] + }, { "rule": "UUID", "severity": "info", diff --git a/tests/data/doc.json b/tests/data/doc.json index 9b1ec9d3e..91011b7b0 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -18122,6 +18122,31 @@ } ] }, + { + "rule": "Tavily API Key", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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 # TP", + "line_num": 1, + "path": "./tests/samples/tvly", + "info": "FILE|RAW", + "value": "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", + "value_start": 0, + "value_end": 37, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.703528274549062, + "valid": true + } + } + ] + }, { "rule": "Twilio Credentials", "severity": "high", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 60f9643b5..d2a529eaa 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -11597,6 +11597,31 @@ } ] }, + { + "rule": "Tavily API Key", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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 # TP", + "line_num": 1, + "path": "./tests/samples/tvly", + "info": "", + "value": "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", + "value_start": 0, + "value_end": 37, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.703528274549062, + "valid": true + } + } + ] + }, { "rule": "Twilio Credentials", "severity": "high", @@ -11847,6 +11872,31 @@ } ] }, + { + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 0.857, + "line_data_list": [ + { + "line": "if (password !== \"PaS5w0rD2#\"){", + "line_num": 21, + "path": "./tests/samples/url_cred.js", + "info": "", + "value": "PaS5w0rD2#", + "value_start": 18, + "value_end": 28, + "variable": "password", + "variable_start": 4, + "variable_end": 12, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 2.989735285398626, + "valid": false + } + } + ] + }, { "rule": "UUID", "severity": "info", diff --git a/tests/data/output.json b/tests/data/output.json index 514f9c749..2cbdcdda9 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -9372,6 +9372,31 @@ } ] }, + { + "rule": "Tavily API Key", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "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 # TP", + "line_num": 1, + "path": "./tests/samples/tvly", + "info": "", + "value": "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", + "value_start": 0, + "value_end": 37, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 4.703528274549062, + "valid": true + } + } + ] + }, { "rule": "Twilio Credentials", "severity": "high", @@ -9597,6 +9622,31 @@ } ] }, + { + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "ml_probability": 0.857, + "line_data_list": [ + { + "line": "if (password !== \"PaS5w0rD2#\"){", + "line_num": 21, + "path": "./tests/samples/url_cred.js", + "info": "", + "value": "PaS5w0rD2#", + "value_start": 18, + "value_end": 28, + "variable": "password", + "variable_start": 4, + "variable_end": 12, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 2.989735285398626, + "valid": false + } + } + ] + }, { "rule": "UUID", "severity": "info", diff --git a/tests/samples/tvly b/tests/samples/tvly new file mode 100644 index 000000000..708deca6c --- /dev/null +++ b/tests/samples/tvly @@ -0,0 +1,2 @@ +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 # TP +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 # FP diff --git a/tests/samples/url_cred.js b/tests/samples/url_cred.js index a309f6ea7..e4745f92a 100644 --- a/tests/samples/url_cred.js +++ b/tests/samples/url_cred.js @@ -17,3 +17,7 @@ email_as_login = "smtps://example@gmail.com:FnD83JZs@smtp.gmail.com:465"; */ url3d = "https://localhost.com/013948?26timestamp%3D1395782596%26token%3Dh1d3Me4ch534d801sl3jdk%26version%3D3.14%26si"; + +if (password !== "PaS5w0rD2#"){ +// Strict inequality (!==) +} diff --git a/tests/test_app.py b/tests/test_app.py index dab047734..e0368eb8d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -223,7 +223,7 @@ def test_it_works_n(self) -> None: " [--ml_model PATH]" \ " [--ml_providers STR] " \ " [--jobs POSITIVE_INT]" \ - " [--thrifty]" \ + " [--thrifty | --no-thrifty]" \ " [--skip_ignored]" \ " [--error | --no-error]"\ " [--save-json [PATH]]" \