diff --git a/README.md b/README.md
index 69a687094..d1c3849c3 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ Full documentation can be found here:
### Main Requirements
-- Python 3.8, 3.9, 3.10, 3.11, 3.12
+- Python 3.9, 3.10, 3.11, 3.12
### Installation
diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py
index 3796c7676..82054bb12 100644
--- a/credsweeper/__main__.py
+++ b/credsweeper/__main__.py
@@ -205,8 +205,8 @@ def get_arguments() -> Namespace:
metavar="POSITIVE_INT")
parser.add_argument("--thrifty",
help="clear objects after scan to reduce memory consumption",
- action="store_const",
- const=True)
+ action=BooleanOptionalAction,
+ default=True)
parser.add_argument("--skip_ignored",
help="parse .gitignore files and skip credentials from ignored objects",
dest="skip_ignored",
diff --git a/credsweeper/common/keyword_pattern.py b/credsweeper/common/keyword_pattern.py
index 219e99325..8237f7fc1 100644
--- a/credsweeper/common/keyword_pattern.py
+++ b/credsweeper/common/keyword_pattern.py
@@ -13,7 +13,7 @@ class KeywordPattern:
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
r")" #
separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
- r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!=|===|==|=|%3d)" \
+ r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!==|!=|===|==|=|%3d)" \
r"(\s|\\{1,8}[tnr])*"
# might be curly, square or parenthesis with words before
wrap = r"(?P<wrap>(" \
diff --git a/credsweeper/filters/value_allowlist_check.py b/credsweeper/filters/value_allowlist_check.py
index 1759d7c8a..18bc6dba3 100644
--- a/credsweeper/filters/value_allowlist_check.py
+++ b/credsweeper/filters/value_allowlist_check.py
@@ -16,7 +16,7 @@ class ValueAllowlistCheck(Filter):
r"\$\{(\*|[0-9]+|[a-z_].*)\}", #
r"\$[0-9]+(\s|$)", #
r"\$\$[a-z_]+(\^%[0-9a-z_]+)?", #
- r"#\{.*\}", #
+ r"#\{.+\}", # Ruby: String Interpolation
r"\{\{.+\}\}", #
r".*@@@hl@@@(암호|비번|PW|PASS)@@@endhl@@@", #
]
diff --git a/credsweeper/filters/value_blocklist_check.py b/credsweeper/filters/value_blocklist_check.py
index 3f85f25cc..32c0ff386 100644
--- a/credsweeper/filters/value_blocklist_check.py
+++ b/credsweeper/filters/value_blocklist_check.py
@@ -11,8 +11,11 @@ class ValueBlocklistCheck(Filter):
"true",
"false",
"null",
+ "none",
"bearer",
"string",
+ "value",
+ "undefined",
]
def __init__(self, config: Config = None) -> None:
diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
index e5725688e..030b90303 100644
--- a/credsweeper/rules/config.yaml
+++ b/credsweeper/rules/config.yaml
@@ -1391,6 +1391,22 @@
- code
- doc
+- name: Tavily API Key
+ severity: high
+ confidence: strong
+ type: pattern
+ values:
+ - (?:(?tvly-[0-9A-Za-z_-]{32,40})(?![0-9A-Za-z_-])
+ min_line_len: 37
+ filter_type:
+ - ValuePatternCheck(5)
+ - ValueEntropyBase64Check
+ required_substrings:
+ - tvly-
+ target:
+ - code
+ - doc
+
- name: Discord Bot Token
severity: high
confidence: strong
diff --git a/credsweeper/utils/hop_stat.py b/credsweeper/utils/hop_stat.py
index 031e3adb0..6897bb703 100644
--- a/credsweeper/utils/hop_stat.py
+++ b/credsweeper/utils/hop_stat.py
@@ -25,11 +25,37 @@ class HopStat:
')': '0',
'_': '-',
'+': '=',
+ 'Q': 'q',
+ 'W': 'w',
+ 'E': 'e',
+ 'R': 'r',
+ 'T': 't',
+ 'Y': 'y',
+ 'U': 'u',
+ 'I': 'i',
+ 'O': 'o',
+ 'P': 'p',
'{': '[',
'}': ']',
'|': '\\',
+ 'A': 'a',
+ 'S': 's',
+ 'D': 'd',
+ 'F': 'f',
+ 'G': 'g',
+ 'H': 'h',
+ 'J': 'j',
+ 'K': 'k',
+ 'L': 'l',
':': ';',
'"': "'",
+ 'Z': 'z',
+ 'X': 'x',
+ 'C': 'c',
+ 'V': 'v',
+ 'B': 'b',
+ 'N': 'n',
+ 'M': 'm',
'<': ',',
'>': '.',
'?': '/',
@@ -75,7 +101,7 @@ def stat(self, value: str) -> Tuple[float, float]:
"""
hops = []
- value = value.lower().translate(HopStat.TRANSLATION)
+ value = value.translate(HopStat.TRANSLATION)
for a, b in zip(value[:-1], value[1:]):
hop = self.__hop_dict.get((a, b))
if hop is None:
diff --git a/docs/source/guide.rst b/docs/source/guide.rst
index 86bcca388..fd706664c 100644
--- a/docs/source/guide.rst
+++ b/docs/source/guide.rst
@@ -22,7 +22,7 @@ Get all argument list:
[--ml_threshold FLOAT_OR_STR]
[--ml_batch_size POSITIVE_INT] [--ml_config PATH]
[--ml_model PATH] [--ml_providers STR]
- [--jobs POSITIVE_INT] [--thrifty]
+ [--jobs POSITIVE_INT] [--thrifty | --no-thrifty]
[--skip_ignored] [--error | --no-error]
[--save-json [PATH]] [--save-xlsx [PATH]]
[--stdout | --no-stdout] [--color | --no-color]
@@ -72,7 +72,9 @@ Get all argument list:
(CPUExecutionProvider is used by default)
--jobs POSITIVE_INT, -j POSITIVE_INT
number of parallel processes to use (default: 1)
- --thrifty clear objects after scan to reduce memory consumption
+ --thrifty, --no-thrifty
+ clear objects after scan to reduce memory consumption
+ (default: True)
--skip_ignored parse .gitignore files and skip credentials from
ignored objects
--error, --no-error produce error code if credentials are found (default:
diff --git a/tests/__init__.py b/tests/__init__.py
index e7add9dcc..9f57fa9d5 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,13 +1,13 @@
from pathlib import Path
# total number of files in test samples
-SAMPLES_FILES_COUNT = 147
+SAMPLES_FILES_COUNT = 148
# the lowest value of ML threshold is used to display possible lowest values
NEGLIGIBLE_ML_THRESHOLD = 0.0001
# credentials count after scan with negligible ML threshold
-SAMPLES_CRED_COUNT = 470
+SAMPLES_CRED_COUNT = 472
SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19
# Number of filtered credentials with ML
@@ -17,7 +17,7 @@
SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED
# with option --doc
-SAMPLES_IN_DOC = 656
+SAMPLES_IN_DOC = 657
# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 87
diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json
index f8fffa535..cfd97ac95 100644
--- a/tests/data/depth_3.json
+++ b/tests/data/depth_3.json
@@ -12673,6 +12673,31 @@
}
]
},
+ {
+ "rule": "Tavily API Key",
+ "severity": "high",
+ "confidence": "strong",
+ "ml_probability": null,
+ "line_data_list": [
+ {
+ "line": "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 # TP",
+ "line_num": 1,
+ "path": "./tests/samples/tvly",
+ "info": "FILE|RAW",
+ "value": "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",
+ "value_start": 0,
+ "value_end": 37,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 4.703528274549062,
+ "valid": true
+ }
+ }
+ ]
+ },
{
"rule": "Twilio Credentials",
"severity": "high",
@@ -12898,6 +12923,31 @@
}
]
},
+ {
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "ml_probability": 0.857,
+ "line_data_list": [
+ {
+ "line": "if (password !== \"PaS5w0rD2#\"){",
+ "line_num": 21,
+ "path": "./tests/samples/url_cred.js",
+ "info": "FILE|RAW",
+ "value": "PaS5w0rD2#",
+ "value_start": 18,
+ "value_end": 28,
+ "variable": "password",
+ "variable_start": 4,
+ "variable_end": 12,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 2.989735285398626,
+ "valid": false
+ }
+ }
+ ]
+ },
{
"rule": "UUID",
"severity": "info",
diff --git a/tests/data/doc.json b/tests/data/doc.json
index 9b1ec9d3e..91011b7b0 100644
--- a/tests/data/doc.json
+++ b/tests/data/doc.json
@@ -18122,6 +18122,31 @@
}
]
},
+ {
+ "rule": "Tavily API Key",
+ "severity": "high",
+ "confidence": "strong",
+ "ml_probability": null,
+ "line_data_list": [
+ {
+ "line": "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 # TP",
+ "line_num": 1,
+ "path": "./tests/samples/tvly",
+ "info": "FILE|RAW",
+ "value": "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",
+ "value_start": 0,
+ "value_end": 37,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 4.703528274549062,
+ "valid": true
+ }
+ }
+ ]
+ },
{
"rule": "Twilio Credentials",
"severity": "high",
diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json
index 60f9643b5..d2a529eaa 100644
--- a/tests/data/ml_threshold.json
+++ b/tests/data/ml_threshold.json
@@ -11597,6 +11597,31 @@
}
]
},
+ {
+ "rule": "Tavily API Key",
+ "severity": "high",
+ "confidence": "strong",
+ "ml_probability": null,
+ "line_data_list": [
+ {
+ "line": "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 # TP",
+ "line_num": 1,
+ "path": "./tests/samples/tvly",
+ "info": "",
+ "value": "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",
+ "value_start": 0,
+ "value_end": 37,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 4.703528274549062,
+ "valid": true
+ }
+ }
+ ]
+ },
{
"rule": "Twilio Credentials",
"severity": "high",
@@ -11847,6 +11872,31 @@
}
]
},
+ {
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "ml_probability": 0.857,
+ "line_data_list": [
+ {
+ "line": "if (password !== \"PaS5w0rD2#\"){",
+ "line_num": 21,
+ "path": "./tests/samples/url_cred.js",
+ "info": "",
+ "value": "PaS5w0rD2#",
+ "value_start": 18,
+ "value_end": 28,
+ "variable": "password",
+ "variable_start": 4,
+ "variable_end": 12,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 2.989735285398626,
+ "valid": false
+ }
+ }
+ ]
+ },
{
"rule": "UUID",
"severity": "info",
diff --git a/tests/data/output.json b/tests/data/output.json
index 514f9c749..2cbdcdda9 100644
--- a/tests/data/output.json
+++ b/tests/data/output.json
@@ -9372,6 +9372,31 @@
}
]
},
+ {
+ "rule": "Tavily API Key",
+ "severity": "high",
+ "confidence": "strong",
+ "ml_probability": null,
+ "line_data_list": [
+ {
+ "line": "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 # TP",
+ "line_num": 1,
+ "path": "./tests/samples/tvly",
+ "info": "",
+ "value": "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",
+ "value_start": 0,
+ "value_end": 37,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 4.703528274549062,
+ "valid": true
+ }
+ }
+ ]
+ },
{
"rule": "Twilio Credentials",
"severity": "high",
@@ -9597,6 +9622,31 @@
}
]
},
+ {
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "ml_probability": 0.857,
+ "line_data_list": [
+ {
+ "line": "if (password !== \"PaS5w0rD2#\"){",
+ "line_num": 21,
+ "path": "./tests/samples/url_cred.js",
+ "info": "",
+ "value": "PaS5w0rD2#",
+ "value_start": 18,
+ "value_end": 28,
+ "variable": "password",
+ "variable_start": 4,
+ "variable_end": 12,
+ "entropy_validation": {
+ "iterator": "BASE64STDPAD_CHARS",
+ "entropy": 2.989735285398626,
+ "valid": false
+ }
+ }
+ ]
+ },
{
"rule": "UUID",
"severity": "info",
diff --git a/tests/samples/tvly b/tests/samples/tvly
new file mode 100644
index 000000000..708deca6c
--- /dev/null
+++ b/tests/samples/tvly
@@ -0,0 +1,2 @@
+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 # TP
+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 # FP
diff --git a/tests/samples/url_cred.js b/tests/samples/url_cred.js
index a309f6ea7..e4745f92a 100644
--- a/tests/samples/url_cred.js
+++ b/tests/samples/url_cred.js
@@ -17,3 +17,7 @@ email_as_login = "smtps://example@gmail.com:FnD83JZs@smtp.gmail.com:465";
*/
url3d = "https://localhost.com/013948?26timestamp%3D1395782596%26token%3Dh1d3Me4ch534d801sl3jdk%26version%3D3.14%26si";
+
+if (password !== "PaS5w0rD2#"){
+// Strict inequality (!==)
+}
diff --git a/tests/test_app.py b/tests/test_app.py
index dab047734..e0368eb8d 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -223,7 +223,7 @@ def test_it_works_n(self) -> None:
" [--ml_model PATH]" \
" [--ml_providers STR] " \
" [--jobs POSITIVE_INT]" \
- " [--thrifty]" \
+ " [--thrifty | --no-thrifty]" \
" [--skip_ignored]" \
" [--error | --no-error]"\
" [--save-json [PATH]]" \