From aafc50e755c31792c9e376dcc4488f56ebb7b071 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 17 Feb 2025 16:00:47 +0200 Subject: [PATCH] Error code return feature. Version up to v1.10.6 (#678) * Error code return feature * skip args test for python3.11 --- .pre-commit-config.yaml | 11 +-- .pre-commit-hooks.yaml | 2 +- credsweeper/__init__.py | 2 +- credsweeper/__main__.py | 34 ++++++-- credsweeper/app.py | 28 ++++--- credsweeper/credentials/credential_manager.py | 20 +++-- credsweeper/filters/value_token_base_check.py | 7 +- docs/source/conf.py | 2 +- docs/source/credsweeper.filters.rst | 8 ++ docs/source/credsweeper.utils.rst | 8 ++ docs/source/guide.rst | 83 +++++++++++++------ docs/source/install.rst | 14 +++- .../test_data_content_provider.py | 2 +- tests/test_app.py | 48 ++++++----- tests/test_main.py | 48 +++++++---- tests/utils/test_util.py | 10 +-- 16 files changed, 219 insertions(+), 108 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b06d0ae8b..791339347 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,6 @@ repos: -- repo: https://github.com/pre-commit/mirrors-yapf - rev: v0.30.0 +- repo: https://github.com/google/yapf + rev: v0.43.0 hooks: - id: yapf args: ['--style=.style.yapf', '--parallel', '--in-place'] -- repo: https://github.com/pycqa/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - args: - - --convention=google - - --add-ignore=D1 diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 70087f8b9..5fff49bde 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -1,6 +1,6 @@ - id: CredSweeper name: CredSweeper description: Advanced credential scanner - entry: python -m credsweeper --path + entry: python -m credsweeper --banner --error --color --no-stdout --path language: python types: [text] diff --git a/credsweeper/__init__.py b/credsweeper/__init__.py index 697bde8f9..925781236 100644 --- a/credsweeper/__init__.py +++ b/credsweeper/__init__.py @@ -18,4 +18,4 @@ '__version__' ] -__version__ = "1.10.5" +__version__ = "1.10.6" diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py index bcd5e8f9a..3796c7676 100644 --- a/credsweeper/__main__.py +++ b/credsweeper/__main__.py @@ -3,7 +3,7 @@ import os import sys import time -from argparse import ArgumentParser, ArgumentTypeError, Namespace +from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction from typing import Any, Union, Dict from credsweeper import __version__ @@ -211,6 +211,10 @@ def get_arguments() -> Namespace: help="parse .gitignore files and skip credentials from ignored objects", dest="skip_ignored", action="store_true") + parser.add_argument("--error", + help="produce error code if credentials are found", + action=BooleanOptionalAction, + default=False) parser.add_argument("--save-json", nargs="?", help="save result to json file (default: output.json)", @@ -223,16 +227,21 @@ def get_arguments() -> Namespace: const="output.xlsx", dest="xlsx_filename", metavar="PATH") - parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True) + parser.add_argument("--stdout", help="print results to stdout", action=BooleanOptionalAction, default=True) + parser.add_argument("--color", help="print results with colorization", action=BooleanOptionalAction, default=False) parser.add_argument("--hashed", help="line, variable, value will be hashed in output", - action="store_const", - const=True) + action=BooleanOptionalAction, + default=False) parser.add_argument("--subtext", help=f"line text will be stripped in {2 * ML_HUNK} symbols but value and variable are kept", - action="store_const", - const=True) - parser.add_argument("--sort", help="enable output sorting", dest="sort_output", action="store_true") + action=BooleanOptionalAction, + default=False) + parser.add_argument("--sort", + help="enable output sorting", + dest="sort_output", + action=BooleanOptionalAction, + default=False) parser.add_argument("--log", "-l", help=f"provide logging level of {list(Logger.LEVELS.keys())}" @@ -281,6 +290,7 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int: config_path=args.config_path, json_filename=args.json_filename, xlsx_filename=args.xlsx_filename, + stdout=args.stdout, color=args.color, hashed=args.hashed, subtext=args.subtext, @@ -310,6 +320,7 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int: def main() -> int: """Main function""" result = EXIT_FAILURE + credentials_number = 0 start_time = time.time() args = get_arguments() if args.banner: @@ -336,15 +347,20 @@ def main() -> int: del_credentials_number = scan(args, content_provider) summary["Deleted File Credentials"] = del_credentials_number if 0 <= add_credentials_number and 0 <= del_credentials_number: + # it means the scan was successful done result = EXIT_SUCCESS + # collect number of all found credential to produce error code when necessary + credentials_number = add_credentials_number + del_credentials_number elif args.export_config: logging.info(f"Exporting default config to file: {args.export_config}") config_dict = Util.json_load(APP_PATH / "secret" / "config.json") Util.json_dump(config_dict, args.export_config) + result = EXIT_SUCCESS elif args.export_log_config: logging.info(f"Exporting default logger config to file: {args.export_log_config}") config_dict = Util.yaml_load(APP_PATH / "secret" / "log.yaml") Util.yaml_dump(config_dict, args.export_log_config) + result = EXIT_SUCCESS elif args.banner and 2 == len(sys.argv): # only extend version invocation result = EXIT_SUCCESS @@ -357,6 +373,10 @@ def main() -> int: end_time = time.time() print(f"Time Elapsed: {end_time - start_time}s") + if args.error and EXIT_SUCCESS == result and 0 < credentials_number: + # override result when credentials were found with the requirement + result = EXIT_FAILURE + return result diff --git a/credsweeper/app.py b/credsweeper/app.py index 50bca7e78..647ff6a78 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -1,3 +1,4 @@ +import json import logging import multiprocessing import signal @@ -41,6 +42,7 @@ def __init__(self, config_path: Optional[str] = None, json_filename: Union[None, str, Path] = None, xlsx_filename: Union[None, str, Path] = None, + stdout: bool = False, color: bool = False, hashed: bool = False, subtext: bool = False, @@ -70,7 +72,8 @@ def __init__(self, default built-in config is used if None json_filename: optional string variable, path to save result to json xlsx_filename: optional string variable, path to save result to xlsx - color: print results to stdout with colorization + stdout: print results to stdout + color: print concise results to stdout with colorization hashed: use hash of line, value and variable instead plain text subtext: use subtext of line near variable-value like it performed in ML use_filters: boolean variable, specifying the need of rule filters @@ -110,6 +113,7 @@ def __init__(self, self.credential_manager = CredentialManager() self.json_filename: Union[None, str, Path] = json_filename self.xlsx_filename: Union[None, str, Path] = xlsx_filename + self.stdout = stdout self.color = color self.hashed = hashed self.subtext = subtext @@ -245,8 +249,7 @@ def run(self, content_provider: AbstractProvider) -> int: # PatchesProvider has the attribute. Circular import error appears with using the isinstance change_type = content_provider.change_type if hasattr(content_provider, "change_type") else None self.export_results(change_type) - - return len(self.credential_manager.get_credentials()) + return self.credential_manager.len_credentials() # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -392,7 +395,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None: Args: change_type: flag to know which file should be created for a patch """ - is_exported = False credentials = self.credential_manager.get_credentials() @@ -410,15 +412,22 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None: if self.json_filename: json_path = Path(self.json_filename) - is_exported = True if isinstance(change_type, DiffRowType): # add suffix for appropriated reports to create two files for the patch scan json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}") - Util.json_dump([credential.to_json(hashed=self.hashed, subtext=self.subtext) for credential in credentials], - file_path=json_path) + with open(json_path, 'w') as f: + # use the approach to reduce total memory usage in case of huge data + first_item = True + f.write('[\n') + for credential in credentials: + if first_item: + first_item = False + else: + f.write(",\n") + f.write(json.dumps(credential.to_json(hashed=self.hashed, subtext=self.subtext), indent=4)) + f.write("\n]") if self.xlsx_filename: - is_exported = True data_list = [] for credential in credentials: data_list.extend(credential.to_dict_list(hashed=self.hashed, subtext=self.subtext)) @@ -434,7 +443,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None: df.to_excel(self.xlsx_filename, sheet_name="report", index=False) if self.color: - is_exported = True for credential in credentials: for line_data in credential.line_data_list: # bright rule name and path or info @@ -443,6 +451,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None: Style.RESET_ALL) print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext)) - if is_exported is False: + if self.stdout: for credential in credentials: print(credential.to_str(hashed=self.hashed, subtext=self.subtext)) diff --git a/credsweeper/credentials/credential_manager.py b/credsweeper/credentials/credential_manager.py index 2f1dec2e0..b32b36f7b 100644 --- a/credsweeper/credentials/credential_manager.py +++ b/credsweeper/credentials/credential_manager.py @@ -9,16 +9,24 @@ class CredentialManager: - """The manager allows you to store, add and delete separate credit candidates. - - Parameters: - candidates: list of credential candidates - - """ + """The manager allows you to store, add and delete separate credit candidates.""" def __init__(self) -> None: self.candidates: List[Candidate] = list(Manager().list()) + def clear_credentials(self) -> None: + """Clear credential candidates stored in the manager.""" + self.candidates.clear() + + def len_credentials(self) -> int: + """Get number of credential candidates stored in the manager. + + Return: + Non-negative integer + + """ + return len(self.candidates) + def get_credentials(self) -> List[Candidate]: """Get all credential candidates stored in the manager. diff --git a/credsweeper/filters/value_token_base_check.py b/credsweeper/filters/value_token_base_check.py index e90a676f3..df434f0c1 100644 --- a/credsweeper/filters/value_token_base_check.py +++ b/credsweeper/filters/value_token_base_check.py @@ -37,10 +37,9 @@ def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]] @staticmethod def get_ppf(n: int) -> float: - """ - from scipy.stats import t - print('\n'.join(f'{n}: {t.ppf(0.9827, n-1):.8f},' for n in [8,10,15,16,20,24,25,32,40,50,64])) - """ + """Code used to produce the values""" + # from scipy.stats import t + # print('\n'.join(f'{n}: {t.ppf(0.9827, n-1):.8f},' for n in [8,10,15,16,20,24,25,32,40,50,64])) return ValueTokenBaseCheck.MUL_DICT[n] def run(self, line_data: LineData, target: AnalysisTarget) -> bool: diff --git a/docs/source/conf.py b/docs/source/conf.py index 8e7ccdcfb..253b3321b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = 'CredSweeper' -copyright = '2024, Samsung CredTeam' +copyright = '2025, Samsung CredTeam' author = 'CredTeam' from credsweeper import __version__ as credsweeper_version diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst index e3c324e76..02a02bfa9 100644 --- a/docs/source/credsweeper.filters.rst +++ b/docs/source/credsweeper.filters.rst @@ -348,6 +348,14 @@ credsweeper.filters.value\_token\_base64\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_token\_base\_check module +---------------------------------------------------- + +.. automodule:: credsweeper.filters.value_token_base_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_token\_check module ---------------------------------------------- diff --git a/docs/source/credsweeper.utils.rst b/docs/source/credsweeper.utils.rst index 3a168d0cf..ce2e3802f 100644 --- a/docs/source/credsweeper.utils.rst +++ b/docs/source/credsweeper.utils.rst @@ -12,6 +12,14 @@ credsweeper.utils.entropy\_validator module :undoc-members: :show-inheritance: +credsweeper.utils.hop\_stat module +---------------------------------- + +.. automodule:: credsweeper.utils.hop_stat + :members: + :undoc-members: + :show-inheritance: + credsweeper.utils.pem\_key\_detector module ------------------------------------------- diff --git a/docs/source/guide.rst b/docs/source/guide.rst index 343609984..86bcca388 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -13,14 +13,24 @@ Get all argument list: .. code-block:: text - usage: python -m credsweeper [-h] (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH] | --export_log_config [PATH]) - [--rules PATH] [--severity SEVERITY] [--config PATH] [--log_config PATH] [--denylist PATH] - [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR] - [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR] - [--jobs POSITIVE_INT] [--thrifty] [--skip_ignored] [--save-json [PATH]] - [--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL] - [--size_limit SIZE_LIMIT] - [--banner] [--version] + usage: python -m credsweeper [-h] + (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH] | --export_log_config [PATH]) + [--rules PATH] [--severity SEVERITY] + [--config PATH] [--log_config PATH] + [--denylist PATH] [--find-by-ext] + [--depth POSITIVE_INT] [--no-filters] [--doc] + [--ml_threshold FLOAT_OR_STR] + [--ml_batch_size POSITIVE_INT] [--ml_config PATH] + [--ml_model PATH] [--ml_providers STR] + [--jobs POSITIVE_INT] [--thrifty] + [--skip_ignored] [--error | --no-error] + [--save-json [PATH]] [--save-xlsx [PATH]] + [--stdout | --no-stdout] [--color | --no-color] + [--hashed | --no-hashed] + [--subtext | --no-subtext] [--sort | --no-sort] + [--log LOG_LEVEL] [--size_limit SIZE_LIMIT] + [--banner] [--version] + options: -h, --help show this help message and exit --path PATH [PATH ...] @@ -28,41 +38,66 @@ Get all argument list: --diff_path PATH [PATH ...] git diff file to scan --export_config [PATH] - exporting default config to file (default: config.json) + exporting default config to file (default: + config.json) --export_log_config [PATH] - exporting default logger config to file (default: log.yaml) - --rules PATH path of rule config file (default: credsweeper/rules/config.yaml). severity:['critical', 'high', 'medium', 'low', 'info'] type:['keyword', 'pattern', 'pem_key', 'multi'] - --severity SEVERITY set minimum level for rules to apply ['critical', 'high', 'medium', 'low', 'info'](default: 'Severity.INFO', case insensitive) + exporting default logger config to file (default: + log.yaml) + --rules PATH path of rule config file (default: + credsweeper/rules/config.yaml). severity:['critical', + 'high', 'medium', 'low', 'info'] type:['keyword', + 'pattern', 'pem_key', 'multi'] + --severity SEVERITY set minimum level for rules to apply ['critical', + 'high', 'medium', 'low', 'info'](default: + 'Severity.INFO', case insensitive) --config PATH use custom config (default: built-in) --log_config PATH use custom log config (default: built-in) - --denylist PATH path to a plain text file with lines or secrets to ignore + --denylist PATH path to a plain text file with lines or secrets to + ignore --find-by-ext find files by predefined extension --depth POSITIVE_INT additional recursive search in data (experimental) --no-filters disable filters --doc document-specific scanning --ml_threshold FLOAT_OR_STR - setup threshold for the ml model. The lower the threshold - the more credentials will be reported. Allowed values: float between 0 and 1, or any of ['lowest', 'low', 'medium', 'high', - 'highest'] (default: medium) + setup threshold for the ml model. The lower the + threshold - the more credentials will be reported. + Allowed values: float between 0 and 1, or any of + ['lowest', 'low', 'medium', 'high', 'highest'] + (default: medium) --ml_batch_size POSITIVE_INT, -b POSITIVE_INT batch size for model inference (default: 16) --ml_config PATH use external config for ml model --ml_model PATH use external ml model - --ml_providers STR comma separated list of providers for onnx (CPUExecutionProvider is used by default) + --ml_providers STR comma separated list of providers for onnx + (CPUExecutionProvider is used by default) --jobs POSITIVE_INT, -j POSITIVE_INT number of parallel processes to use (default: 1) --thrifty clear objects after scan to reduce memory consumption - --skip_ignored parse .gitignore files and skip credentials from ignored objects + --skip_ignored parse .gitignore files and skip credentials from + ignored objects + --error, --no-error produce error code if credentials are found (default: + False) --save-json [PATH] save result to json file (default: output.json) --save-xlsx [PATH] save result to xlsx file (default: output.xlsx) - --color, -C print results with colorization - --hashed line, variable, value will be hashed in output - --subtext line text will be stripped in 160 symbols but value and variable are kept - --sort enable output sorting + --stdout, --no-stdout + print results to stdout (default: True) + --color, --no-color print results with colorization (default: False) + --hashed, --no-hashed + line, variable, value will be hashed in output + (default: False) + --subtext, --no-subtext + line text will be stripped in 160 symbols but value + and variable are kept (default: False) + --sort, --no-sort enable output sorting (default: False) --log LOG_LEVEL, -l LOG_LEVEL - provide logging level of ['DEBUG', 'INFO', 'WARN', 'WARNING', 'ERROR', 'FATAL', 'CRITICAL', 'SILENCE'](default: 'warning', case insensitive) + provide logging level of ['DEBUG', 'INFO', 'WARN', + 'WARNING', 'ERROR', 'FATAL', 'CRITICAL', + 'SILENCE'](default: 'warning', case insensitive) --size_limit SIZE_LIMIT - set size limit of files that for scanning (eg. 1GB / 10MiB / 1000) - --banner show version and crc32 sum of CredSweeper files at start + set size limit of files that for scanning (eg. 1GB / + 10MiB / 1000) + --banner show version and crc32 sum of CredSweeper files at + start --version, -V show program's version number and exit .. note:: diff --git a/docs/source/install.rst b/docs/source/install.rst index d5ca94af9..80038c89e 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -51,7 +51,7 @@ Via git clone (dev install) git clone https://github.com/Samsung/CredSweeper.git cd CredSweeper - # Annotate "numpy", "scikit-learn" and "tensorflow" if you don't want to use the ML validation feature. + # Annotate "onnxruntime" if you don't want to use the ML validation feature. pip install -qr requirements.txt Pre-commit git hook @@ -104,3 +104,15 @@ Pre-commit git hook if __name__ == "__main__": sys.exit(main()) + +Or use pre-commit with ``.pre-commit-config.yaml`` in your repo: + +.. code-block:: none + + repos: + - repo: https://github.com/Samsung/CredSweeper + rev: v1.10.6 + hooks: + - id: CredSweeper + +Install with: ``pre-commit install --install-hooks`` diff --git a/tests/file_handler/test_data_content_provider.py b/tests/file_handler/test_data_content_provider.py index 4dce63565..a0b12fc9a 100644 --- a/tests/file_handler/test_data_content_provider.py +++ b/tests/file_handler/test_data_content_provider.py @@ -124,7 +124,7 @@ def test_scan_zipfile_p(self) -> None: # clean credentials to test zip cs.credential_manager.candidates.clear() - self.assertEqual(0, len(cs.credential_manager.get_credentials())) + self.assertEqual(0, cs.credential_manager.len_credentials()) # use the same approach but with single zip file which is made from the samples zip_file_path = os.path.join(tmp_dir, "test_p.zip") diff --git a/tests/test_app.py b/tests/test_app.py index 2f2741689..dab047734 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -12,6 +12,7 @@ import deepdiff import numpy as np import pandas as pd +import pytest from credsweeper.app import APP_PATH from credsweeper.utils import Util @@ -180,7 +181,8 @@ def test_it_works_with_multiline_in_patch_p(self) -> None: def test_it_works_with_patch_color_p(self) -> None: target_path = str(SAMPLES_PATH / "uuid-update.patch") - _stdout, _stderr = self._m_credsweeper(["--diff_path", target_path, "--log", "silence", "--color"]) + _stdout, _stderr = self._m_credsweeper( + ["--diff_path", target_path, "--log", "silence", "--color", "--no-stdout"]) output = " ".join(_stdout.split()[:-1]) expected = """ \x1b[1mUUID uuid:added:1 None\x1b[0m @@ -223,12 +225,14 @@ def test_it_works_n(self) -> None: " [--jobs POSITIVE_INT]" \ " [--thrifty]" \ " [--skip_ignored]" \ + " [--error | --no-error]"\ " [--save-json [PATH]]" \ " [--save-xlsx [PATH]]" \ - " [--color]" \ - " [--hashed]" \ - " [--subtext]" \ - " [--sort]" \ + " [--stdout | --no-stdout]" \ + " [--color | --no-color]" \ + " [--hashed | --no-hashed]" \ + " [--subtext | --no-subtext]" \ + " [--sort | --no-sort]" \ " [--log LOG_LEVEL]" \ " [--size_limit SIZE_LIMIT]" \ " [--banner] " \ @@ -288,11 +292,12 @@ def test_log_n(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + @pytest.mark.skipif(10 < sys.version_info.minor, reason="argparse default was changed in 3.11") def test_help_p(self) -> None: _stdout, _stderr = self._m_credsweeper(["--help"]) output = " ".join(_stdout.split()) if 10 > sys.version_info.minor and output.find("options:"): - # Legacy support python3.8 - 3.9 to display "optional arguments:" like in python 3.10 + # Legacy support python3.9 to display "optional arguments:" like in python 3.10 output = output.replace("options:", "optional arguments:") help_path = os.path.join(TESTS_PATH, "..", "docs", "source", "guide.rst") with open(help_path, "r") as f: @@ -306,7 +311,7 @@ def test_help_p(self) -> None: continue if started: if 10 > sys.version_info.minor and line.strip() == "options:": - # Legacy support python3.8 - 3.9 to display "optional arguments:" + # Legacy support python3.9 to display "optional arguments:" text = ' '.join([text, line.replace("options:", "optional arguments:")]) else: text = ' '.join([text, line]) @@ -335,7 +340,7 @@ def test_patch_save_json_p(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: json_filename = os.path.join(tmp_dir, f"{__name__}.json") _stdout, _stderr = self._m_credsweeper( - ["--diff_path", target_path, "--save-json", json_filename, "--log", "silence"]) + ["--diff_path", target_path, "--no-stdout", "--save-json", json_filename, "--log", "silence"]) self.assertTrue(os.path.exists(os.path.join(tmp_dir, f"{__name__}.added.json"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, f"{__name__}.deleted.json"))) @@ -455,7 +460,7 @@ def test_find_by_ext_p(self) -> None: json_filename = os.path.join(tmp_dir, f"{__name__}.json") _stdout, _stderr = self._m_credsweeper( - ["--path", tmp_dir, "--find-by-ext", "--save-json", json_filename, "--log", "silence"]) + ["--path", tmp_dir, "--find-by-ext", "--no-stdout", "--save-json", json_filename, "--log", "silence"]) self.assertTrue(os.path.exists(json_filename)) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -474,7 +479,7 @@ def test_find_by_ext_n(self) -> None: open(file_path, "w").write(AZ_STRING) json_filename = os.path.join(tmp_dir, f"{__name__}.json") _stdout, _stderr = self._m_credsweeper( - ["--path", tmp_dir, "--save-json", json_filename, "--log", "silence"]) + ["--path", tmp_dir, "--no-stdout", "--save-json", json_filename, "--log", "silence"]) self.assertTrue(os.path.exists(json_filename)) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -488,9 +493,10 @@ def test_depth_p(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: json_filename = os.path.join(tmp_dir, f"{__name__}.json") # depth = 3 - _stdout, _stderr = self._m_credsweeper( - ["--log", "silence", "--path", - str(SAMPLES_PATH), "--save-json", json_filename, "--depth", "3"]) + _stdout, _stderr = self._m_credsweeper([ + "--log", "silence", "--path", + str(SAMPLES_PATH), "--no-stdout", "--save-json", json_filename, "--depth", "3" + ]) self.assertTrue(os.path.exists(json_filename)) with open(json_filename, "r") as json_file: normal_report.extend(json.load(json_file)) @@ -498,7 +504,7 @@ def test_depth_p(self) -> None: sorted_json_filename = os.path.join(tmp_dir, f"{__name__}.json") _stdout, _stderr = self._m_credsweeper([ "--log", "silence", "--path", - str(SAMPLES_PATH), "--sort", "--save-json", sorted_json_filename, "--depth", "3" + str(SAMPLES_PATH), "--sort", "--no-stdout", "--save-json", sorted_json_filename, "--depth", "3" ]) self.assertTrue(os.path.exists(sorted_json_filename)) with open(sorted_json_filename, "r") as json_file: @@ -522,7 +528,7 @@ def test_depth_n(self) -> None: # depth is not set _stdout, _stderr = self._m_credsweeper( ["--log", "silence", "--path", - str(SAMPLES_PATH), "--save-json", json_filename]) + str(SAMPLES_PATH), "--no-stdout", "--save-json", json_filename]) self.assertTrue(os.path.exists(json_filename)) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -538,7 +544,8 @@ def test_denylist_p(self) -> None: with open(denylist_filename, "w") as f: f.write('ghp_00000000000000000000000000000004WZ4EQ # classic') # full line _stdout, _stderr = self._m_credsweeper([ - "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence" + "--path", target_path, "--denylist", denylist_filename, "--no-stdout", "--save-json", json_filename, + "--log", "silence" ]) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -546,7 +553,8 @@ def test_denylist_p(self) -> None: with open(denylist_filename, "w") as f: f.write('ghp_00000000000000000000000000000004WZ4EQ') # value only _stdout, _stderr = self._m_credsweeper([ - "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence" + "--path", target_path, "--denylist", denylist_filename, "--no-stdout", "--save-json", json_filename, + "--log", "silence" ]) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -562,7 +570,8 @@ def test_denylist_n(self) -> None: with open(denylist_filename, "w") as f: f.write('4WZ4EQ # classic') # part of line - will not exclude _stdout, _stderr = self._m_credsweeper([ - "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence" + "--path", target_path, "--denylist", denylist_filename, "--no-stdout", "--save-json", json_filename, + "--log", "silence" ]) with open(json_filename, "r") as json_file: report = json.load(json_file) @@ -704,7 +713,8 @@ def test_severity_patch_xlsx_p(self) -> None: def test_doc_n(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: json_filename = os.path.join(tmp_dir, f"{__name__}.json") - _stdout, _stderr = self._m_credsweeper(["--doc", "--path", str(SAMPLES_PATH), "--save-json", json_filename]) + _stdout, _stderr = self._m_credsweeper( + ["--doc", "--path", str(SAMPLES_PATH), "--no-stdout", "--save-json", json_filename]) report = Util.json_load(json_filename) self.assertEqual(SAMPLES_IN_DOC, len(report)) diff --git a/tests/test_main.py b/tests/test_main.py index 9906d0ff5..6c5e504cd 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -69,13 +69,22 @@ def test_use_filters_n(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # - @mock.patch("credsweeper.__main__.scan", return_value=None) + @mock.patch("credsweeper.__main__.scan", return_value=1) @mock.patch("credsweeper.__main__.get_arguments") def test_main_n(self, mock_get_arguments, mock_scan) -> None: - args_mock = Mock(log='silence', path=None, diff_path=None, json_filename=None, rule_path=None, jobs=1) + args_mock = Mock(log='debug', + path="mocked-scan", + diff_path=None, + error=True, + json_filename=None, + xlsx_filename=None, + stdout=False, + color=False, + rule_path=None, + jobs=1) mock_get_arguments.return_value = args_mock self.assertEqual(EXIT_FAILURE, app_main.main()) - self.assertFalse(mock_scan.called) + self.assertTrue(mock_scan.called) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -87,6 +96,7 @@ def test_main_path_p(self, mock_get_arguments) -> None: path=None, config_path=None, diff_path=[str(target_path)], + error=False, json_filename=Path(os.path.join(tmp_dir, f"{__name__}.json")), xlsx_filename=Path(os.path.join(tmp_dir, f"{__name__}.xlsx")), color=False, @@ -122,6 +132,7 @@ def test_binary_patch_p(self, mock_get_arguments) -> None: path=None, config_path=None, diff_path=[str(target_path)], + error=False, json_filename=os.path.join(tmp_dir, f"{__name__}.json"), xlsx_filename=None, subtext=False, @@ -173,6 +184,7 @@ def test_report_p(self, mock_get_arguments) -> None: config_path=None, path=[str(SAMPLES_PATH)], diff_path=None, + error=False, json_filename=json_filename, xlsx_filename=xlsx_filename, subtext=False, @@ -330,7 +342,7 @@ def test_multi_jobs_p(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH]) cred_sweeper = CredSweeper(pool_count=3) cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_POST_CRED_COUNT, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(SAMPLES_POST_CRED_COUNT, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -362,7 +374,7 @@ def test_tar_p(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "passwords.tar.bz2"]) cred_sweeper = CredSweeper(depth=2, ml_threshold=0) cred_sweeper.run(content_provider=content_provider) - self.assertEqual(3, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(3, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -373,7 +385,7 @@ def test_tar_n(self) -> None: cred_sweeper = CredSweeper(depth=2) with patch('logging.Logger.error') as mocked_logger: cred_sweeper.run(content_provider=content_provider) - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) mocked_logger.assert_called_with(f"{file_path.as_posix()[:-4]}:unexpected end of data") # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -397,13 +409,13 @@ def test_depth_p(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH]) cred_sweeper = CredSweeper(depth=1) cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_IN_DEEP_1, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(SAMPLES_IN_DEEP_1, cred_sweeper.credential_manager.len_credentials()) cred_sweeper.config.depth = 2 cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_IN_DEEP_2, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(SAMPLES_IN_DEEP_2, cred_sweeper.credential_manager.len_credentials()) cred_sweeper.config.depth = 3 cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_IN_DEEP_3, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(SAMPLES_IN_DEEP_3, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -411,7 +423,7 @@ def test_depth_n(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH]) cred_sweeper = CredSweeper(depth=0) cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_POST_CRED_COUNT, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(SAMPLES_POST_CRED_COUNT, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -420,7 +432,7 @@ def test_bzip2_p(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "pem_key.bz2"]) cred_sweeper = CredSweeper(depth=1) cred_sweeper.run(content_provider=content_provider) - self.assertEqual(1, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(1, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -436,7 +448,7 @@ def test_bzip2_n(self) -> None: cred_sweeper.run(content_provider=content_provider) mocked_logger.assert_called_with( f"{test_filename}:Compressed data ended before the end-of-stream marker was reached") - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -468,7 +480,7 @@ def test_pdf_n(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "sample.pdf"]) cred_sweeper = CredSweeper() cred_sweeper.run(content_provider=content_provider) - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -487,7 +499,7 @@ def test_py_n(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "sample.py"]) cred_sweeper = CredSweeper() cred_sweeper.run(content_provider=content_provider) - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -623,7 +635,7 @@ def test_exclude_value_p(self) -> None: files = [SAMPLES_PATH / "password.gradle"] files_provider = [TextContentProvider(file_path) for file_path in files] cred_sweeper.scan(files_provider) - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -632,7 +644,7 @@ def test_exclude_value_n(self) -> None: files = [SAMPLES_PATH / "password.gradle"] files_provider = [TextContentProvider(file_path) for file_path in files] cred_sweeper.scan(files_provider) - self.assertEqual(1, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(1, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -641,7 +653,7 @@ def test_exclude_line_p(self) -> None: files = [SAMPLES_PATH / "password.gradle"] files_provider = [TextContentProvider(file_path) for file_path in files] cred_sweeper.scan(files_provider) - self.assertEqual(0, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(0, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -650,7 +662,7 @@ def test_exclude_line_n(self) -> None: files = [SAMPLES_PATH / "password.gradle"] files_provider = [TextContentProvider(file_path) for file_path in files] cred_sweeper.scan(files_provider) - self.assertEqual(1, len(cred_sweeper.credential_manager.get_credentials())) + self.assertEqual(1, cred_sweeper.credential_manager.len_credentials()) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/tests/utils/test_util.py b/tests/utils/test_util.py index 5f2a37ef0..87f63ff13 100644 --- a/tests/utils/test_util.py +++ b/tests/utils/test_util.py @@ -343,12 +343,10 @@ def test_is_ascii_entropy_validate_n(self): self.assertFalse(Util.is_ascii_entropy_validate(various_lang_data)) decoded_like_base64 = base64.b64decode(f"{AZ_STRING}=") self.assertFalse(Util.is_ascii_entropy_validate(decoded_like_base64)) - if 9 <= sys.version_info.minor: - for random_data_len in range(16, 40): - # only sice python 3.9 - data = random.randbytes(random_data_len) - # VERY RARELY IT MIGHT FAIL - self.assertFalse(Util.is_ascii_entropy_validate(data), data) + for random_data_len in range(16, 40): + data = random.randbytes(random_data_len) + # VERY RARELY IT MIGHT FAIL + self.assertFalse(Util.is_ascii_entropy_validate(data), data) def test_read_bin_file_n(self): with tempfile.TemporaryDirectory() as tmp_dir: