From a003d11fe654abbd0751b7355973f8db34860479 Mon Sep 17 00:00:00 2001 From: Eric Brown Date: Mon, 8 Jan 2024 20:43:36 -0800 Subject: [PATCH] Use .gitignore as part of the excluded file list When using Bandit to scan projects based on Git source control, it would be beneficial to ignore files based on the patterns in the .gitignore file. Today, Bandit has some default excludes that get overridden if a user passes in other excludes. This is a bit confusing to the end user. But it also serves a purpose similar to .gitignore in that the paths excluded by default are typically included in a .gitignore. Note, it will only check for .gitignore files in top-level directories specified on the Bandit command line as targets. It does not recursively look for .gitignore files. This is done because recursive searching for .gitignore files would be complex to add to Bandit's existing file discovery. This change adds a new Apache 2 licensed dependency of ignorelib. Fixes #826 Signed-off-by: Eric Brown --- bandit/cli/main.py | 3 ++- bandit/core/manager.py | 20 ++++++++++++++++---- doc/source/man/bandit.rst | 3 ++- requirements.txt | 1 + tests/unit/core/test_manager.py | 10 ++++++---- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/bandit/cli/main.py b/bandit/cli/main.py index 119380b28..f38dd9122 100644 --- a/bandit/cli/main.py +++ b/bandit/cli/main.py @@ -340,7 +340,8 @@ def main(): help="comma-separated list of paths (glob patterns " "supported) to exclude from scan " "(note that these are in addition to the excluded " - "paths provided in the config file) (default: " + "paths provided in the config file and any files " + "matching patterns defined in .gitignore) (default: " + ",".join(constants.EXCLUDE) + ")", ) diff --git a/bandit/core/manager.py b/bandit/core/manager.py index 57e0e8570..2f7e38770 100644 --- a/bandit/core/manager.py +++ b/bandit/core/manager.py @@ -13,6 +13,7 @@ import tokenize import traceback +import ignorelib from rich import progress from 
bandit.core import constants as b_constants @@ -225,11 +226,14 @@ def discover_files(self, targets, recursive=False, excluded_paths=""): # if this is a directory and recursive is set, find all files if os.path.isdir(fname): if recursive: + gitignore_mgr = _build_gitignore_mgr(fname) + new_files, newly_excluded = _get_files_from_dir( - fname, + gitignore_mgr, included_globs=included_globs, excluded_path_strings=excluded_path_globs, ) + files_list.update(new_files) excluded_files.update(newly_excluded) else: @@ -238,7 +242,6 @@ def discover_files(self, targets, recursive=False, excluded_paths=""): "scan contents", fname, ) - else: # if the user explicitly mentions a file on command line, # we'll scan it, regardless of whether it's in the included @@ -365,8 +368,17 @@ def _execute_ast_visitor(self, fname, fdata, data, nosec_lines): return score +def _build_gitignore_mgr(path): + return ignorelib.IgnoreFilterManager.build( + path, + global_ignore_file_paths=[], + global_patterns=[], + ignore_file_name=".gitignore", + ) + + def _get_files_from_dir( - files_dir, included_globs=None, excluded_path_strings=None + ignore_mgr, included_globs=None, excluded_path_strings=None ): if not included_globs: included_globs = ["*.py"] @@ -376,7 +388,7 @@ def _get_files_from_dir( files_list = set() excluded_files = set() - for root, _, files in os.walk(files_dir): + for root, _, files in ignore_mgr.walk(): for filename in files: path = os.path.join(root, filename) if _is_file_included(path, included_globs, excluded_path_strings): diff --git a/doc/source/man/bandit.rst b/doc/source/man/bandit.rst index 46125e613..dffa63f5f 100644 --- a/doc/source/man/bandit.rst +++ b/doc/source/man/bandit.rst @@ -62,7 +62,8 @@ OPTIONS comma-separated list of paths (glob patterns supported) to exclude from scan (note that these are in addition to the excluded paths provided in the - config file) (default: + config file and any files matching patterns defined in + .gitignore) (default: 
.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg) -b BASELINE, --baseline BASELINE path of a baseline report to compare against (only diff --git a/requirements.txt b/requirements.txt index 289782022..77348fba0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ PyYAML>=5.3.1 # MIT stevedore>=1.20.0 # Apache-2.0 colorama>=0.3.9;platform_system=="Windows" # BSD License (3 clause) rich # MIT +ignorelib # Apache-2.0 diff --git a/tests/unit/core/test_manager.py b/tests/unit/core/test_manager.py index df815f588..544c3c030 100644 --- a/tests/unit/core/test_manager.py +++ b/tests/unit/core/test_manager.py @@ -113,15 +113,17 @@ def test_is_file_included(self): self.assertFalse(e) self.assertTrue(f) - @mock.patch("os.walk") - def test_get_files_from_dir(self, os_walk): - os_walk.return_value = [ + def test_get_files_from_dir(self): + ignore_walk = mock.Mock() + ignore_walk.walk.return_value = [ ("/", ("a"), ()), ("/a", (), ("a.py", "b.py", "c.ww")), ] inc, exc = manager._get_files_from_dir( - files_dir="", included_globs=["*.py"], excluded_path_strings=None + ignore_mgr=ignore_walk, + included_globs=["*.py"], + excluded_path_strings=None, ) self.assertEqual({"/a/c.ww"}, exc)