Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upgrade to v4.11 #265

Merged
merged 12 commits into from
Jun 7, 2023
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ jobs:
--health-retries 5

steps:
- uses: actions/checkout@master
- uses: actions/checkout@v3

- name: Create PostgreSQL database
run: |
PGPASSWORD=${{ secrets.POSTGRES_PASSWORD }} psql -U ${{ secrets.POSTGRES_USER }} -h 127.0.0.1 -p 5432 -d credential_digger_tests -f sql/create_table.sql

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -46,7 +46,7 @@ jobs:
sudo apt install -y build-essential python3-dev libhyperscan-dev

- name: Cache python dependencies
uses: actions/cache@v1
uses: actions/cache@v3
with:
path: ~/.cache/pip # This path is specific to Ubuntu
key: ${{ runner.os }}-pip-${{ hashFiles('./requirements.txt') }}-${{ hashFiles('./tests/tests-requirements.txt') }}
Expand Down
1 change: 1 addition & 0 deletions credentialdigger/cli/get_discoveries.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def run(client, args):
args: `argparse.Namespace`
Arguments from command line parser.
"""
discoveries = []
try:
discoveries = client.get_discoveries(
repo_url=args.repo_url, file_name=args.filename, with_rules=args.with_rules)
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ python-dotenv
pyyaml
rich~=12.2
srsly>=2.4.0
tensorflow==2.9.3; python_version >= "3.8"
tensorflow==2.11.1; python_version >= "3.8"
tensorflow~=2.4; python_version < "3.8"
tensorflow-estimator==2.9.0; python_version >= "3.8"
tensorflow-estimator==2.11.0; python_version >= "3.8"
tensorflow-estimator~=2.4; python_version < "3.8"
tensorflow-text==2.9.0; python_version >= "3.8"
tensorflow-text==2.11.0; python_version >= "3.8"
tensorflow-text~=2.4; python_version < "3.8"
tf-models-official
transformers
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def requirements():

setuptools.setup(
name='credentialdigger',
version='4.10.0',
version='4.11.0',
author='SAP SE',
maintainer='Marco Rosa, Slim Trabelsi',
maintainer_email='[email protected], [email protected]',
Expand Down
33 changes: 32 additions & 1 deletion tests/functional_tests/test_get_discoveries_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ def tearDownClass(cls):
""" Remove the repo and all its discoveries. """
cls.client.delete_repo(REPO_URL)
cls.client.delete_discoveries(REPO_URL)
os.remove(cls.csv_path)
try:
os.remove(cls.csv_path)
except OSError as ex:
print(f'Failed to cleanup {cls.csv_path}, error={ex}')

@parameterized.expand([
param(state='new', count=5),
Expand Down Expand Up @@ -142,5 +145,33 @@ def test_csv_written(self):
data_frame = pd.read_csv(self.csv_path)
try:
assert data_frame.notna().values.all()
self.assertEqual(len(data_frame.columns), 9)
self.assertFalse('rule_regex' in data_frame.columns)
self.assertFalse('rule_category' in data_frame.columns)
self.assertFalse('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file contains NaN'

def test_csv_written_with_rules(self):
""" Test if the CLI command writes correctly the CSV file with the rule details. """
with self.assertRaises(SystemExit) as cm:
cli.main(
[
'',
'get_discoveries',
REPO_URL,
'--save',
self.csv_path,
'--dotenv',
self.dotenv,
'--with_rules',
]
)
data_frame = pd.read_csv(self.csv_path)
try:
self.assertEqual(len(data_frame.columns), 12)
self.assertTrue('rule_regex' in data_frame.columns)
self.assertTrue('rule_category' in data_frame.columns)
self.assertTrue('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file does not contain the rule details'
28 changes: 28 additions & 0 deletions tests/functional_tests/test_get_discoveries_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,5 +132,33 @@ def test_csv_written(self):
data_frame = pd.read_csv(self.csv_path)
try:
assert data_frame.notna().values.all()
self.assertEqual(len(data_frame.columns), 9)
self.assertFalse('rule_regex' in data_frame.columns)
self.assertFalse('rule_category' in data_frame.columns)
self.assertFalse('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file contains NaN'

def test_csv_written_with_rules(self):
""" Test if the CLI command writes correctly the CSV file with the rule details. """
with self.assertRaises(SystemExit):
cli.main(
[
'',
'get_discoveries',
'test_repo',
'--sqlite',
self.db_path,
'--save',
self.csv_path,
'--with_rules',
]
)
data_frame = pd.read_csv(self.csv_path)
try:
self.assertEqual(len(data_frame.columns), 12)
self.assertTrue('rule_regex' in data_frame.columns)
self.assertTrue('rule_category' in data_frame.columns)
self.assertTrue('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file does not contain the rule details'
33 changes: 33 additions & 0 deletions ui/backend/client_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from credentialdigger import Client
from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError
from git import Repo as GitRepo
from credentialdigger.client import DiscoveryWithRule

FilesSummary = namedtuple(
'FilesSummary',
Expand Down Expand Up @@ -223,3 +224,35 @@ def _check_repo_commit(self, repo_url, commit_id, local_repo=False):
return False, 'WrongBranchError'

return True, None

def get_discoveries_with_rules(self, query, repo_url, file_name=None):
""" Get all the discoveries of a repository with rule details.

Parameters
----------
query: str
The query to be run, with placeholders in place of parameters
repo_url: str
The url of the repository
file_name: str, optional
The name of the file to filter discoveries on

Returns
-------
list
A list of discoveries (dictionaries)

Raises
------
TypeError
If any of the required arguments is missing
"""
cursor = self.db.cursor()
all_discoveries = []
params = (repo_url,) if not file_name else (repo_url, file_name)
cursor.execute(query, params)
result = cursor.fetchone()
while result:
all_discoveries.append(dict(DiscoveryWithRule(*result)._asdict()))
result = cursor.fetchone()
return all_discoveries
29 changes: 29 additions & 0 deletions ui/backend/client_ui_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,32 @@ def get_files_summary(self, repo_url):
" FROM discoveries WHERE repo_url=%s"
" GROUP BY file_name"
))

def get_discoveries_with_rules(self, repo_url, file_name=None):
""" Get all the discoveries of a repository with rule details.

Parameters
----------
repo_url: str
The url of the repository
file_name: str, optional
The filename to filter discoveries on

Returns
-------
list
A list of discoveries (dictionaries)
"""
query = '''
SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
FROM discoveries
LEFT JOIN rules r
ON rule_id=r.id
WHERE repo_url=%s
'''
if file_name:
query += ' AND file_name=%s'
return super().get_discoveries_with_rules(
repo_url=repo_url,
file_name=file_name,
query=query)
29 changes: 29 additions & 0 deletions ui/backend/client_ui_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,32 @@ def get_files_summary(self, repo_url):
" FROM discoveries WHERE repo_url=?"
" GROUP BY file_name"
))

def get_discoveries_with_rules(self, repo_url, file_name=None):
""" Get all the discoveries of a repository with rule details.

Parameters
----------
repo_url: str
The url of the repository
file_name: str, optional
The filename to filter discoveries on

Returns
-------
list
A list of discoveries (dictionaries)
"""
query = '''
SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
FROM discoveries
LEFT JOIN rules r
ON rule_id=r.id
WHERE repo_url=?
'''
if file_name:
query += ' AND file_name=?'
return super().get_discoveries_with_rules(
repo_url=repo_url,
file_name=file_name,
query=query)
60 changes: 60 additions & 0 deletions ui/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,66 @@ def update_similar_discoveries():
return 'OK', 200


@app.route('/scan_file', methods=['POST'])
def scan_file():
    """ Scan an uploaded file for leaked credentials.

    The file is saved under the configured upload folder, scanned with the
    selected rules and ML models, and deleted afterwards. The discoveries
    found (joined with their rule details) are returned as JSON.

    Returns
    -------
    flask.Response
        JSON list of discoveries on success, or a plain 500 error message.
    """
    # Get scan properties
    rules_to_use = request.form.get('rule_to_use')
    use_password_model = request.form.get('passwordModel')
    use_path_model = request.form.get('pathModel')
    force_scan = request.form.get('forceScan') == 'force'
    file = request.files['filename']
    filename = secure_filename(file.filename)
    # Save file
    # TODO: perform malware scan on the file
    # Build the destination path before the try block so the except handler
    # can always reference `file_path` (previously a failure in the path
    # construction would have raised NameError inside the handler).
    file_path = os.path.abspath(os.path.join(
        app.config['UPLOAD_FOLDER'], 'uploads', filename))
    try:
        file.save(file_path)
        app.logger.debug(f'File saved to {file_path}')
    except Exception as ex:
        # NOTE(review): the messages below previously carried an f-prefix
        # with no placeholder; the filename placeholder is restored —
        # confirm against the original intent.
        app.logger.error(
            f'Error occurred when saving file={filename}, file path={file_path}, error={ex}')
        return 'Error in saving file', 500

    # Set up models used to filter the discoveries
    models = []
    if use_path_model == 'path':
        models.append('PathModel')
    if use_password_model == 'password':
        models.append('PasswordModel')

    # Setup scan arguments: `None` category means "use every rule"
    if rules_to_use != 'all':
        app.logger.debug(f'Use rules only from {rules_to_use} category')
    else:
        rules_to_use = None

    # Scan
    try:
        discoveries = c.scan_path(scan_path=file_path, models=models, force=force_scan,
                                  similarity=False, max_depth=-1, ignore_list=[], category=rules_to_use)
    except OSError as ex:
        app.logger.error(
            f'Error occurred when scanning file={filename}, file path={file_path}, error={ex}')
        os.remove(file_path)
        return f'Error in scanning file {filename}', 500

    # Get discoveries. The cleanup is in a finally that covers every path:
    # the original code leaked the uploaded file when the scan produced no
    # discoveries (os.remove was only reached through the non-empty branch).
    discoveries_with_rules = []
    try:
        if discoveries:
            discoveries_with_rules = c.get_discoveries_with_rules(
                repo_url=file_path)
    except OSError as ex:
        app.logger.error(
            f'Error occurred when getting discoveries of file={filename}, file path={file_path}, error={ex}')
        return f'Error in getting discoveries of file {filename}', 500
    finally:
        os.remove(file_path)
    return jsonify(discoveries_with_rules)


# Attach JSON Web Token support to the Flask app (used by the auth routes)
jwt = JWTManager(app)
if __name__ == '__main__':
    # Listen on all interfaces on port 5000 — NOTE(review): presumably meant
    # to run behind the project's container/proxy setup; confirm that binding
    # to 0.0.0.0 is acceptable for the deployment environment.
    app.run(host='0.0.0.0', port=5000)