From 0e87460ffcd498100aec115f63237d251a9175f7 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 14 Feb 2019 17:59:27 -0500 Subject: [PATCH 01/38] Consolidate config parsing code --- multiscanner/analytics/ssdeep_analytics.py | 6 +-- multiscanner/common/utils.py | 39 ++++++++++++++++++- multiscanner/distributed/api.py | 39 +++++-------------- multiscanner/distributed/celery_worker.py | 23 ++--------- .../distributed/distributed_worker.py | 12 +----- multiscanner/web/app.py | 16 +------- 6 files changed, 54 insertions(+), 81 deletions(-) diff --git a/multiscanner/analytics/ssdeep_analytics.py b/multiscanner/analytics/ssdeep_analytics.py index 1630d366..5dd68cca 100644 --- a/multiscanner/analytics/ssdeep_analytics.py +++ b/multiscanner/analytics/ssdeep_analytics.py @@ -22,7 +22,6 @@ ''' import argparse -import configparser import json import sys from pprint import pprint @@ -43,10 +42,7 @@ class SSDeepAnalytic: def __init__(self, debug=False): storage_conf = utils.get_config_path(MS_CONFIG, 'storage') - config_object = configparser.SafeConfigParser() - config_object.optionxform = str - config_object.read(storage_conf) - conf = utils.parse_config(config_object) + conf = utils.read_config(storage_conf) storage_handler = storage.StorageHandler(configfile=storage_conf) es_handler = storage_handler.load_required_module('ElasticSearchStorage') diff --git a/multiscanner/common/utils.py b/multiscanner/common/utils.py index 72b72f44..a7c69605 100644 --- a/multiscanner/common/utils.py +++ b/multiscanner/common/utils.py @@ -5,6 +5,7 @@ unicode_literals, with_statement) import ast +import codecs import configparser import imp import os @@ -73,7 +74,7 @@ def convert_encoding(data, encoding='UTF-8', errors='replace'): def parse_config(config_object): - """Take a config object and returns it as a dictionary""" + """Converts a config object to a dictionary""" return_var = {} for section in config_object.sections(): section_dict = dict(config_object.items(section)) @@ -107,6 
+108,42 @@ def get_config_path(config_file, component): sys.exit() +def write_config(config_object, config_file, section_name, default_config): + """Write the default configuration to the given config file + + config_object - the ConfigParser object + config_file - the filename of the config file + section_name - the name of the section of defaults to be added + default_config - values to set this configuration to + """ + config_object.add_section(section_name) + for key in default_config: + config_object.set(section_name, key, str(default_config[key])) + conffile = codecs.open(config_file, 'w', 'utf-8') + config_object.write(conffile) + conffile.close() + + +def read_config(config_file, section_name=None, default_config=None): + """Parse a config file into a dictionary + + Can optionally set a default configuration by providing 'section_name' and + 'default_config' arguments. + + config_file - the filename of the config file + section_name - the name of the section of defaults to be added + default_config - values to set this configuration to + """ + config_object = configparser.SafeConfigParser() + config_object.optionxform = str + config_object.read(config_file) + if section_name is not None and default_config is not None and \ + (not config_object.has_section(section_name) or not os.path.isfile(config_file)): + # Write default config + write_config(config_object, config_file, section_name, default_config) + return parse_config(config_object) + + def dirname(path): """OS independent version of os.path.dirname""" split = path.split('/') diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 027def04..17164e80 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -41,8 +41,6 @@ ''' from __future__ import print_function -import codecs -import configparser import hashlib import json import multiprocessing @@ -106,20 +104,8 @@ def default(self, obj): app = Flask(__name__) app.json_encoder = 
CustomJSONEncoder -api_config_object = configparser.SafeConfigParser() -api_config_object.optionxform = str -# TODO: Why does this multiscanner.common instead of just common? api_config_file = utils.get_config_path(MS_CONFIG, 'api') -api_config_object.read(api_config_file) -if not api_config_object.has_section('api') or not os.path.isfile(api_config_file): - # Write default config - api_config_object.add_section('api') - for key in DEFAULTCONF: - api_config_object.set('api', key, str(DEFAULTCONF[key])) - conffile = codecs.open(api_config_file, 'w', 'utf-8') - api_config_object.write(conffile) - conffile.close() -api_config = utils.parse_config(api_config_object) +api_config = utils.read_config(api_config_file, 'api', DEFAULTCONF) # TODO: fix this mess # Needs api_config in order to function properly @@ -131,13 +117,13 @@ def default(self, obj): # Sleep and retry until database connection is successful try: # wait this many seconds between tries - db_sleep_time = int(api_config_object.get('Database', 'retry_time')) -except (configparser.NoSectionError, configparser.NoOptionError): + db_sleep_time = int(api_config['Database']['retry_time']) +except KeyError: db_sleep_time = database.Database.DEFAULTCONF['retry_time'] try: # max number of times to retry - db_num_retries = int(api_config_object.get('Database', 'retry_num')) -except (configparser.NoSectionError, configparser.NoOptionError): + db_num_retries = int(api_config['Database']['retry_num']) +except KeyError: db_num_retries = database.Database.DEFAULTCONF['retry_num'] for x in range(0, db_num_retries): @@ -159,11 +145,8 @@ def default(self, obj): storage_handler = StorageHandler(configfile=storage_conf) handler = storage_handler.load_required_module('ElasticSearchStorage') -ms_config_object = configparser.SafeConfigParser() -ms_config_object.optionxform = str -ms_configfile = MS_CONFIG -ms_config_object.read(ms_configfile) -ms_config = utils.parse_config(ms_config_object) +ms_config_file = MS_CONFIG +ms_config = 
utils.read_config(ms_config_file) try: DISTRIBUTED = api_config['api']['distributed'] @@ -279,14 +262,12 @@ def modules(): filenames = [os.path.splitext(os.path.basename(f)) for f in files] module_names = [m[0] for m in filenames if m[1] == '.py'] - ms_config = configparser.SafeConfigParser() - ms_config.optionxform = str - ms_config.read(MS_CONFIG) + global ms_config modules = {} for module in module_names: try: - modules[module] = ms_config.get(module, 'ENABLED') - except (configparser.NoSectionError, configparser.NoOptionError): + modules[module] = ms_config[module]['ENABLED'] + except KeyError: pass return jsonify({'Modules': modules}) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index 622b434a..bb0ed1bf 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -4,9 +4,7 @@ from the utils/ directory. ''' -import codecs import configparser -import os from datetime import datetime from socket import gethostname @@ -35,30 +33,15 @@ 'tz': 'US/Eastern', } -config_object = configparser.SafeConfigParser() -config_object.optionxform = str configfile = utils.get_config_path(MS_CONFIG, 'api') -config_object.read(configfile) - -if not config_object.has_section('celery') or not os.path.isfile(configfile): - # Write default config - config_object.add_section('celery') - for key in DEFAULTCONF: - config_object.set('celery', key, str(DEFAULTCONF[key])) - conffile = codecs.open(configfile, 'w', 'utf-8') - config_object.write(conffile) - conffile.close() -config = utils.parse_config(config_object) +config = utils.read_config(configfile, 'celery', DEFAULTCONF) api_config = config.get('api') worker_config = config.get('celery') db_config = config.get('Database') -storage_config_object = configparser.SafeConfigParser() -storage_config_object.optionxform = str storage_configfile = utils.get_config_path(MS_CONFIG, 'storage') -storage_config_object.read(storage_configfile) -config = 
utils.parse_config(storage_config_object) -es_storage_config = config.get('ElasticSearchStorage') +storage_config = utils.read_config(storage_configfile) +es_storage_config = storage_config.get('ElasticSearchStorage') app = Celery(broker='{0}://{1}:{2}@{3}/{4}'.format( worker_config.get('protocol'), diff --git a/multiscanner/distributed/distributed_worker.py b/multiscanner/distributed/distributed_worker.py index 4c097535..5b0d478b 100755 --- a/multiscanner/distributed/distributed_worker.py +++ b/multiscanner/distributed/distributed_worker.py @@ -6,8 +6,6 @@ unicode_literals, with_statement) import argparse -import codecs -import configparser import multiprocessing import os import queue @@ -66,18 +64,10 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e storage_handler.close() -def _read_conf(file_path): - conf = configparser.SafeConfigParser() - conf.optionxform = str - with codecs.open(file_path, 'r', encoding='utf-8') as fp: - conf.readfp(fp) - return utils.parse_config(conf) - - def _main(): args = _parse_args() # Pull config options - conf = _read_conf(args.config) + conf = utils.read_config(args.config) multiscanner_config = conf['worker']['multiscanner_config'] # Start worker task diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index 7efd101b..8af567eb 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -1,8 +1,5 @@ -import codecs from collections import namedtuple -import configparser from flask import Flask, render_template, request -import os import re from multiscanner import CONFIG as MS_CONFIG @@ -32,19 +29,8 @@ app = Flask(__name__) # Finagle Flask to read config from .ini file instead of .py file -web_config_object = configparser.SafeConfigParser() -web_config_object.optionxform = str web_config_file = utils.get_config_path(MS_CONFIG, 'web') -web_config_object.read(web_config_file) -if not web_config_object.has_section('web') or not os.path.isfile(web_config_file): - # Write default 
config - web_config_object.add_section('web') - for key in DEFAULTCONF: - web_config_object.set('web', key, str(DEFAULTCONF[key])) - conffile = codecs.open(web_config_file, 'w', 'utf-8') - web_config_object.write(conffile) - conffile.close() -web_config = utils.parse_config(web_config_object)['web'] +web_config = utils.read_config(web_config_file, 'web', DEFAULTCONF) conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From c61a84b0a18ab82a12212c2edf6ac3808924d453 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Wed, 20 Feb 2019 15:50:00 -0500 Subject: [PATCH 02/38] Consolidate module directory parsing code --- multiscanner/__init__.py | 2 +- multiscanner/common/utils.py | 10 +++--- multiscanner/config.py | 23 ++++++++++++++ multiscanner/distributed/api.py | 37 ++++++++-------------- multiscanner/modules/Signature/YaraScan.py | 4 +-- multiscanner/ms.py | 12 +++---- multiscanner/storage/storage.py | 2 +- multiscanner/tests/test_celery_worker.py | 2 +- multiscanner/tests/test_common_lib.py | 4 +-- multiscanner/tests/test_configs.py | 2 +- multiscanner/tests/test_modules.py | 4 +-- multiscanner/tests/test_multiscanner.py | 2 +- multiscanner/utils/cython_compile_libs.py | 2 +- 13 files changed, 60 insertions(+), 46 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index 1ec3f674..004718ab 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -4,7 +4,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
from .config import ( # noqa F401 - PY3, MS_WD, CONFIG, MODULESDIR + PY3, MS_WD, CONFIG, MODULESDIR, MODULESLIST, ) from .ms import ( # noqa F401 diff --git a/multiscanner/common/utils.py b/multiscanner/common/utils.py index a7c69605..230dc4ed 100644 --- a/multiscanner/common/utils.py +++ b/multiscanner/common/utils.py @@ -11,7 +11,7 @@ import os import sys -from multiscanner.config import PY3 +from six import PY3 try: import paramiko @@ -166,7 +166,7 @@ def basename(path): return split[-1] -def parseDir(directory, recursive=False, exclude=['__init__']): +def parse_dir(directory, recursive=False, exclude=['__init__']): """ Returns a list of files in a directory. @@ -180,7 +180,7 @@ def parseDir(directory, recursive=False, exclude=['__init__']): item = os.path.join(directory, item) if os.path.isdir(item): if recursive: - filelist.extend(parseDir(item, recursive)) + filelist.extend(parse_dir(item, recursive)) else: continue else: @@ -194,7 +194,7 @@ def parseDir(directory, recursive=False, exclude=['__init__']): return filelist -def parseFileList(FileList, recursive=False): +def parse_file_list(FileList, recursive=False): """ Takes a list of files and directories and returns a list of files. 
@@ -205,7 +205,7 @@ def parseFileList(FileList, recursive=False): filelist = [] for item in FileList: if os.path.isdir(item): - filelist.extend(parseDir(item, recursive)) + filelist.extend(parse_dir(item, recursive)) elif os.path.isfile(item): if not PY3: filelist.append(item.decode('utf8')) diff --git a/multiscanner/config.py b/multiscanner/config.py index f206fef7..cd4a954b 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -5,6 +5,7 @@ import sys from six import PY3 # noqa F401 +from multiscanner.common.utils import parse_dir if sys.version_info < (2, 7) or sys.version_info > (4,): print("WARNING: You're running an untested version of python") @@ -50,3 +51,25 @@ def determine_configuration_path(filepath): # The default config file CONFIG = determine_configuration_path(None) + + +def get_enabled_modules(): + """Returns a dictionary with module names as keys, with boolean values + denoting whether or not they are enabled in the config. + """ + files = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) + filenames = [os.path.splitext(os.path.basename(f)) for f in files] + module_names = [m[0] for m in filenames if m[1] == '.py'] + + global CONFIG + modules = {} + for module in module_names: + try: + modules[module] = CONFIG[module]['ENABLED'] + except KeyError: + pass + return modules + + +# The list of enabled modules +MODULESLIST = get_enabled_modules() diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 17164e80..ae2586d2 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -61,8 +61,7 @@ from flask_cors import CORS from jinja2 import Markup -# TODO: Why do we need to parseDir(MODULEDIR) multiple times? 
-from multiscanner import MODULESDIR, MS_WD, multiscan, parse_reports, CONFIG as MS_CONFIG +from multiscanner import MODULESLIST, MS_WD, multiscan, parse_reports, CONFIG as MS_CONFIG from multiscanner.common import utils, pdf_generator, stix2_generator from multiscanner.config import PY3 from multiscanner.storage import StorageHandler @@ -258,18 +257,7 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. ''' - files = utils.parseDir(MODULESDIR, True) - filenames = [os.path.splitext(os.path.basename(f)) for f in files] - module_names = [m[0] for m in filenames if m[1] == '.py'] - - global ms_config - modules = {} - for module in module_names: - try: - modules[module] = ms_config[module]['ENABLED'] - except KeyError: - pass - return jsonify({'Modules': modules}) + return jsonify({'Modules': MODULESLIST}) @app.route('/api/v1/tasks', methods=['GET']) @@ -408,7 +396,7 @@ def import_task(file_): return task_id -def queue_task(original_filename, f_name, full_path, metadata, rescan=False): +def queue_task(original_filename, f_name, full_path, metadata, rescan=False, module_list=None): ''' Queue up a single new task, for a single non-archive file. 
''' @@ -427,7 +415,7 @@ def queue_task(original_filename, f_name, full_path, metadata, rescan=False): # Publish the task to Celery multiscanner_celery.delay(full_path, original_filename, task_id, f_name, metadata, - config=MS_CONFIG) + config=MS_CONFIG, module_list=module_list) else: # Put the task on the queue work_queue.put((full_path, original_filename, task_id, f_name, metadata)) @@ -469,6 +457,7 @@ def create_task(): task_id_list = [] extract_dir = None rescan = False + modules = None for key in request.form.keys(): if key in ['file_id', 'archive-password', 'upload_type'] or request.form[key] == '': continue @@ -478,13 +467,15 @@ def create_task(): elif request.form[key] == 'rescan': rescan = True elif key == 'modules': - module_names = request.form[key] - files = utils.parseDir(MODULESDIR, True) - modules = [] - for f in files: - split = os.path.splitext(os.path.basename(f)) - if split[0] in module_names and split[1] == '.py': - modules.append(f) + module_names = request.form[key].split(',') + modules = list(set(module_names).intersection(MODULESLIST.keys())) + + # files = utils.parse_dir(MODULESDIR, True) + # modules = [] + # for f in files: + # split = os.path.splitext(os.path.basename(f)) + # if split[0] in module_names and split[1] == '.py': + # modules.append(f) elif key == 'archive-analyze' and request.form[key] == 'true': extract_dir = api_config['api']['upload_folder'] if not os.path.isdir(extract_dir): diff --git a/multiscanner/modules/Signature/YaraScan.py b/multiscanner/modules/Signature/YaraScan.py index 50c4af0d..576dc8a9 100644 --- a/multiscanner/modules/Signature/YaraScan.py +++ b/multiscanner/modules/Signature/YaraScan.py @@ -7,7 +7,7 @@ import time from multiscanner.config import CONFIG -from multiscanner.common.utils import parseDir +from multiscanner.common.utils import parse_dir __authors__ = "Nick Beede, Drew Bonasera" @@ -44,7 +44,7 @@ def scan(filelist, conf=DEFAULTCONF): includes = 'includes' in conf and conf['includes'] ruleset = {} - 
rules = parseDir(ruleDir, recursive=True) + rules = parse_dir(ruleDir, recursive=True) for r in rules: for ext in extlist: if r.endswith(ext): diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 8c365815..39897a36 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -29,7 +29,7 @@ import multiscanner from multiscanner.common.utils import (basename, convert_encoding, load_module, - parse_config, parseDir, parseFileList, + parse_config, parse_dir, parse_file_list, queue2list) from multiscanner.config import PY3, CONFIG, MODULESDIR, determine_configuration_path from multiscanner.storage import storage @@ -483,7 +483,7 @@ def _rewrite_config(ModuleList, config, filepath=CONFIG): config.write(f) -def config_init(filepath, module_list=parseDir(MODULESDIR, recursive=True, exclude=["__init__"])): +def config_init(filepath, module_list=parse_dir(MODULESDIR, recursive=True, exclude=["__init__"])): """ Creates a new config file at filepath @@ -570,12 +570,12 @@ def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG, conf # Init some vars # If recursive is False we don't parse the file list and take it as is. 
if recursive: - filelist = parseFileList(Files, recursive=recursive) + filelist = parse_file_list(Files, recursive=recursive) else: filelist = Files # A list of files in the module dir if module_list is None: - module_list = parseDir(MODULESDIR, recursive=True, exclude=["__init__"]) + module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) # A dictionary used for the copyfileto parameter filedic = {} # What will be the config file object @@ -892,7 +892,7 @@ def _init(args): config_init(args.config) else: print('Checking for missing modules in configuration...') - ModuleList = parseDir(MODULESDIR, recursive=True, exclude=["__init__"]) + ModuleList = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) config = configparser.SafeConfigParser() config.optionxform = str config.read(args.config) @@ -955,7 +955,7 @@ def _main(): sys.exit('ERROR:', args.json, 'is a directory, a file is expected') # Parse the file list - parsedlist = parseFileList(args.Files, recursive=args.recursive) + parsedlist = parse_file_list(args.Files, recursive=args.recursive) # Unzip zip files if asked to if args.extractzips: diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 782aa611..201deb28 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -329,7 +329,7 @@ def _write_missing_config(config_object, filepath, storage_classes=None): def _get_storage_classes(dir_path=STORAGE_DIR): storage_classes = {} - dir_list = utils.parseDir(dir_path, recursive=True) + dir_list = utils.parse_dir(dir_path, recursive=True) dir_list.remove(os.path.join(dir_path, 'storage.py')) # dir_list.remove(os.path.join(dir_path, '__init__.py')) dir_list.remove(os.path.join(dir_path, 'sql_driver.py')) diff --git a/multiscanner/tests/test_celery_worker.py b/multiscanner/tests/test_celery_worker.py index 7d5ba8df..049ced41 100644 --- a/multiscanner/tests/test_celery_worker.py +++ b/multiscanner/tests/test_celery_worker.py @@ -20,7 
+20,7 @@ # Get a subset of simple modules to run in testing # the celery worker -MODULE_LIST = utils.parseDir(multiscanner.MODULESDIR, recursive=True) +MODULE_LIST = utils.parse_dir(multiscanner.MODULESDIR, recursive=True) DESIRED_MODULES = [ 'entropy.py', 'MD5.py', diff --git a/multiscanner/tests/test_common_lib.py b/multiscanner/tests/test_common_lib.py index 30fe1741..6143ccf8 100644 --- a/multiscanner/tests/test_common_lib.py +++ b/multiscanner/tests/test_common_lib.py @@ -58,8 +58,8 @@ def test_basename_win_path(): assert result == 'd' -def test_parseDir(): +def test_parse_dir(): path = os.path.abspath(os.path.join(MS_WD, 'tests', 'dir_test')) - result = utils.parseDir(path, recursive=False) + result = utils.parse_dir(path, recursive=False) expected = [os.path.join(path, '1.1.txt'), os.path.join(path, '1.2.txt')] assert sorted(result) == sorted(expected) diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index e8e5d874..2a6681de 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -9,7 +9,7 @@ CWD = os.path.dirname(os.path.abspath(__file__)) module_list = [os.path.join(CWD, 'modules', 'test_conf.py')] -filelist = utils.parseDir(os.path.join(CWD, 'files')) +filelist = utils.parse_dir(os.path.join(CWD, 'files')) def test_no_config(): diff --git a/multiscanner/tests/test_modules.py b/multiscanner/tests/test_modules.py index 395b25c3..e7fe3d95 100644 --- a/multiscanner/tests/test_modules.py +++ b/multiscanner/tests/test_modules.py @@ -28,7 +28,7 @@ class _runmod_tests(object): def setup_class(cls): cls.real_mod_dir = multiscanner.MODULESDIR multiscanner.MODULESDIR = os.path.join(CWD, "modules") - cls.filelist = utils.parseDir(os.path.join(CWD, 'files')) + cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) cls.files = ['a', 'b', 'C:\\c', '/d/d'] cls.threadDict = {} @@ -100,7 +100,7 @@ def teardown(self): def test_all_started(self): ThreadList = multiscanner._start_module_threads( - 
self.filelist, utils.parseDir(os.path.join(CWD, "modules")), self.config, self.global_module_interface) + self.filelist, utils.parse_dir(os.path.join(CWD, "modules")), self.config, self.global_module_interface) time.sleep(.001) for t in ThreadList: assert t.started diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 21b07333..047a70fa 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -14,7 +14,7 @@ class _runmulti_tests(object): def setup_class(cls): cls.real_mod_dir = multiscanner.MODULESDIR multiscanner.MODULEDIR = os.path.join(CWD, "modules") - cls.filelist = utils.parseDir(os.path.join(CWD, 'files')) + cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) multiscanner.CONFIG = '.tmpfile.ini' @classmethod diff --git a/multiscanner/utils/cython_compile_libs.py b/multiscanner/utils/cython_compile_libs.py index c7e946f0..0948a631 100644 --- a/multiscanner/utils/cython_compile_libs.py +++ b/multiscanner/utils/cython_compile_libs.py @@ -15,7 +15,7 @@ def main(): - filelist = utils.parseFileList([LIBS], recursive=True) + filelist = utils.parse_file_list([LIBS], recursive=True) try: import pefile filepath = pefile.__file__[:-1] From 2fed0997dd9a705a588349a5dcd28b6c60e75b71 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 21 Feb 2019 15:44:49 -0500 Subject: [PATCH 03/38] Rename ModuleList -> module_list to be more pythonic --- multiscanner/ms.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 39897a36..b19b5a60 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -342,12 +342,12 @@ def _copy_to_share(filelist, filedic, sharedir): return filelist -def _start_module_threads(filelist, ModuleList, config, global_module_interface): +def _start_module_threads(filelist, module_list, config, global_module_interface): """ Starts each module on the file list in a separate 
thread. Returns a list of threads filelist - A lists of strings. The strings are files to be scanned - ModuleList - A list of all the modules to be run + module_list - A list of all the modules to be run config - The config dictionary global_module_interface - The global module interface to be injected in each module """ @@ -357,7 +357,7 @@ def _start_module_threads(filelist, ModuleList, config, global_module_interface) ThreadDict = {} global_module_interface.run_count += 1 # Starts a thread for each module. - for module in ModuleList: + for module in module_list: if module.endswith(".py"): modname = os.path.basename(module[:-3]) @@ -405,17 +405,17 @@ def _start_module_threads(filelist, ModuleList, config, global_module_interface) return ThreadList -def _write_missing_module_configs(ModuleList, config, filepath=CONFIG): +def _write_missing_module_configs(module_list, config, filepath=CONFIG): """ Write in default config for modules not in config file. Returns True if config was written, False if not. - ModuleList - The list of modules + module_list - The list of modules config - The config object """ filepath = determine_configuration_path(filepath) ConfNeedsWrite = False - ModuleList.sort() - for module in ModuleList: + module_list.sort() + for module in module_list: if module.endswith(".py"): modname = os.path.basename(module).split('.')[0] moddir = os.path.dirname(module) @@ -447,18 +447,18 @@ def _write_missing_module_configs(ModuleList, config, filepath=CONFIG): return False -def _rewrite_config(ModuleList, config, filepath=CONFIG): +def _rewrite_config(module_list, config, filepath=CONFIG): """ Write in default config for all modules. 
- ModuleList - The list of modules + module_list - The list of modules config - The config object """ filepath = determine_configuration_path(filepath) if VERBOSE: print('Rewriting config...') - ModuleList.sort() - for module in ModuleList: + module_list.sort() + for module in module_list: if module.endswith('.py'): modname = os.path.basename(module).split('.')[0] moddir = os.path.dirname(module) @@ -892,11 +892,11 @@ def _init(args): config_init(args.config) else: print('Checking for missing modules in configuration...') - ModuleList = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) + module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) config = configparser.SafeConfigParser() config.optionxform = str config.read(args.config) - _write_missing_module_configs(ModuleList, config, filepath=args.config) + _write_missing_module_configs(module_list, config, filepath=args.config) else: config_init(args.config) From 5d386a3c35b8c7856545060a8af6e13fba6dec22 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Mon, 25 Feb 2019 13:30:10 -0500 Subject: [PATCH 04/38] Ignore an expected error when parsing configs And introduce a variable to make code easier to read --- multiscanner/common/utils.py | 3 +++ multiscanner/ms.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/multiscanner/common/utils.py b/multiscanner/common/utils.py index 331ca48e..447206ce 100644 --- a/multiscanner/common/utils.py +++ b/multiscanner/common/utils.py @@ -85,6 +85,9 @@ def parse_config(config_object): for key in section_dict: try: section_dict[key] = ast.literal_eval(section_dict[key]) + except SyntaxError as e: + # Ignore if config value isn't convertible to a Python literal + pass except Exception as e: logger.debug(e) return_var[section] = section_dict diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 222cfb8e..4160d173 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -878,22 +878,23 @@ def _init(args): 
config.optionxform = str config.read(args.config) config = _get_main_config(config) - if os.path.isfile(config["storage-config"]): - logger.warning('{} already exists, overwriting will destroy changes'.format(config["storage-config"])) + storage_config = config["storage-config"] + if os.path.isfile(storage_config): + logger.warning('{} already exists, overwriting will destroy changes'.format(storage_config)) try: answer = input('Do you wish to overwrite the configuration file [y/N]:') except EOFError as e: logger.warn(e) answer = 'N' if answer == 'y': - storage.config_init(config["storage-config"], overwrite=True) - logger.info('Storage configuration file initialized at {}'.format(config["storage-config"])) + storage.config_init(storage_config, overwrite=True) + logger.info('Storage configuration file initialized at {}'.format(storage_config)) else: logger.info('Checking for missing modules in storage configuration...') - storage.config_init(config["storage-config"], overwrite=False) + storage.config_init(storage_config, overwrite=False) else: - storage.config_init(config["storage-config"]) - logger.info('Storage configuration file initialized at {}'.format(config["storage-config"])) + storage.config_init(storage_config) + logger.info('Storage configuration file initialized at {}'.format(storage_config)) exit(0) From 08b856ab94eaa32dad790af29942a1c84f976b5b Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 26 Feb 2019 12:37:34 -0500 Subject: [PATCH 05/38] Move config functions from utils.py to config.py --- multiscanner/analytics/ssdeep_analytics.py | 6 +- multiscanner/common/dir_monitor.py | 4 +- multiscanner/common/utils.py | 79 ------------------- multiscanner/config.py | 78 ++++++++++++++++++ multiscanner/distributed/api.py | 12 +-- multiscanner/distributed/celery_worker.py | 16 ++-- .../distributed/distributed_worker.py | 6 +- multiscanner/ms.py | 5 +- multiscanner/storage/storage.py | 9 ++- multiscanner/web/app.py | 6 +- 10 files changed, 110 
insertions(+), 111 deletions(-) diff --git a/multiscanner/analytics/ssdeep_analytics.py b/multiscanner/analytics/ssdeep_analytics.py index ded6974c..ec68771e 100644 --- a/multiscanner/analytics/ssdeep_analytics.py +++ b/multiscanner/analytics/ssdeep_analytics.py @@ -37,15 +37,15 @@ from multiscanner import CONFIG as MS_CONFIG -from multiscanner.common import utils +from multiscanner.config import get_config_path, read_config from multiscanner.storage import storage class SSDeepAnalytic: def __init__(self, debug=False): - storage_conf = utils.get_config_path(MS_CONFIG, 'storage') - conf = utils.read_config(storage_conf) + storage_conf = get_config_path(MS_CONFIG, 'storage') + conf = read_config(storage_conf) storage_handler = storage.StorageHandler(configfile=storage_conf) es_handler = storage_handler.load_required_module('ElasticSearchStorage') diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index d555a659..444c3f99 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -21,7 +21,7 @@ from multiscanner import CONFIG as MS_CONFIG from multiscanner import multiscan, parse_reports -from multiscanner.common import utils +from multiscanner.config import get_config_path from multiscanner.storage import storage logger = logging.getLogger(__name__) @@ -81,7 +81,7 @@ def start_observer(directory, work_queue, recursive=False): def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal): filelist = [] time_stamp = None - storage_conf = utils.get_config_path(config, 'storage') + storage_conf = get_config_path(config, 'storage') storage_handler = storage.StorageHandler(configfile=storage_conf) while not exit_signal.value: time.sleep(1) diff --git a/multiscanner/common/utils.py b/multiscanner/common/utils.py index 447206ce..59e75769 100644 --- a/multiscanner/common/utils.py +++ b/multiscanner/common/utils.py @@ -3,13 +3,9 @@ # file, You can obtain one at 
http://mozilla.org/MPL/2.0/. from __future__ import (absolute_import, division, unicode_literals, with_statement) -import ast -import codecs -import configparser import imp import logging import os -import sys from six import PY3 @@ -77,81 +73,6 @@ def convert_encoding(data, encoding='UTF-8', errors='replace'): return data -def parse_config(config_object): - """Converts a config object to a dictionary""" - return_var = {} - for section in config_object.sections(): - section_dict = dict(config_object.items(section)) - for key in section_dict: - try: - section_dict[key] = ast.literal_eval(section_dict[key]) - except SyntaxError as e: - # Ignore if config value isn't convertible to a Python literal - pass - except Exception as e: - logger.debug(e) - return_var[section] = section_dict - return return_var - - -def get_config_path(config_file, component): - """Gets the location of the config file for the given multiscanner component - from the multiscanner config file - - Components: - storage - api - web""" - conf = configparser.ConfigParser() - conf.read(config_file) - conf = parse_config(conf) - try: - return conf['main']['%s-config' % component] - except KeyError: - logger.error( - "Couldn't find '{}-config' value in 'main' section " - "of config file. Have you run 'python multiscanner.py init'?" 
- .format(component) - ) - sys.exit() - - -def write_config(config_object, config_file, section_name, default_config): - """Write the default configuration to the given config file - - config_object - the ConfigParser object - config_file - the filename of the config file - section_name - the name of the section of defaults to be added - default_config - values to set this configuration to - """ - config_object.add_section(section_name) - for key in default_config: - config_object.set(section_name, key, str(default_config[key])) - conffile = codecs.open(config_file, 'w', 'utf-8') - config_object.write(conffile) - conffile.close() - - -def read_config(config_file, section_name=None, default_config=None): - """Parse a config file into a dictionary - - Can optionally set a default configuration by providing 'section_name' and - 'default_config' arguments. - - config_file - the filename of the config file - section_name - the name of the section of defaults to be added - default_config - values to set this configuration to - """ - config_object = configparser.ConfigParser() - config_object.optionxform = str - config_object.read(config_file) - if section_name is not None and default_config is not None and \ - (not config_object.has_section(section_name) or not os.path.isfile(config_file)): - # Write default config - write_config(config_object, config_file, section_name, default_config) - return parse_config(config_object) - - def dirname(path): """OS independent version of os.path.dirname""" split = path.split('/') diff --git a/multiscanner/config.py b/multiscanner/config.py index 4d345b4c..08d42aca 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -1,6 +1,9 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import ast +import codecs +import configparser import logging import os import sys @@ -56,6 +59,81 @@ def determine_configuration_path(filepath): CONFIG = determine_configuration_path(None) +def parse_config(config_object): + """Converts a config object to a dictionary""" + return_var = {} + for section in config_object.sections(): + section_dict = dict(config_object.items(section)) + for key in section_dict: + try: + section_dict[key] = ast.literal_eval(section_dict[key]) + except SyntaxError as e: + # Ignore if config value isn't convertible to a Python literal + pass + except Exception as e: + logger.debug(e) + return_var[section] = section_dict + return return_var + + +def get_config_path(config_file, component): + """Gets the location of the config file for the given multiscanner component + from the multiscanner config file + + Components: + storage + api + web""" + conf = configparser.ConfigParser() + conf.read(config_file) + conf = parse_config(conf) + try: + return conf['main']['%s-config' % component] + except KeyError: + logger.error( + "Couldn't find '{}-config' value in 'main' section " + "of config file. Have you run 'python multiscanner.py init'?" 
+ .format(component) + ) + sys.exit() + + +def write_config(config_object, config_file, section_name, default_config): + """Write the default configuration to the given config file + + config_object - the ConfigParser object + config_file - the filename of the config file + section_name - the name of the section of defaults to be added + default_config - values to set this configuration to + """ + config_object.add_section(section_name) + for key in default_config: + config_object.set(section_name, key, str(default_config[key])) + conffile = codecs.open(config_file, 'w', 'utf-8') + config_object.write(conffile) + conffile.close() + + +def read_config(config_file, section_name=None, default_config=None): + """Parse a config file into a dictionary + + Can optionally set a default configuration by providing 'section_name' and + 'default_config' arguments. + + config_file - the filename of the config file + section_name - the name of the section of defaults to be added + default_config - values to set this configuration to + """ + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read(config_file) + if section_name is not None and default_config is not None and \ + (not config_object.has_section(section_name) or not os.path.isfile(config_file)): + # Write default config + write_config(config_object, config_file, section_name, default_config) + return parse_config(config_object) + + def get_enabled_modules(): """Returns a dictionary with module names as keys, with boolean values denoting whether or not they are enabled in the config. 
diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index d5bd813d..1ea136d7 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -61,8 +61,8 @@ from jinja2 import Markup from multiscanner import MODULESLIST, MS_WD, multiscan, parse_reports, CONFIG as MS_CONFIG -from multiscanner.common import utils, pdf_generator, stix2_generator -from multiscanner.config import PY3 +from multiscanner.common import pdf_generator, stix2_generator +from multiscanner.config import PY3, get_config_path, read_config from multiscanner.storage import StorageHandler from multiscanner.storage import sql_driver as database from multiscanner.storage.storage import StorageNotLoadedError @@ -104,8 +104,8 @@ def default(self, obj): app = Flask(__name__) app.json_encoder = CustomJSONEncoder -api_config_file = utils.get_config_path(MS_CONFIG, 'api') -api_config = utils.read_config(api_config_file, 'api', DEFAULTCONF) +api_config_file = get_config_path(MS_CONFIG, 'api') +api_config = read_config(api_config_file, 'api', DEFAULTCONF) # TODO: fix this mess # Needs api_config in order to function properly @@ -143,12 +143,12 @@ def default(self, obj): logger.error("Retrying...") time.sleep(db_sleep_time) -storage_conf = utils.get_config_path(MS_CONFIG, 'storage') +storage_conf = get_config_path(MS_CONFIG, 'storage') storage_handler = StorageHandler(configfile=storage_conf) handler = storage_handler.load_required_module('ElasticSearchStorage') ms_config_file = MS_CONFIG -ms_config = utils.read_config(ms_config_file) +ms_config = read_config(ms_config_file) try: DISTRIBUTED = api_config['api']['distributed'] diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index 92980950..ce1d0245 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -12,9 +12,9 @@ from celery.schedules import crontab from celery.utils.log import get_task_logger -from 
multiscanner import CONFIG as MS_CONFIG from multiscanner import multiscan, parse_reports -from multiscanner.common import utils +from multiscanner.config import CONFIG as MS_CONFIG +from multiscanner.config import get_config_path, parse_config, read_config from multiscanner.storage import elasticsearch_storage, storage from multiscanner.storage import sql_driver as database from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic @@ -33,14 +33,14 @@ 'tz': 'US/Eastern', } -configfile = utils.get_config_path(MS_CONFIG, 'api') -config = utils.read_config(configfile, 'celery', DEFAULTCONF) +configfile = get_config_path(MS_CONFIG, 'api') +config = read_config(configfile, 'celery', DEFAULTCONF) api_config = config.get('api') worker_config = config.get('celery') db_config = config.get('Database') -storage_configfile = utils.get_config_path(MS_CONFIG, 'storage') -storage_config = utils.read_config(storage_configfile) +storage_configfile = get_config_path(MS_CONFIG, 'storage') +storage_config = read_config(storage_configfile) es_storage_config = storage_config.get('ElasticSearchStorage') app = Celery(broker='{0}://{1}:{2}@{3}/{4}'.format( @@ -117,7 +117,7 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename)) # Get the storage config - storage_conf = utils.get_config_path(config, 'storage') + storage_conf = get_config_path(config, 'storage') storage_handler = storage.StorageHandler(configfile=storage_conf) resultlist = multiscan( @@ -134,7 +134,7 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, scan_config_object = configparser.ConfigParser() scan_config_object.optionxform = str scan_config_object.read(config) - full_conf = utils.parse_config(scan_config_object) + full_conf = parse_config(scan_config_object) sub_conf = {} # Count number of modules enabled out of total possible # and add 
it to the Scan Metadata diff --git a/multiscanner/distributed/distributed_worker.py b/multiscanner/distributed/distributed_worker.py index 3786f36e..5c13b130 100755 --- a/multiscanner/distributed/distributed_worker.py +++ b/multiscanner/distributed/distributed_worker.py @@ -16,7 +16,7 @@ standard_library.install_aliases() from multiscanner import multiscan, parse_reports -from multiscanner.common import utils +from multiscanner.config import get_config_path, read_config from multiscanner.storage import storage @@ -31,7 +31,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal): filelist = [] time_stamp = None - storage_conf = utils.get_config_path(config, 'storage') + storage_conf = get_config_path(config, 'storage') storage_handler = storage.StorageHandler(configfile=storage_conf) while not exit_signal.value: time.sleep(1) @@ -69,7 +69,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e def _main(): args = _parse_args() # Pull config options - conf = utils.read_config(args.config) + conf = read_config(args.config) multiscanner_config = conf['worker']['multiscanner_config'] # Start worker task diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 4160d173..ebf213db 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -29,9 +29,8 @@ from multiscanner.version import __version__ as MS_VERSION from multiscanner.common.utils import (basename, convert_encoding, load_module, - parse_config, parse_dir, parse_file_list, - queue2list) -from multiscanner.config import PY3, CONFIG, MODULESDIR, determine_configuration_path + parse_dir, parse_file_list, queue2list) +from multiscanner.config import PY3, CONFIG, MODULESDIR, determine_configuration_path, parse_config from multiscanner.storage import storage diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index e6191bad..09646225 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ 
-17,8 +17,9 @@ standard_library.install_aliases() -from multiscanner.config import CONFIG as MS_CONFIG from multiscanner.common import utils +from multiscanner.config import CONFIG as MS_CONFIG +from multiscanner.config import get_config_path, parse_config DEFAULTCONF = { @@ -97,7 +98,7 @@ def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): # Read in config if configfile: - configfile = utils.get_config_path(MS_CONFIG, 'storage') + configfile = get_config_path(MS_CONFIG, 'storage') config_object = configparser.ConfigParser() config_object.optionxform = str # Regen the config if needed or wanted @@ -107,7 +108,7 @@ def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): config_object.read(configfile) if config: - file_conf = utils.parse_config(config_object) + file_conf = parse_config(config_object) for key in config: if key not in file_conf: file_conf[key] = config[key] @@ -116,7 +117,7 @@ def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): file_conf[key].update(config[key]) config = file_conf else: - config = utils.parse_config(config_object) + config = parse_config(config_object) else: if config is None: config = {} diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index 8af567eb..cc35a9a9 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -4,7 +4,7 @@ from multiscanner import CONFIG as MS_CONFIG from multiscanner import __version__ -from multiscanner.common import utils +from multiscanner.config import get_config_path, read_config DEFAULTCONF = { 'HOST': "localhost", @@ -29,8 +29,8 @@ app = Flask(__name__) # Finagle Flask to read config from .ini file instead of .py file -web_config_file = utils.get_config_path(MS_CONFIG, 'web') -web_config = utils.read_config(web_config_file, 'web', DEFAULTCONF) +web_config_file = get_config_path(MS_CONFIG, 'web') +web_config = read_config(web_config_file, 'web', DEFAULTCONF) conf_tuple = namedtuple('WebConfig', 
web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From 882bf3d65f3f70a0f5ca53a14c413c5c28a5a30e Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Wed, 27 Feb 2019 09:43:49 -0500 Subject: [PATCH 06/38] Only parse main MultiScanner config once get_config_path() should work on an already-parsed config object, and not need to parse it again. --- multiscanner/__init__.py | 2 +- multiscanner/analytics/ssdeep_analytics.py | 3 +-- multiscanner/common/dir_monitor.py | 5 ++-- multiscanner/config.py | 27 +++++++++++-------- multiscanner/distributed/api.py | 6 ++--- multiscanner/distributed/celery_worker.py | 3 +-- multiscanner/modules/Antivirus/AVGScan.py | 4 +-- multiscanner/modules/Antivirus/MSEScan.py | 4 +-- multiscanner/modules/Antivirus/McAfeeScan.py | 4 +-- multiscanner/modules/Database/NSRL.py | 6 ++--- .../modules/MachineLearning/EndgameEmber.py | 4 +-- .../modules/Metadata/ExifToolsScan.py | 4 +-- multiscanner/modules/Metadata/TrID.py | 4 +-- multiscanner/modules/Signature/YaraScan.py | 4 +-- multiscanner/ms.py | 26 +++++++++--------- multiscanner/storage/sql_driver.py | 7 +++-- multiscanner/storage/storage.py | 3 +-- multiscanner/tests/test_multiscanner.py | 2 +- multiscanner/web/app.py | 3 +-- 19 files changed, 60 insertions(+), 61 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index 004718ab..aeff1e77 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -4,7 +4,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
from .config import ( # noqa F401 - PY3, MS_WD, CONFIG, MODULESDIR, MODULESLIST, + CONFIG_FILE, MS_WD, MS_CONFIG, MODULESDIR, MODULESLIST, PY3, ) from .ms import ( # noqa F401 diff --git a/multiscanner/analytics/ssdeep_analytics.py b/multiscanner/analytics/ssdeep_analytics.py index ec68771e..1232214f 100644 --- a/multiscanner/analytics/ssdeep_analytics.py +++ b/multiscanner/analytics/ssdeep_analytics.py @@ -36,8 +36,7 @@ ssdeep = None -from multiscanner import CONFIG as MS_CONFIG -from multiscanner.config import get_config_path, read_config +from multiscanner.config import MS_CONFIG, get_config_path, read_config from multiscanner.storage import storage diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index 444c3f99..a1ddcb72 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -19,9 +19,8 @@ from watchdog.events import FileSystemEventHandler from watchdog.observers import Observer -from multiscanner import CONFIG as MS_CONFIG from multiscanner import multiscan, parse_reports -from multiscanner.config import get_config_path +from multiscanner.config import CONFIG_FILE, get_config_path from multiscanner.storage import storage logger = logging.getLogger(__name__) @@ -141,7 +140,7 @@ def _main(): def _parse_args(): parser = argparse.ArgumentParser(description='Monitor a directory and submit new files to MultiScanner') parser.add_argument("-c", "--config", help="The config file to use", required=False, - default=MS_CONFIG) + default=CONFIG_FILE) parser.add_argument("-s", "--seconds", help="The number of seconds to wait for additional files", required=False, default=120, type=int) parser.add_argument("-b", "--batch", help="The max number of files per batch", required=False, diff --git a/multiscanner/config.py b/multiscanner/config.py index 08d42aca..2a626db7 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -56,7 +56,7 @@ def determine_configuration_path(filepath): # The default 
config file -CONFIG = determine_configuration_path(None) +CONFIG_FILE = determine_configuration_path(None) def parse_config(config_object): @@ -76,19 +76,20 @@ def parse_config(config_object): return return_var -def get_config_path(config_file, component): - """Gets the location of the config file for the given multiscanner component - from the multiscanner config file +def get_config_path(config, component): + """Gets the location of the config file for the given MultiScanner component + from the MultiScanner config Components: storage api - web""" - conf = configparser.ConfigParser() - conf.read(config_file) - conf = parse_config(conf) + web + + config - dictionary or ConfigParser object containing MultiScanner config + component - component to get the path for + """ try: - return conf['main']['%s-config' % component] + return config['main']['%s-config' % component] except KeyError: logger.error( "Couldn't find '{}-config' value in 'main' section " @@ -134,6 +135,10 @@ def read_config(config_file, section_name=None, default_config=None): return parse_config(config_object) +# Main MultiScanner config, as a dictionary +MS_CONFIG = read_config(CONFIG_FILE) + + def get_enabled_modules(): """Returns a dictionary with module names as keys, with boolean values denoting whether or not they are enabled in the config. 
@@ -142,11 +147,11 @@ def get_enabled_modules(): filenames = [os.path.splitext(os.path.basename(f)) for f in files] module_names = [m[0] for m in filenames if m[1] == '.py'] - global CONFIG + global MS_CONFIG modules = {} for module in module_names: try: - modules[module] = CONFIG[module]['ENABLED'] + modules[module] = MS_CONFIG[module]['ENABLED'] except KeyError as e: logger.debug(e) return modules diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 1ea136d7..b6403da4 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -60,7 +60,7 @@ from flask_cors import CORS from jinja2 import Markup -from multiscanner import MODULESLIST, MS_WD, multiscan, parse_reports, CONFIG as MS_CONFIG +from multiscanner import CONFIG_FILE, MODULESLIST, MS_CONFIG, MS_WD, multiscan, parse_reports from multiscanner.common import pdf_generator, stix2_generator from multiscanner.config import PY3, get_config_path, read_config from multiscanner.storage import StorageHandler @@ -199,7 +199,7 @@ def multiscanner_process(work_queue, exit_signal): filelist = [item[0] for item in metadata_list] # modulelist = [item[5] for item in metadata_list] resultlist = multiscan( - filelist, configfile=MS_CONFIG + filelist, configfile=CONFIG_FILE # module_list ) results = parse_reports(resultlist, python=True) @@ -991,7 +991,7 @@ def get_pdf_report(task_id): if not success: return jsonify(report_dict) - pdf = pdf_generator.create_pdf_document(MS_CONFIG, report_dict) + pdf = pdf_generator.create_pdf_document(CONFIG_FILE, report_dict) response = make_response(pdf) response.headers['Content-Type'] = 'application/pdf' response.headers['Content-Disposition'] = 'attachment; filename=%s.pdf' % task_id diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index ce1d0245..093f124e 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -13,8 +13,7 @@ from 
celery.utils.log import get_task_logger from multiscanner import multiscan, parse_reports -from multiscanner.config import CONFIG as MS_CONFIG -from multiscanner.config import get_config_path, parse_config, read_config +from multiscanner.config import MS_CONFIG, get_config_path, parse_config, read_config from multiscanner.storage import elasticsearch_storage, storage from multiscanner.storage import sql_driver as database from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic diff --git a/multiscanner/modules/Antivirus/AVGScan.py b/multiscanner/modules/Antivirus/AVGScan.py index c0193a1c..90d18c6b 100644 --- a/multiscanner/modules/Antivirus/AVGScan.py +++ b/multiscanner/modules/Antivirus/AVGScan.py @@ -7,7 +7,7 @@ import subprocess import re -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -21,7 +21,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(CONFIG)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/Antivirus/MSEScan.py b/multiscanner/modules/Antivirus/MSEScan.py index 145edcc9..2187070b 100644 --- a/multiscanner/modules/Antivirus/MSEScan.py +++ b/multiscanner/modules/Antivirus/MSEScan.py @@ -6,7 +6,7 @@ import os import subprocess -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import list2cmdline, sshconnect, SSH subprocess.list2cmdline = list2cmdline @@ -18,7 +18,7 @@ NAME = "Microsoft Security Essentials" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(CONFIG)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections 
PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/Antivirus/McAfeeScan.py b/multiscanner/modules/Antivirus/McAfeeScan.py index a36963f8..ddc85a79 100644 --- a/multiscanner/modules/Antivirus/McAfeeScan.py +++ b/multiscanner/modules/Antivirus/McAfeeScan.py @@ -7,7 +7,7 @@ import subprocess import re -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -19,7 +19,7 @@ NAME = "McAfee" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(CONFIG)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/Database/NSRL.py b/multiscanner/modules/Database/NSRL.py index f3042429..4e244472 100755 --- a/multiscanner/modules/Database/NSRL.py +++ b/multiscanner/modules/Database/NSRL.py @@ -8,7 +8,7 @@ import os import struct -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE __author__ = "Drew Bonasera" __license__ = "MPL 2.0" @@ -19,8 +19,8 @@ REQUIRES = ["SHA1", "MD5"] DEFAULTCONF = { - 'hash_list': os.path.join(os.path.split(CONFIG)[0], 'etc', 'nsrl', 'hash_list'), - 'offsets': os.path.join(os.path.split(CONFIG)[0], 'etc', 'nsrl', 'offsets'), + 'hash_list': os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), + 'offsets': os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), 'ENABLED': True } diff --git a/multiscanner/modules/MachineLearning/EndgameEmber.py b/multiscanner/modules/MachineLearning/EndgameEmber.py index 554cd316..48b8aaba 100644 --- a/multiscanner/modules/MachineLearning/EndgameEmber.py +++ b/multiscanner/modules/MachineLearning/EndgameEmber.py @@ -19,7 +19,7 @@ import os from pathlib import Path -from multiscanner import 
CONFIG +from multiscanner import CONFIG_FILE __authors__ = "Patrick Copeland" @@ -30,7 +30,7 @@ REQUIRES = ['libmagic'] DEFAULTCONF = { 'ENABLED': False, - 'path-to-model': os.path.join(os.path.split(CONFIG)[0], 'etc', 'ember', 'ember_model_2017.txt'), + 'path-to-model': os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), } LGBM_MODEL = None diff --git a/multiscanner/modules/Metadata/ExifToolsScan.py b/multiscanner/modules/Metadata/ExifToolsScan.py index e6da3f18..acf59e0c 100644 --- a/multiscanner/modules/Metadata/ExifToolsScan.py +++ b/multiscanner/modules/Metadata/ExifToolsScan.py @@ -8,7 +8,7 @@ import subprocess import re -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -20,7 +20,7 @@ NAME = "ExifTool" # These are overwritten by the config file HOST = ("MultiScanner", 22, "User") -KEY = os.path.join(os.path.split(CONFIG)[0], "etc", "id_rsa") +KEY = os.path.join(os.path.split(CONFIG_FILE)[0], "etc", "id_rsa") PATHREPLACE = "X:\\" # Entries to be removed from the final results REMOVEENTRY = ["ExifTool Version Number", "File Name", "Directory", "File Modification Date/Time", diff --git a/multiscanner/modules/Metadata/TrID.py b/multiscanner/modules/Metadata/TrID.py index 62a2b8c3..ac3197b9 100644 --- a/multiscanner/modules/Metadata/TrID.py +++ b/multiscanner/modules/Metadata/TrID.py @@ -8,7 +8,7 @@ import subprocess import re -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import list2cmdline, sshexec, SSH logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(CONFIG)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" 
DEFAULTCONF = { diff --git a/multiscanner/modules/Signature/YaraScan.py b/multiscanner/modules/Signature/YaraScan.py index 49d0d77e..a657c764 100644 --- a/multiscanner/modules/Signature/YaraScan.py +++ b/multiscanner/modules/Signature/YaraScan.py @@ -7,7 +7,7 @@ import os import time -from multiscanner.config import CONFIG +from multiscanner.config import CONFIG_FILE from multiscanner.common.utils import parse_dir @@ -17,7 +17,7 @@ TYPE = "Signature" NAME = "Yara" DEFAULTCONF = { - "ruledir": os.path.join(os.path.split(CONFIG)[0], "etc", "yarasigs"), + "ruledir": os.path.join(os.path.split(CONFIG_FILE)[0], "etc", "yarasigs"), "fileextensions": [".yar", ".yara", ".sig"], "ignore-tags": ["TLPRED"], "includes": False, diff --git a/multiscanner/ms.py b/multiscanner/ms.py index ebf213db..a97ba79b 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -30,7 +30,7 @@ from multiscanner.version import __version__ as MS_VERSION from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_dir, parse_file_list, queue2list) -from multiscanner.config import PY3, CONFIG, MODULESDIR, determine_configuration_path, parse_config +from multiscanner.config import CONFIG_FILE, MODULESDIR, PY3, determine_configuration_path, parse_config from multiscanner.storage import storage @@ -38,9 +38,9 @@ DEFAULTCONF = { "copyfilesto": False, "group-types": ["Antivirus"], - "storage-config": CONFIG.replace('config.ini', 'storage.ini'), - "api-config": CONFIG.replace('config.ini', 'api_config.ini'), - "web-config": CONFIG.replace('config.ini', 'web_config.ini'), + "storage-config": CONFIG_FILE.replace('config.ini', 'storage.ini'), + "api-config": CONFIG_FILE.replace('config.ini', 'api_config.ini'), + "web-config": CONFIG_FILE.replace('config.ini', 'web_config.ini'), } logger = logging.getLogger(__name__) @@ -264,7 +264,7 @@ def _update_DEFAULTCONF(defaultconf, filepath): defaultconf['offsets'] = os.path.join(os.path.split(filepath)[0], 'etc', 'nsrl', 'offsets') -def 
_get_main_config(config_object, filepath=CONFIG): +def _get_main_config(config_object, filepath=CONFIG_FILE): """ Reads in config for main script. It will write defaults if not present. Returns dictionary. @@ -286,7 +286,7 @@ def _get_main_config(config_object, filepath=CONFIG): with codecs.open(filepath, 'w', 'utf-8') as f: config_object.write(f) - # Read in main config + # Return main config as a dictionary return parse_config(config_object)['main'] @@ -383,7 +383,7 @@ def _start_module_threads(filelist, module_list, config, global_module_interface return ThreadList -def _write_missing_module_configs(module_list, config, filepath=CONFIG): +def _write_missing_module_configs(module_list, config, filepath=CONFIG_FILE): """ Write in default config for modules not in config file. Returns True if config was written, False if not. @@ -425,7 +425,7 @@ def _write_missing_module_configs(module_list, config, filepath=CONFIG): return False -def _rewrite_config(module_list, config, filepath=CONFIG): +def _rewrite_config(module_list, config, filepath=CONFIG_FILE): """ Write in default config for all modules. @@ -528,7 +528,7 @@ def parse_reports(resultlist, groups=None, ugly=True, includeMetadata=False, pyt return json.dumps(finaldata, sort_keys=True, separators=(',', ':'), ensure_ascii=False) -def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG, config=None, module_list=None): +def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG_FILE, config=None, module_list=None): """ The meat and potatoes. 
Returns the list of module results @@ -899,16 +899,16 @@ def _init(args): def _main(): - global CONFIG + global CONFIG_FILE # Get args args = _parse_args() # Set config or update locations if args.config is None: - args.config = CONFIG + args.config = CONFIG_FILE else: - CONFIG = args.config - _update_DEFAULTCONF(DEFAULTCONF, CONFIG) + CONFIG_FILE = args.config + _update_DEFAULTCONF(DEFAULTCONF, CONFIG_FILE) # Send all logs to stderr and set verbose if args.debug or args.verbose > 1: diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index 5bd5be27..44057966 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -17,10 +17,9 @@ from sqlalchemy.orm import aliased, sessionmaker from sqlalchemy_utils import create_database, database_exists -from multiscanner import CONFIG +from multiscanner.config import MS_CONFIG, get_config_path - -CONFIG_FILE = os.path.join(os.path.split(CONFIG)[0], "api_config.ini") +CONFIG_FILE = get_config_path(MS_CONFIG, 'api') Base = declarative_base() Session = sessionmaker() @@ -119,7 +118,7 @@ def init_db(self): db_name = self.config['db_name'] if db_type == 'sqlite': # we can ignore host, username, password, etc - sql_lite_db_path = os.path.join(os.path.split(CONFIG)[0], db_name) + sql_lite_db_path = os.path.join(os.path.split(CONFIG_FILE)[0], db_name) self.db_connection_string = 'sqlite:///{}'.format(sql_lite_db_path) else: username = self.config['username'] diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 09646225..994898b6 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -18,8 +18,7 @@ from multiscanner.common import utils -from multiscanner.config import CONFIG as MS_CONFIG -from multiscanner.config import get_config_path, parse_config +from multiscanner.config import MS_CONFIG, get_config_path, parse_config DEFAULTCONF = { diff --git a/multiscanner/tests/test_multiscanner.py 
b/multiscanner/tests/test_multiscanner.py index 047a70fa..5703b6a8 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -15,7 +15,7 @@ def setup_class(cls): cls.real_mod_dir = multiscanner.MODULESDIR multiscanner.MODULEDIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) - multiscanner.CONFIG = '.tmpfile.ini' + multiscanner.CONFIG_FILE = '.tmpfile.ini' @classmethod def teardown_class(cls): diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index cc35a9a9..b31ce859 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -2,9 +2,8 @@ from flask import Flask, render_template, request import re -from multiscanner import CONFIG as MS_CONFIG from multiscanner import __version__ -from multiscanner.config import get_config_path, read_config +from multiscanner.config import MS_CONFIG, get_config_path, read_config DEFAULTCONF = { 'HOST': "localhost", From ec1fdeb91c076e49acfc40300f24965d61f78cf5 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Wed, 27 Feb 2019 10:16:43 -0500 Subject: [PATCH 07/38] Streamline get_config_path() usage It will default to using the main MultiScanner config, but a different config can be passed in. The component parameter must be provided. This is a backwards-incompatible change as the order of the parameters has switched. 
--- multiscanner/analytics/ssdeep_analytics.py | 4 +- multiscanner/common/dir_monitor.py | 2 +- multiscanner/config.py | 46 +++++++++---------- multiscanner/distributed/api.py | 4 +- multiscanner/distributed/celery_worker.py | 6 +-- .../distributed/distributed_worker.py | 2 +- multiscanner/storage/sql_driver.py | 4 +- multiscanner/storage/storage.py | 2 +- multiscanner/web/app.py | 4 +- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/multiscanner/analytics/ssdeep_analytics.py b/multiscanner/analytics/ssdeep_analytics.py index 1232214f..ad5f95d1 100644 --- a/multiscanner/analytics/ssdeep_analytics.py +++ b/multiscanner/analytics/ssdeep_analytics.py @@ -36,14 +36,14 @@ ssdeep = None -from multiscanner.config import MS_CONFIG, get_config_path, read_config +from multiscanner.config import get_config_path, read_config from multiscanner.storage import storage class SSDeepAnalytic: def __init__(self, debug=False): - storage_conf = get_config_path(MS_CONFIG, 'storage') + storage_conf = get_config_path('storage') conf = read_config(storage_conf) storage_handler = storage.StorageHandler(configfile=storage_conf) es_handler = storage_handler.load_required_module('ElasticSearchStorage') diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index a1ddcb72..0f8b9d68 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -80,7 +80,7 @@ def start_observer(directory, work_queue, recursive=False): def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal): filelist = [] time_stamp = None - storage_conf = get_config_path(config, 'storage') + storage_conf = get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf) while not exit_signal.value: time.sleep(1) diff --git a/multiscanner/config.py b/multiscanner/config.py index 2a626db7..59023f25 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -76,29 +76,6 @@ def 
parse_config(config_object): return return_var -def get_config_path(config, component): - """Gets the location of the config file for the given MultiScanner component - from the MultiScanner config - - Components: - storage - api - web - - config - dictionary or ConfigParser object containing MultiScanner config - component - component to get the path for - """ - try: - return config['main']['%s-config' % component] - except KeyError: - logger.error( - "Couldn't find '{}-config' value in 'main' section " - "of config file. Have you run 'python multiscanner.py init'?" - .format(component) - ) - sys.exit() - - def write_config(config_object, config_file, section_name, default_config): """Write the default configuration to the given config file @@ -139,6 +116,29 @@ def read_config(config_file, section_name=None, default_config=None): MS_CONFIG = read_config(CONFIG_FILE) +def get_config_path(component, config=MS_CONFIG): + """Gets the location of the config file for the given MultiScanner component + from the MultiScanner config + + Components: + storage + api + web + + component - component to get the path for + config - dictionary or ConfigParser object containing MultiScanner config + """ + try: + return config['main']['%s-config' % component] + except KeyError: + logger.error( + "Couldn't find '{}-config' value in 'main' section " + "of config file. Have you run 'python multiscanner.py init'?" + .format(component) + ) + sys.exit() + + def get_enabled_modules(): """Returns a dictionary with module names as keys, with boolean values denoting whether or not they are enabled in the config. 
diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index b6403da4..851d524d 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -104,7 +104,7 @@ def default(self, obj): app = Flask(__name__) app.json_encoder = CustomJSONEncoder -api_config_file = get_config_path(MS_CONFIG, 'api') +api_config_file = get_config_path('api') api_config = read_config(api_config_file, 'api', DEFAULTCONF) # TODO: fix this mess @@ -143,7 +143,7 @@ def default(self, obj): logger.error("Retrying...") time.sleep(db_sleep_time) -storage_conf = get_config_path(MS_CONFIG, 'storage') +storage_conf = get_config_path('storage') storage_handler = StorageHandler(configfile=storage_conf) handler = storage_handler.load_required_module('ElasticSearchStorage') diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index 093f124e..4e3bfdef 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -32,13 +32,13 @@ 'tz': 'US/Eastern', } -configfile = get_config_path(MS_CONFIG, 'api') +configfile = get_config_path('api') config = read_config(configfile, 'celery', DEFAULTCONF) api_config = config.get('api') worker_config = config.get('celery') db_config = config.get('Database') -storage_configfile = get_config_path(MS_CONFIG, 'storage') +storage_configfile = get_config_path('storage') storage_config = read_config(storage_configfile) es_storage_config = storage_config.get('ElasticSearchStorage') @@ -116,7 +116,7 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename)) # Get the storage config - storage_conf = get_config_path(config, 'storage') + storage_conf = get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf) resultlist = multiscan( diff --git 
a/multiscanner/distributed/distributed_worker.py b/multiscanner/distributed/distributed_worker.py index 5c13b130..afa86ed1 100755 --- a/multiscanner/distributed/distributed_worker.py +++ b/multiscanner/distributed/distributed_worker.py @@ -31,7 +31,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal): filelist = [] time_stamp = None - storage_conf = get_config_path(config, 'storage') + storage_conf = get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf) while not exit_signal.value: time.sleep(1) diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index 44057966..1e22fe0e 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -17,9 +17,9 @@ from sqlalchemy.orm import aliased, sessionmaker from sqlalchemy_utils import create_database, database_exists -from multiscanner.config import MS_CONFIG, get_config_path +from multiscanner.config import get_config_path -CONFIG_FILE = get_config_path(MS_CONFIG, 'api') +CONFIG_FILE = get_config_path('api') Base = declarative_base() Session = sessionmaker() diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 994898b6..67df5938 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -97,7 +97,7 @@ def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): # Read in config if configfile: - configfile = get_config_path(MS_CONFIG, 'storage') + configfile = get_config_path('storage') config_object = configparser.ConfigParser() config_object.optionxform = str # Regen the config if needed or wanted diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index b31ce859..3f50868b 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -3,7 +3,7 @@ import re from multiscanner import __version__ -from multiscanner.config import MS_CONFIG, get_config_path, read_config +from 
multiscanner.config import get_config_path, read_config DEFAULTCONF = { 'HOST': "localhost", @@ -28,7 +28,7 @@ app = Flask(__name__) # Finagle Flask to read config from .ini file instead of .py file -web_config_file = get_config_path(MS_CONFIG, 'web') +web_config_file = get_config_path('web') web_config = read_config(web_config_file, 'web', DEFAULTCONF) conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From 0e31c033c42cd3399b575532cc272f9644d6dc20 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 28 Feb 2019 11:57:53 -0500 Subject: [PATCH 08/38] Cleanup code --- multiscanner/distributed/celery_worker.py | 18 ++++++------------ multiscanner/ms.py | 8 ++++---- multiscanner/tests/test_api.py | 2 +- multiscanner/tests/test_celery_worker.py | 7 +------ 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index 4e3bfdef..a809bab8 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -4,7 +4,6 @@ from the utils/ directory. 
''' -import configparser from datetime import datetime from socket import gethostname @@ -13,7 +12,7 @@ from celery.utils.log import get_task_logger from multiscanner import multiscan, parse_reports -from multiscanner.config import MS_CONFIG, get_config_path, parse_config, read_config +from multiscanner.config import MS_CONFIG, get_config_path, read_config from multiscanner.storage import elasticsearch_storage, storage from multiscanner.storage import sql_driver as database from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic @@ -34,7 +33,6 @@ configfile = get_config_path('api') config = read_config(configfile, 'celery', DEFAULTCONF) -api_config = config.get('api') worker_config = config.get('celery') db_config = config.get('Database') @@ -130,26 +128,22 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, # Get the Scan Config that the task was run with and # add it to the task metadata - scan_config_object = configparser.ConfigParser() - scan_config_object.optionxform = str - scan_config_object.read(config) - full_conf = parse_config(scan_config_object) sub_conf = {} - # Count number of modules enabled out of total possible + # Count number of modules enabled out of total possible (-1 for main) # and add it to the Scan Metadata total_enabled = 0 - total_modules = len(full_conf.keys()) + total_modules = len(config.keys()) - 1 # Get the count of modules enabled from the module_list # if it exists, else count via the config if module_list: total_enabled = len(module_list) else: - for key in full_conf: + for key in config: if key == 'main': continue sub_conf[key] = {} - sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED'] + sub_conf[key]['ENABLED'] = config[key]['ENABLED'] if sub_conf[key]['ENABLED'] is True: total_enabled += 1 @@ -204,7 +198,7 @@ def ssdeep_compare_celery(): @app.task() -def metricbeat_rollover(days, config=MS_CONFIG): +def metricbeat_rollover(days): ''' Clean up old Elastic Beats indices ''' diff --git 
a/multiscanner/ms.py b/multiscanner/ms.py index a97ba79b..939b61bd 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -545,6 +545,10 @@ def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG_FILE, filelist = parse_file_list(Files, recursive=recursive) else: filelist = Files + # If none of the files existed, why continue? + if not filelist: + raise ValueError("No valid files") + # A list of files in the module dir if module_list is None: module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) @@ -584,10 +588,6 @@ def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG_FILE, else: main_config = DEFAULTCONF - # If none of the files existed - if not filelist: - raise ValueError("No valid files") - # Copy files to a share if configured if "copyfilesto" not in main_config: main_config["copyfilesto"] = False diff --git a/multiscanner/tests/test_api.py b/multiscanner/tests/test_api.py index a1a2fd5b..6dd69a51 100644 --- a/multiscanner/tests/test_api.py +++ b/multiscanner/tests/test_api.py @@ -43,7 +43,7 @@ def post_file(app): data={'file': (BytesIO(b'my file contents'), 'hello world.txt'), }) -def mock_delay(full_path, original_filename, task_id, f_name, metadata, config): +def mock_delay(*args, **kwargs): pass diff --git a/multiscanner/tests/test_celery_worker.py b/multiscanner/tests/test_celery_worker.py index 049ced41..2d6b6cce 100644 --- a/multiscanner/tests/test_celery_worker.py +++ b/multiscanner/tests/test_celery_worker.py @@ -45,7 +45,7 @@ with open(TEST_FULL_PATH, 'r') as f: TEST_FILE_HASH = hashlib.sha256(f.read().encode('utf-8')).hexdigest() TEST_METADATA = {} -TEST_CONFIG = multiscanner.CONFIG +TEST_CONFIG = multiscanner.MS_CONFIG TEST_REPORT = { 'MD5': '96b47da202ddba8d7a6b91fecbf89a41', @@ -61,10 +61,6 @@ def post_file(app): data={'file': (BytesIO(b'my file contents'), 'hello world.txt'), }) -# def mock_delay(file_, original_filename, task_id, f_name, metadata, config): -# return TEST_REPORT - 
- class CeleryTestCase(unittest.TestCase): def setUp(self): self.sql_db = Database(config=DB_CONF) @@ -80,7 +76,6 @@ def tearDown(self): class TestCeleryCase(CeleryTestCase): def setUp(self): super(self.__class__, self).setUp() - # api.multiscanner_celery.delay = mock_delay def test_base(self): self.assertEqual(True, True) From 328d58abb573bed62c6a69c7345361a189d37705 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 28 Feb 2019 14:36:44 -0500 Subject: [PATCH 09/38] Remove configregen; use `multiscanner init` instead They do the same thing, and it makes more sense to handle this someplace other than the main scanning function. --- multiscanner/config.py | 8 ++++---- multiscanner/ms.py | 7 +------ multiscanner/storage/storage.py | 10 +++------- multiscanner/tests/test_multiscanner.py | 2 +- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index 59023f25..4721cbca 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -84,12 +84,12 @@ def write_config(config_object, config_file, section_name, default_config): section_name - the name of the section of defaults to be added default_config - values to set this configuration to """ - config_object.add_section(section_name) + if section_name not in config_object.sections(): + config_object.add_section(section_name) for key in default_config: config_object.set(section_name, key, str(default_config[key])) - conffile = codecs.open(config_file, 'w', 'utf-8') - config_object.write(conffile) - conffile.close() + with codecs.open(config_file, 'w', 'utf-8') as conffile: + config_object.write(conffile) def read_config(config_file, section_name=None, default_config=None): diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 939b61bd..3df82c48 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -528,13 +528,12 @@ def parse_reports(resultlist, groups=None, ugly=True, includeMetadata=False, pyt return json.dumps(finaldata, sort_keys=True, 
separators=(',', ':'), ensure_ascii=False) -def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG_FILE, config=None, module_list=None): +def multiscan(Files, recursive=False, configfile=CONFIG_FILE, config=None, module_list=None): """ The meat and potatoes. Returns the list of module results Files - A list of files and dirs to be scanned recursive - If true it will search the dirs in Files recursively - configregen - If True a new config file will be created overwriting the old configfile - What config file to use. Can be None. config - A dictionary containing the configuration options to be used. module_list - A list of file paths to be used as modules. Each string should end in .py @@ -561,10 +560,6 @@ def multiscan(Files, recursive=False, configregen=False, configfile=CONFIG_FILE, if configfile: config_object = configparser.ConfigParser() config_object.optionxform = str - # Regen the config if needed or wanted - if configregen or not os.path.isfile(configfile): - _rewrite_config(module_list, config_object, filepath=configfile) - config_object.read(configfile) main_config = _get_main_config(config_object, filepath=configfile) if config: diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 67df5938..25789cd4 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -89,7 +89,7 @@ def teardown(self): class StorageHandler(object): - def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): + def __init__(self, configfile=MS_CONFIG, config=None): self.storage_lock = threading.Lock() self.storage_counter = ThreadCounter() # Load all storage classes @@ -100,11 +100,6 @@ def __init__(self, configfile=MS_CONFIG, config=None, configregen=False): configfile = get_config_path('storage') config_object = configparser.ConfigParser() config_object.optionxform = str - # Regen the config if needed or wanted - if configregen or not os.path.isfile(configfile): - 
_write_main_config(config_object) - _rewrite_config(storage_classes, config_object, configfile) - config_object.read(configfile) if config: file_conf = parse_config(config_object) @@ -279,8 +274,9 @@ def config_init(filepath, overwrite=False, storage_classes=None): def _write_main_config(config_object): + """Write default config for storage config's [main] section + """ if not config_object.has_section('main'): - # Write default config config_object.add_section('main') for key in DEFAULTCONF: config_object.set('main', key, str(DEFAULTCONF[key])) diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 5703b6a8..dc1061a5 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -25,7 +25,7 @@ def teardown_class(cls): class Test_multiscan(_runmulti_tests): def setup(self): self.result = multiscanner.multiscan( - self.filelist, recursive=False, configregen=False, configfile='.tmpfile.ini') + self.filelist, recursive=False, configfile='.tmpfile.ini') self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True) self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True) From 322a7ba1d646d6da5d06b41622ad1a59cd4778de Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Fri, 1 Mar 2019 12:39:37 -0500 Subject: [PATCH 10/38] Switch multiscan() to not use config filename It now just uses the config parameter. Config file handling, including filling in missing sections or values, has been relegated to main() in ms.py. We shouldn't need to rewrite config files every time we do a scan, just once when we start MultiScanner. 
--- multiscanner/__init__.py | 1 + multiscanner/config.py | 17 ++- multiscanner/distributed/celery_worker.py | 2 +- multiscanner/ms.py | 110 ++++++-------------- multiscanner/tests/test_configs.py | 10 +- multiscanner/tests/test_module_interface.py | 2 +- multiscanner/tests/test_multiscanner.py | 6 +- 7 files changed, 58 insertions(+), 90 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index aeff1e77..4255fca4 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -5,6 +5,7 @@ from .config import ( # noqa F401 CONFIG_FILE, MS_WD, MS_CONFIG, MODULESDIR, MODULESLIST, PY3, + update_ms_config ) from .ms import ( # noqa F401 diff --git a/multiscanner/config.py b/multiscanner/config.py index 4721cbca..8e0b6ce8 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -139,7 +139,7 @@ def get_config_path(component, config=MS_CONFIG): sys.exit() -def get_enabled_modules(): +def get_modules(): """Returns a dictionary with module names as keys, with boolean values denoting whether or not they are enabled in the config. """ @@ -154,8 +154,19 @@ def get_enabled_modules(): modules[module] = MS_CONFIG[module]['ENABLED'] except KeyError as e: logger.debug(e) + modules[module] = False return modules -# The list of enabled modules -MODULESLIST = get_enabled_modules() +# The dictionary of modules and whether they're enabled or not +MODULESLIST = get_modules() + + +def update_ms_config(config_file): + """Update config globals to a different file than the default. 
+ + config_file - the file to be assigned to CONFIG_FILE and read into MS_CONFIG + """ + global CONFIG_FILE, MS_CONFIG + CONFIG_FILE = config_file + MS_CONFIG = read_config(CONFIG_FILE) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index a809bab8..b0b7fcce 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -119,7 +119,7 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, resultlist = multiscan( [file_], - configfile=config, + config=config, module_list=module_list ) results = parse_reports(resultlist, python=True) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 3df82c48..6d116c6a 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -30,7 +30,9 @@ from multiscanner.version import __version__ as MS_VERSION from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_dir, parse_file_list, queue2list) -from multiscanner.config import CONFIG_FILE, MODULESDIR, PY3, determine_configuration_path, parse_config +from multiscanner.config import (CONFIG_FILE, MODULESDIR, MODULESLIST, + MS_CONFIG, PY3, determine_configuration_path, + get_config_path, update_ms_config) from multiscanner.storage import storage @@ -264,32 +266,6 @@ def _update_DEFAULTCONF(defaultconf, filepath): defaultconf['offsets'] = os.path.join(os.path.split(filepath)[0], 'etc', 'nsrl', 'offsets') -def _get_main_config(config_object, filepath=CONFIG_FILE): - """ - Reads in config for main script. It will write defaults if not present. - Returns dictionary. 
- - Config - The config object - filepath - The path to the config file - """ - filepath = determine_configuration_path(filepath) - # Write main defaults if needed - ConfNeedsWrite = False - if 'main' not in config_object.sections(): - ConfNeedsWrite = True - _update_DEFAULTCONF(DEFAULTCONF, filepath) - config_object.add_section('main') - for key in DEFAULTCONF: - config_object.set('main', key, str(DEFAULTCONF[key])) - - if ConfNeedsWrite: - with codecs.open(filepath, 'w', 'utf-8') as f: - config_object.write(f) - - # Return main config as a dictionary - return parse_config(config_object)['main'] - - def _copy_to_share(filelist, filedic, sharedir): """ Copies files from filelist to a share and populates the filedic. Returns a @@ -387,6 +363,8 @@ def _write_missing_module_configs(module_list, config, filepath=CONFIG_FILE): """ Write in default config for modules not in config file. Returns True if config was written, False if not. + Also adds a '[main]' section if not present. + module_list - The list of modules config - The config object """ @@ -397,7 +375,7 @@ def _write_missing_module_configs(module_list, config, filepath=CONFIG_FILE): if module.endswith(".py"): modname = os.path.basename(module).split('.')[0] moddir = os.path.dirname(module) - if modname not in config.sections(): + if modname not in config.keys(): mod = load_module(os.path.basename(module).split('.')[0], [moddir]) if mod: try: @@ -407,20 +385,23 @@ def _write_missing_module_configs(module_list, config, filepath=CONFIG_FILE): continue ConfNeedsWrite = True _update_DEFAULTCONF(conf, filepath) - config.add_section(modname) + config[modname] = {} for key in conf: - config.set(modname, key, str(conf[key])) + config[modname][key] = str(conf[key]) - if 'main' not in config.sections(): + if 'main' not in config.keys(): ConfNeedsWrite = True _update_DEFAULTCONF(DEFAULTCONF, filepath) - config.add_section('main') + config['main'] = {} for key in DEFAULTCONF: - config.set('main', key, 
str(DEFAULTCONF[key])) + config['main'][key] = str(DEFAULTCONF[key]) if ConfNeedsWrite: + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read_dict(config) with codecs.open(filepath, 'w', 'utf-8') as f: - config.write(f) + config_object.write(f) return True return False @@ -528,13 +509,12 @@ def parse_reports(resultlist, groups=None, ugly=True, includeMetadata=False, pyt return json.dumps(finaldata, sort_keys=True, separators=(',', ':'), ensure_ascii=False) -def multiscan(Files, recursive=False, configfile=CONFIG_FILE, config=None, module_list=None): +def multiscan(Files, recursive=False, config=None, module_list=None): """ The meat and potatoes. Returns the list of module results Files - A list of files and dirs to be scanned recursive - If true it will search the dirs in Files recursively - configfile - What config file to use. Can be None. config - A dictionary containing the configuration options to be used. module_list - A list of file paths to be used as modules. 
Each string should end in .py """ @@ -553,35 +533,16 @@ def multiscan(Files, recursive=False, configfile=CONFIG_FILE, config=None, modul module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) # A dictionary used for the copyfileto parameter filedic = {} - # What will be the config file object - config_object = None # Read in config - if configfile: - config_object = configparser.ConfigParser() - config_object.optionxform = str - config_object.read(configfile) - main_config = _get_main_config(config_object, filepath=configfile) - if config: - file_conf = parse_config(config_object) - for key in config: - if key not in file_conf: - file_conf[key] = config[key] - file_conf[key]['_load_default'] = True - else: - file_conf[key].update(config[key]) - config = file_conf - else: - config = parse_config(config_object) + if config is None: + config = {} else: - if config is None: - config = {} - else: - config['_load_default'] = True - if 'main' in config: - main_config = config['main'] - else: - main_config = DEFAULTCONF + config['_load_default'] = True + if 'main' in config: + main_config = config['main'] + else: + main_config = DEFAULTCONF # Copy files to a share if configured if "copyfilesto" not in main_config: @@ -598,10 +559,6 @@ def multiscan(Files, recursive=False, configfile=CONFIG_FILE, config=None, modul # Start a thread for each module thread_list = _start_module_threads(filelist, module_list, config, global_module_interface) - # Write the default configure settings for missing ones - if config_object: - _write_missing_module_configs(module_list, config_object, filepath=configfile) - # Warn about spaces in file names for f in filelist: if ' ' in f: @@ -868,11 +825,7 @@ def _init(args): config_init(args.config) # Init storage - config = configparser.ConfigParser() - config.optionxform = str - config.read(args.config) - config = _get_main_config(config) - storage_config = config["storage-config"] + storage_config = get_config_path('storage') if 
os.path.isfile(storage_config): logger.warning('{} already exists, overwriting will destroy changes'.format(storage_config)) try: @@ -894,15 +847,13 @@ def _init(args): def _main(): - global CONFIG_FILE - # Get args args = _parse_args() # Set config or update locations if args.config is None: args.config = CONFIG_FILE else: - CONFIG_FILE = args.config + update_ms_config(args.config) _update_DEFAULTCONF(DEFAULTCONF, CONFIG_FILE) # Send all logs to stderr and set verbose @@ -926,6 +877,9 @@ def _main(): if not os.path.isfile(args.config): config_init(args.config) + else: + # Write the default configure settings for any missing modules + _write_missing_module_configs(MODULESLIST.keys(), MS_CONFIG, filepath=CONFIG_FILE) # Make sure report is not a dir if args.json: @@ -985,17 +939,15 @@ def _main(): starttime = str(datetime.datetime.now()) # Run the multiscan - results = multiscan(filelist, configfile=args.config) + results = multiscan(filelist, config=MS_CONFIG) # We need to read in the config for the parseReports call config = configparser.ConfigParser() config.optionxform = str config.read(args.config) - config = _get_main_config(config) + config = MS_CONFIG['main'] # Make sure we have a group-types - if "group-types" not in config: - config["group-types"] = [] - elif not config["group-types"]: + if "group-types" not in config or not config["group-types"]: config["group-types"] = [] # Add in script metadata diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index 2a6681de..e48fea33 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -14,7 +14,7 @@ def test_no_config(): results, metadata = multiscanner.multiscan( - filelist, configfile=None, config=None, + filelist, config=None, recursive=None, module_list=module_list)[0] assert metadata['conf'] == {'a': 'b', 'c': 'd'} @@ -22,7 +22,7 @@ def test_no_config(): def test_config_api_no_file(): config = {'test_conf': {'a': 'z'}} results, metadata = 
multiscanner.multiscan( - filelist, configfile=None, config=config, + filelist, config=config, recursive=None, module_list=module_list)[0] assert metadata['conf'] == {'a': 'z', 'c': 'd'} @@ -30,8 +30,9 @@ def test_config_api_no_file(): def test_config_api_with_empty_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] + multiscanner.update_ms_config(config_file) results, metadata = multiscanner.multiscan( - filelist, configfile=config_file, config=config, + filelist, config=config, recursive=None, module_list=module_list)[0] os.remove(config_file) assert metadata['conf'] == {'a': 'z', 'c': 'd'} @@ -41,8 +42,9 @@ def test_config_api_with_real_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] multiscanner.config_init(config_file) + multiscanner.update_ms_config(config_file) results, metadata = multiscanner.multiscan( - filelist, configfile=config_file, config=config, + filelist, config=config, recursive=None, module_list=module_list)[0] os.remove(config_file) assert metadata['conf'] == {'a': 'z', 'c': 'd'} diff --git a/multiscanner/tests/test_module_interface.py b/multiscanner/tests/test_module_interface.py index eedbc1e4..ca27a7ed 100644 --- a/multiscanner/tests/test_module_interface.py +++ b/multiscanner/tests/test_module_interface.py @@ -12,7 +12,7 @@ def add_int(x, y): def test_subscan(): m = multiscanner.multiscan( - ['fake.zip'], recursive=None, configfile=None, + ['fake.zip'], recursive=None, module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')]) assert m == [([(u'fake.zip', 0)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0', u'fake.zip')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip', [u'fake.zip/0'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0', 1)], {'Type': 'Test', 'Name': 'test_subscan'}), 
([(u'fake.zip/0/1', u'fake.zip/0')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip/0', [u'fake.zip/0/1'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0/1', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0/1', 2)], {'Type': 'Test', 'Name': 'test_subscan'})] # noqa: E501 diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index dc1061a5..18603bee 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -15,7 +15,9 @@ def setup_class(cls): cls.real_mod_dir = multiscanner.MODULESDIR multiscanner.MODULEDIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) - multiscanner.CONFIG_FILE = '.tmpfile.ini' + config_file = '.tmpfile.ini' + multiscanner.config_init(config_file) + multiscanner.update_ms_config(config_file) @classmethod def teardown_class(cls): @@ -25,7 +27,7 @@ def teardown_class(cls): class Test_multiscan(_runmulti_tests): def setup(self): self.result = multiscanner.multiscan( - self.filelist, recursive=False, configfile='.tmpfile.ini') + self.filelist, recursive=False) self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True) self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True) From 74245b33e08870f7058778b8334d15f3545253a6 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 5 Mar 2019 08:34:37 -0500 Subject: [PATCH 11/38] Update module list handling Functions like multiscan() and _subscan() now take a list of module names instead of full paths. Renamed MODULESLIST -> MODULE_LIST. Stopped using globals as default parameters in certain functions so the functions will use the most up-to-date version of the global instead of the global's state when the function was defined. 
--- multiscanner/__init__.py | 4 +- multiscanner/config.py | 52 ++++-- multiscanner/distributed/api.py | 8 +- multiscanner/distributed/celery_worker.py | 4 +- multiscanner/ms.py | 168 ++++++++++---------- multiscanner/storage/storage.py | 4 +- multiscanner/tests/test_celery_worker.py | 17 +- multiscanner/tests/test_configs.py | 12 +- multiscanner/tests/test_module_interface.py | 5 +- multiscanner/tests/test_modules.py | 10 +- multiscanner/tests/test_multiscanner.py | 8 +- 11 files changed, 163 insertions(+), 129 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index 4255fca4..61d2c1d7 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -4,8 +4,8 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from .config import ( # noqa F401 - CONFIG_FILE, MS_WD, MS_CONFIG, MODULESDIR, MODULESLIST, PY3, - update_ms_config + CONFIG_FILE, MS_WD, MS_CONFIG, MODULES_DIR, MODULE_LIST, PY3, + update_ms_config, update_ms_config_file ) from .ms import ( # noqa F401 diff --git a/multiscanner/config.py b/multiscanner/config.py index 8e0b6ce8..dcc919f7 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -21,7 +21,7 @@ MS_WD = os.path.dirname(os.path.abspath(__file__)) # The directory where the modules are kept -MODULESDIR = os.path.join(MS_WD, 'modules') +MODULES_DIR = os.path.join(MS_WD, 'modules') def get_configuration_paths(): @@ -67,7 +67,7 @@ def parse_config(config_object): for key in section_dict: try: section_dict[key] = ast.literal_eval(section_dict[key]) - except SyntaxError as e: + except (SyntaxError, ValueError) as e: # Ignore if config value isn't convertible to a Python literal pass except Exception as e: @@ -116,7 +116,7 @@ def read_config(config_file, section_name=None, default_config=None): MS_CONFIG = read_config(CONFIG_FILE) -def get_config_path(component, config=MS_CONFIG): +def get_config_path(component, config=None): """Gets the location of the config file for the given MultiScanner 
component from the MultiScanner config @@ -128,6 +128,9 @@ def get_config_path(component, config=MS_CONFIG): component - component to get the path for config - dictionary or ConfigParser object containing MultiScanner config """ + if config is None: + config = MS_CONFIG + try: return config['main']['%s-config' % component] except KeyError: @@ -140,29 +143,46 @@ def get_config_path(component, config=MS_CONFIG): def get_modules(): - """Returns a dictionary with module names as keys, with boolean values - denoting whether or not they are enabled in the config. + """Returns a dictionary with module names as keys. Values contain a boolean + denoting whether or not they are enabled in the config, and the folder + containing the module. """ - files = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) - filenames = [os.path.splitext(os.path.basename(f)) for f in files] - module_names = [m[0] for m in filenames if m[1] == '.py'] + files = parse_dir(MODULES_DIR, recursive=True, exclude=["__init__"]) global MS_CONFIG modules = {} - for module in module_names: - try: - modules[module] = MS_CONFIG[module]['ENABLED'] - except KeyError as e: - logger.debug(e) - modules[module] = False + # for module in module_names: + for f in files: + folder = os.path.dirname(f) + filename = os.path.splitext(os.path.basename(f)) + + if filename[1] == '.py': + module = filename[0] + try: + modules[module] = [MS_CONFIG[module]['ENABLED'], folder] + except KeyError as e: + logger.debug(e) + modules[module] = [False, folder] return modules # The dictionary of modules and whether they're enabled or not -MODULESLIST = get_modules() +MODULE_LIST = get_modules() + + +def update_ms_config(config): + """Update global config dictionary. 
+ + config - the ConfigParser object or dictionary to replace MS_CONFIG with + """ + global MS_CONFIG + if isinstance(config, configparser.ConfigParser): + MS_CONFIG = parse_config(config) + else: + MS_CONFIG = config -def update_ms_config(config_file): +def update_ms_config_file(config_file): """Update config globals to a different file than the default. config_file - the file to be assigned to CONFIG_FILE and read into MS_CONFIG diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 851d524d..37f62ba8 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -60,7 +60,7 @@ from flask_cors import CORS from jinja2 import Markup -from multiscanner import CONFIG_FILE, MODULESLIST, MS_CONFIG, MS_WD, multiscan, parse_reports +from multiscanner import CONFIG_FILE, MODULE_LIST, MS_CONFIG, MS_WD, multiscan, parse_reports from multiscanner.common import pdf_generator, stix2_generator from multiscanner.config import PY3, get_config_path, read_config from multiscanner.storage import StorageHandler @@ -262,7 +262,7 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. 
''' - return jsonify({'Modules': MODULESLIST}) + return jsonify({'Modules': MODULE_LIST}) @app.route('/api/v1/tasks', methods=['GET']) @@ -477,9 +477,9 @@ def create_task(): rescan = True elif key == 'modules': module_names = request.form[key].split(',') - modules = list(set(module_names).intersection(MODULESLIST.keys())) + modules = list(set(module_names).intersection(MODULE_LIST.keys())) - # files = utils.parse_dir(MODULESDIR, True) + # files = utils.parse_dir(MODULES_DIR, True) # modules = [] # for f in files: # split = os.path.splitext(os.path.basename(f)) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index b0b7fcce..f949b399 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -98,7 +98,7 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): @app.task(base=MultiScannerTask) def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, - config=MS_CONFIG, module_list=None): + config=None, module_list=None): ''' Queue up multiscanner tasks @@ -114,6 +114,8 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename)) # Get the storage config + if config is None: + config = MS_CONFIG storage_conf = get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 6d116c6a..44e65bcf 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -16,6 +16,7 @@ import random import re import shutil +import six import string import sys import tempfile @@ -29,10 +30,11 @@ from multiscanner.version import __version__ as MS_VERSION from multiscanner.common.utils import (basename, convert_encoding, load_module, - parse_dir, parse_file_list, queue2list) -from multiscanner.config import (CONFIG_FILE, MODULESDIR, 
MODULESLIST, + parse_file_list, queue2list) +from multiscanner.config import (CONFIG_FILE, MODULE_LIST, MS_CONFIG, PY3, determine_configuration_path, - get_config_path, update_ms_config) + get_config_path, update_ms_config, + update_ms_config_file) from multiscanner.storage import storage @@ -303,7 +305,7 @@ def _start_module_threads(filelist, module_list, config, global_module_interface Starts each module on the file list in a separate thread. Returns a list of threads filelist - A lists of strings. The strings are files to be scanned - module_list - A list of all the modules to be run + module_list - A list of the names of all modules to be run config - The config dictionary global_module_interface - The global module interface to be injected in each module """ @@ -312,79 +314,82 @@ def _start_module_threads(filelist, module_list, config, global_module_interface ThreadDict = {} global_module_interface.run_count += 1 # Starts a thread for each module. - for module in module_list: - if module.endswith(".py"): - modname = os.path.basename(module[:-3]) - - # If the module is disabled we don't mess with it further to prevent spamming errors on screen - if modname in config: - if not config[modname].get('ENABLED', True): - continue - - moddir = os.path.dirname(module) - mod = load_module(os.path.basename(module).split('.')[0], [moddir]) - if not mod: - logger.warning("{} not a valid module...".format(module)) + for modname in module_list: + # If the module is disabled we don't mess with it further to prevent spamming errors on screen + if modname in config: + if not config[modname].get('ENABLED', True): continue - conf = None - if modname in config: - if '_load_default' in config or '_load_default' in config[modname]: - try: - conf = mod.DEFAULTCONF - conf.update(config[modname]) - except Exception as e: - logger.warning(e) - conf = config[modname] - # Remove _load_default from config - if '_load_default' in conf: - del conf['_load_default'] - else: - conf = 
config[modname] + # TODO: What if the module isn't specified in the config - # Try and read in the default conf if one was not passed - if not conf: + try: + moddir = MODULE_LIST[modname][1] + except KeyError: + logger.warning(MODULE_LIST) + logger.warning("{} not a valid module...".format(modname)) + continue + + mod = load_module(modname, [moddir]) + if not mod: + logger.warning("{} not a valid module...".format(modname)) + continue + conf = None + if modname in config: + if '_load_default' in config or '_load_default' in config[modname]: try: conf = mod.DEFAULTCONF + conf.update(config[modname]) except Exception as e: - logger.error(e) - thread = _Thread( - target=_run_module, - args=(modname, mod, filelist, ThreadDict, global_module_interface, conf)) - thread.name = modname - thread.setDaemon(True) - ThreadList.append(thread) - ThreadDict[modname] = thread + logger.warning(e) + conf = config[modname] + # Remove _load_default from config + if '_load_default' in conf: + del conf['_load_default'] + else: + conf = config[modname] + + # Try and read in the default conf if one was not passed + if not conf: + try: + conf = mod.DEFAULTCONF + except Exception as e: + logger.error(e) + thread = _Thread( + target=_run_module, + args=(modname, mod, filelist, ThreadDict, global_module_interface, conf)) + thread.name = modname + thread.setDaemon(True) + ThreadList.append(thread) + ThreadDict[modname] = thread + for thread in ThreadList: thread.start() return ThreadList -def _write_missing_module_configs(module_list, config, filepath=CONFIG_FILE): +def _write_missing_module_configs(config, filepath=CONFIG_FILE): """ Write in default config for modules not in config file. Returns True if config was written, False if not. Also adds a '[main]' section if not present. 
- module_list - The list of modules + module_list - The list of modules (filenames) config - The config object """ filepath = determine_configuration_path(filepath) ConfNeedsWrite = False - module_list.sort() - for module in module_list: - if module.endswith(".py"): - modname = os.path.basename(module).split('.')[0] - moddir = os.path.dirname(module) - if modname not in config.keys(): - mod = load_module(os.path.basename(module).split('.')[0], [moddir]) - if mod: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue + for modname, module in sorted(six.iteritems(MODULE_LIST)): + if modname not in config.keys(): + moddir = module[1] + print(moddir) + mod = load_module(modname, [moddir]) + if mod: + try: + conf = mod.DEFAULTCONF + except Exception as e: + logger.warning(e) + continue + if modname not in config.keys(): ConfNeedsWrite = True - _update_DEFAULTCONF(conf, filepath) config[modname] = {} for key in conf: config[modname][key] = str(conf[key]) @@ -415,22 +420,19 @@ def _rewrite_config(module_list, config, filepath=CONFIG_FILE): """ filepath = determine_configuration_path(filepath) logger.info('Rewriting config...') - module_list.sort() - for module in module_list: - if module.endswith('.py'): - modname = os.path.basename(module).split('.')[0] - moddir = os.path.dirname(module) - mod = load_module(os.path.basename(module).split('.')[0], [moddir]) - if mod: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue - _update_DEFAULTCONF(conf, filepath) - config.add_section(modname) - for key in conf: - config.set(modname, key, str(conf[key])) + for modname, module in sorted(six.iteritems(module_list)): + moddir = module[1] + mod = load_module(modname, [moddir]) + if mod: + try: + conf = mod.DEFAULTCONF + except Exception as e: + logger.warning(e) + continue + _update_DEFAULTCONF(conf, filepath) + config.add_section(modname) + for key in conf: + config.set(modname, key, str(conf[key])) 
_update_DEFAULTCONF(DEFAULTCONF, filepath) config.add_section('main') @@ -440,13 +442,18 @@ def _rewrite_config(module_list, config, filepath=CONFIG_FILE): with codecs.open(filepath, 'w', 'utf-8') as f: config.write(f) + # Set global main config + update_ms_config(config) -def config_init(filepath, module_list=parse_dir(MODULESDIR, recursive=True, exclude=["__init__"])): + +def config_init(filepath, module_list=None): """ Creates a new config file at filepath filepath - The config file to create """ + if module_list is None: + module_list = MODULE_LIST config = configparser.ConfigParser() config.optionxform = str @@ -530,7 +537,7 @@ def multiscan(Files, recursive=False, config=None, module_list=None): # A list of files in the module dir if module_list is None: - module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) + module_list = [modname for modname in MODULE_LIST] # A dictionary used for the copyfileto parameter filedic = {} @@ -816,11 +823,8 @@ def _init(args): config_init(args.config) else: logger.info('Checking for missing modules in configuration...') - module_list = parse_dir(MODULESDIR, recursive=True, exclude=["__init__"]) - config = configparser.ConfigParser() - config.optionxform = str - config.read(args.config) - _write_missing_module_configs(module_list, config, filepath=args.config) + config = MS_CONFIG # MS_CONFIG will already have been set in main() + _write_missing_module_configs(config, filepath=args.config) else: config_init(args.config) @@ -853,7 +857,7 @@ def _main(): if args.config is None: args.config = CONFIG_FILE else: - update_ms_config(args.config) + update_ms_config_file(args.config) _update_DEFAULTCONF(DEFAULTCONF, CONFIG_FILE) # Send all logs to stderr and set verbose @@ -879,7 +883,7 @@ def _main(): config_init(args.config) else: # Write the default configure settings for any missing modules - _write_missing_module_configs(MODULESLIST.keys(), MS_CONFIG, filepath=CONFIG_FILE) + 
_write_missing_module_configs(MS_CONFIG, filepath=CONFIG_FILE) # Make sure report is not a dir if args.json: diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 25789cd4..fa7eda29 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -89,11 +89,13 @@ def teardown(self): class StorageHandler(object): - def __init__(self, configfile=MS_CONFIG, config=None): + def __init__(self, configfile=None, config=None): self.storage_lock = threading.Lock() self.storage_counter = ThreadCounter() # Load all storage classes storage_classes = _get_storage_classes() + if configfile is None: + configfile = MS_CONFIG # Read in config if configfile: diff --git a/multiscanner/tests/test_celery_worker.py b/multiscanner/tests/test_celery_worker.py index 2d6b6cce..5d7bcc75 100644 --- a/multiscanner/tests/test_celery_worker.py +++ b/multiscanner/tests/test_celery_worker.py @@ -5,7 +5,6 @@ import mock import multiscanner -from multiscanner.common import utils from multiscanner.distributed import celery_worker from multiscanner.storage.sql_driver import Database @@ -20,16 +19,14 @@ # Get a subset of simple modules to run in testing # the celery worker -MODULE_LIST = utils.parse_dir(multiscanner.MODULESDIR, recursive=True) -DESIRED_MODULES = [ - 'entropy.py', - 'MD5.py', - 'SHA1.py', - 'SHA256.py', - 'libmagic.py', - 'ssdeep.py' +MODULES_TO_TEST = [ + 'entropy', + 'MD5', + 'SHA1', + 'SHA256', + 'libmagic', + 'ssdeep' ] -MODULES_TO_TEST = [i for e in DESIRED_MODULES for i in MODULE_LIST if e in i] TEST_DB_PATH = os.path.join(CWD, 'testing.db') diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index e48fea33..7b665df0 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -1,4 +1,5 @@ from __future__ import division, absolute_import, with_statement, print_function, unicode_literals +import mock import os import tempfile @@ -8,10 +9,12 @@ # Makes sure we use the 
multiscanner in ../ CWD = os.path.dirname(os.path.abspath(__file__)) -module_list = [os.path.join(CWD, 'modules', 'test_conf.py')] +mock_modlist = {'test_conf': [True, os.path.join(CWD, 'modules')]} filelist = utils.parse_dir(os.path.join(CWD, 'files')) +module_list = ['test_conf'] +@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_no_config(): results, metadata = multiscanner.multiscan( filelist, config=None, @@ -19,6 +22,7 @@ def test_no_config(): assert metadata['conf'] == {'a': 'b', 'c': 'd'} +@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_config_api_no_file(): config = {'test_conf': {'a': 'z'}} results, metadata = multiscanner.multiscan( @@ -27,10 +31,11 @@ def test_config_api_no_file(): assert metadata['conf'] == {'a': 'z', 'c': 'd'} +@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_config_api_with_empty_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] - multiscanner.update_ms_config(config_file) + multiscanner.update_ms_config_file(config_file) results, metadata = multiscanner.multiscan( filelist, config=config, recursive=None, module_list=module_list)[0] @@ -38,11 +43,12 @@ def test_config_api_with_empty_file(): assert metadata['conf'] == {'a': 'z', 'c': 'd'} +@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_config_api_with_real_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] multiscanner.config_init(config_file) - multiscanner.update_ms_config(config_file) + multiscanner.update_ms_config_file(config_file) results, metadata = multiscanner.multiscan( filelist, config=config, recursive=None, module_list=module_list)[0] diff --git a/multiscanner/tests/test_module_interface.py b/multiscanner/tests/test_module_interface.py index ca27a7ed..d236695e 100644 --- a/multiscanner/tests/test_module_interface.py +++ b/multiscanner/tests/test_module_interface.py @@ -1,19 +1,22 @@ from __future__ import division, absolute_import, print_function, 
unicode_literals +import mock import os import multiscanner CWD = os.path.dirname(os.path.abspath(__file__)) +mock_modlist = {'test_subscan': [True, os.path.join(CWD, 'modules')]} def add_int(x, y): return x + y +@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_subscan(): m = multiscanner.multiscan( ['fake.zip'], recursive=None, - module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')]) + module_list=['test_subscan']) assert m == [([(u'fake.zip', 0)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0', u'fake.zip')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip', [u'fake.zip/0'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0', 1)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0/1', u'fake.zip/0')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip/0', [u'fake.zip/0/1'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0/1', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0/1', 2)], {'Type': 'Test', 'Name': 'test_subscan'})] # noqa: E501 diff --git a/multiscanner/tests/test_modules.py b/multiscanner/tests/test_modules.py index e7fe3d95..5a0509b4 100644 --- a/multiscanner/tests/test_modules.py +++ b/multiscanner/tests/test_modules.py @@ -26,20 +26,20 @@ def test_fail_loadModule(): class _runmod_tests(object): @classmethod def setup_class(cls): - cls.real_mod_dir = multiscanner.MODULESDIR - multiscanner.MODULESDIR = os.path.join(CWD, "modules") + cls.real_mod_dir = multiscanner.MODULES_DIR + multiscanner.MODULES_DIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) cls.files = ['a', 'b', 'C:\\c', '/d/d'] cls.threadDict = {} @classmethod def teardown_class(cls): - multiscanner.MODULESDIR = cls.real_mod_dir + 
multiscanner.MODULES_DIR = cls.real_mod_dir class Test_runModule_test_1(_runmod_tests): def setup(self): - m = utils.load_module('test_1', [multiscanner.MODULESDIR]) + m = utils.load_module('test_1', [multiscanner.MODULES_DIR]) global_module_interface = multiscanner._GlobalModuleInterface() self.result = multiscanner._run_module('test_1', m, self.filelist, self.threadDict, global_module_interface) global_module_interface._cleanup() @@ -55,7 +55,7 @@ def test_runModule_results(self): class Test_runModule_test_2(_runmod_tests): def setup(self): - self.m = utils.load_module('test_2', [multiscanner.MODULESDIR]) + self.m = utils.load_module('test_2', [multiscanner.MODULES_DIR]) self.threadDict['test_2'] = mock.Mock() self.threadDict['test_1'] = mock.Mock() self.threadDict['test_1'].ret = ([('a', 'a'), ('C:\\c', 'c')], {}) diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 18603bee..614c0b50 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -12,16 +12,16 @@ class _runmulti_tests(object): @classmethod def setup_class(cls): - cls.real_mod_dir = multiscanner.MODULESDIR - multiscanner.MODULEDIR = os.path.join(CWD, "modules") + cls.real_mod_dir = multiscanner.MODULES_DIR + multiscanner.MODULES_DIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) config_file = '.tmpfile.ini' multiscanner.config_init(config_file) - multiscanner.update_ms_config(config_file) + multiscanner.update_ms_config_file(config_file) @classmethod def teardown_class(cls): - multiscanner.MODULESDIR = cls.real_mod_dir + multiscanner.MODULES_DIR = cls.real_mod_dir class Test_multiscan(_runmulti_tests): From 1404df1722df385f3cb647c73482af7d14043eb6 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 5 Mar 2019 10:09:36 -0500 Subject: [PATCH 12/38] Fix a few calls to multiscan() --- multiscanner/common/dir_monitor.py | 7 +++++-- multiscanner/distributed/api.py | 2 +- 
multiscanner/distributed/distributed_worker.py | 6 ++++-- multiscanner/web/app.py | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index 0f8b9d68..7bb9297c 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -20,7 +20,7 @@ from watchdog.observers import Observer from multiscanner import multiscan, parse_reports -from multiscanner.config import CONFIG_FILE, get_config_path +from multiscanner.config import CONFIG_FILE, MS_CONFIG, get_config_path, update_ms_config_file from multiscanner.storage import storage logger = logging.getLogger(__name__) @@ -117,13 +117,16 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e def _main(): args = _parse_args() + if args.config != CONFIG_FILE: + update_ms_config_file(args.config) + work_queue = multiprocessing.Queue() exit_signal = multiprocessing.Value('b') exit_signal.value = False observer = start_observer(args.Directory, work_queue, args.recursive) ms_process = multiprocessing.Process( target=multiscanner_process, - args=(work_queue, args.config, args.batch, args.seconds, args.delete, exit_signal)) + args=(work_queue, MS_CONFIG, args.batch, args.seconds, args.delete, exit_signal)) ms_process.start() try: while True: diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 37f62ba8..10e662ac 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -199,7 +199,7 @@ def multiscanner_process(work_queue, exit_signal): filelist = [item[0] for item in metadata_list] # modulelist = [item[5] for item in metadata_list] resultlist = multiscan( - filelist, configfile=CONFIG_FILE + filelist, # module_list ) results = parse_reports(resultlist, python=True) diff --git a/multiscanner/distributed/distributed_worker.py b/multiscanner/distributed/distributed_worker.py index afa86ed1..b648ee7f 100755 --- 
a/multiscanner/distributed/distributed_worker.py +++ b/multiscanner/distributed/distributed_worker.py @@ -16,7 +16,7 @@ standard_library.install_aliases() from multiscanner import multiscan, parse_reports -from multiscanner.config import get_config_path, read_config +from multiscanner.config import get_config_path, read_config, update_ms_config_file from multiscanner.storage import storage @@ -71,6 +71,8 @@ def _main(): # Pull config options conf = read_config(args.config) multiscanner_config = conf['worker']['multiscanner_config'] + update_ms_config_file(multiscanner_config) + config = read_config(multiscanner_config) # Start worker task work_queue = multiprocessing.Queue() @@ -78,7 +80,7 @@ def _main(): exit_signal.value = False ms_process = multiprocessing.Process( target=multiscanner_process, - args=(work_queue, multiscanner_config, args.delete, exit_signal)) + args=(work_queue, config, args.delete, exit_signal)) ms_process.start() # Start message pickup task diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index 3f50868b..d735f940 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -29,7 +29,7 @@ # Finagle Flask to read config from .ini file instead of .py file web_config_file = get_config_path('web') -web_config = read_config(web_config_file, 'web', DEFAULTCONF) +web_config = read_config(web_config_file, 'web', DEFAULTCONF).get('web') conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From 089d17b7e8514dfae33e64d8c050749a27c499b5 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 5 Mar 2019 12:01:57 -0500 Subject: [PATCH 13/38] Allow selecting which modules to run in web UI --- multiscanner/distributed/api.py | 14 +++++++------- multiscanner/web/templates/index.html | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 10e662ac..298c3016 100755 --- 
a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -197,10 +197,10 @@ def multiscanner_process(work_queue, exit_signal): continue filelist = [item[0] for item in metadata_list] - # modulelist = [item[5] for item in metadata_list] + module_list = [item[5] for item in metadata_list] resultlist = multiscan( filelist, - # module_list + module_list=module_list ) results = parse_reports(resultlist, python=True) @@ -262,7 +262,7 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. ''' - return jsonify({'Modules': MODULE_LIST}) + return jsonify({name: mod[0] for (name, mod) in MODULE_LIST.items()}) @app.route('/api/v1/tasks', methods=['GET']) @@ -424,7 +424,7 @@ def queue_task(original_filename, f_name, full_path, metadata, rescan=False, mod config=MS_CONFIG, module_list=module_list) else: # Put the task on the queue - work_queue.put((full_path, original_filename, task_id, f_name, metadata)) + work_queue.put((full_path, original_filename, task_id, f_name, metadata, module_list)) return task_id @@ -514,7 +514,7 @@ def create_task(): for uzfile in z.namelist(): unzipped_file = open(os.path.join(extract_dir, uzfile)) f_name, full_path = save_hashed_filename(unzipped_file, True) - tid = queue_task(uzfile, f_name, full_path, metadata, rescan=rescan) + tid = queue_task(uzfile, f_name, full_path, metadata, rescan=rescan, module_list=modules) task_id_list.append(tid) except RuntimeError as e: msg = "ERROR: Failed to extract " + str(file_) + ' - ' + str(e) @@ -530,7 +530,7 @@ def create_task(): for urfile in r.namelist(): unrarred_file = open(os.path.join(extract_dir, urfile)) f_name, full_path = save_hashed_filename(unrarred_file, True) - tid = queue_task(urfile, f_name, full_path, metadata, rescan=rescan) + tid = queue_task(urfile, f_name, full_path, metadata, rescan=rescan, module_list=modules) task_id_list.append(tid) except RuntimeError as e: msg = "ERROR: Failed to extract 
" + str(file_) + ' - ' + str(e) @@ -541,7 +541,7 @@ def create_task(): else: # File was not an archive to extract f_name, full_path = save_hashed_filename(file_) - tid = queue_task(original_filename, f_name, full_path, metadata, rescan=rescan) + tid = queue_task(original_filename, f_name, full_path, metadata, rescan=rescan, module_list=modules) task_id_list = [tid] msg = {'task_ids': task_id_list} diff --git a/multiscanner/web/templates/index.html b/multiscanner/web/templates/index.html index 8c9172ca..0b1fdadd 100644 --- a/multiscanner/web/templates/index.html +++ b/multiscanner/web/templates/index.html @@ -233,15 +233,15 @@ }) // Add options for selecting which modules to run - //$.get("{{ api_loc }}/api/v1/modules", function(data) { - // var modules = '

Modules

Select which modules to use:

'; - // for (mod in data.Modules) { - // checked = (data.Modules[mod] == "True" ? "checked" : ""); - // modules += '
'; - // } - // modules += '
'; - // $('#adv-options').prepend(modules); - //}); + $.get("{{ api_loc }}/api/v1/modules", function(data) { + var modules = '

Modules

Select which modules to use:

'; + for (mod in data) { + checked = (data[mod] ? "checked" : ""); + modules += '
'; + } + modules += '
'; + $('#adv-options').append(modules); + }); // Set metadata field values for all files $('#adv-options .metadata-group input').change(function() { From 2734f2d699b764352ad674bc1a2f9701e47e1cdd Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 26 Mar 2019 09:56:38 -0400 Subject: [PATCH 14/38] Fix failing test --- multiscanner/tests/test_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/multiscanner/tests/test_api.py b/multiscanner/tests/test_api.py index 6dd69a51..7b11b34f 100644 --- a/multiscanner/tests/test_api.py +++ b/multiscanner/tests/test_api.py @@ -88,7 +88,10 @@ def test_create_first_task(self): def test_get_modules(self): resp = self.app.get('/api/v1/modules').get_data().decode('utf-8') - self.assertIn('Modules', resp) + self.assertIn('AVGScan', resp) + self.assertIn('MD5', resp) + self.assertIn('SHA256', resp) + self.assertIn('libmagic', resp) class TestTaskCreateCase(APITestCase): From c5e34ef3806f18ce50fc458a752a99a6c3abf598 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 26 Mar 2019 11:57:04 -0400 Subject: [PATCH 15/38] Declare config globals at top of file --- multiscanner/config.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index dcc919f7..f6ed2a77 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -23,6 +23,15 @@ # The directory where the modules are kept MODULES_DIR = os.path.join(MS_WD, 'modules') +# The default config file +CONFIG_FILE = None + +# Main MultiScanner config, as a dictionary +MS_CONFIG = None + +# The dictionary of modules and whether they're enabled or not +MODULE_LIST = None + def get_configuration_paths(): # Possible paths for the configuration file. 
@@ -55,7 +64,6 @@ def determine_configuration_path(filepath): return config_file -# The default config file CONFIG_FILE = determine_configuration_path(None) @@ -112,7 +120,6 @@ def read_config(config_file, section_name=None, default_config=None): return parse_config(config_object) -# Main MultiScanner config, as a dictionary MS_CONFIG = read_config(CONFIG_FILE) @@ -166,7 +173,6 @@ def get_modules(): return modules -# The dictionary of modules and whether they're enabled or not MODULE_LIST = get_modules() From 7ca5ea21b63b6108673be650b0ed9b930d0bbcfa Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 26 Mar 2019 13:13:52 -0400 Subject: [PATCH 16/38] Remove unused `recursive` param from multiscan() --- multiscanner/ms.py | 14 ++------------ multiscanner/tests/test_configs.py | 8 ++++---- multiscanner/tests/test_module_interface.py | 2 +- multiscanner/tests/test_multiscanner.py | 3 +-- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 44e65bcf..a3cd6f43 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -380,7 +380,6 @@ def _write_missing_module_configs(config, filepath=CONFIG_FILE): for modname, module in sorted(six.iteritems(MODULE_LIST)): if modname not in config.keys(): moddir = module[1] - print(moddir) mod = load_module(modname, [moddir]) if mod: try: @@ -516,25 +515,16 @@ def parse_reports(resultlist, groups=None, ugly=True, includeMetadata=False, pyt return json.dumps(finaldata, sort_keys=True, separators=(',', ':'), ensure_ascii=False) -def multiscan(Files, recursive=False, config=None, module_list=None): +def multiscan(Files, config=None, module_list=None): """ The meat and potatoes. Returns the list of module results Files - A list of files and dirs to be scanned - recursive - If true it will search the dirs in Files recursively config - A dictionary containing the configuration options to be used. module_list - A list of file paths to be used as modules. 
Each string should end in .py """ # Init some vars - # If recursive is False we don't parse the file list and take it as is. - if recursive: - filelist = parse_file_list(Files, recursive=recursive) - else: - filelist = Files - # If none of the files existed, why continue? - if not filelist: - raise ValueError("No valid files") - + filelist = Files # A list of files in the module dir if module_list is None: module_list = [modname for modname in MODULE_LIST] diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index 7b665df0..41b2bbb2 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -18,7 +18,7 @@ def test_no_config(): results, metadata = multiscanner.multiscan( filelist, config=None, - recursive=None, module_list=module_list)[0] + module_list=module_list)[0] assert metadata['conf'] == {'a': 'b', 'c': 'd'} @@ -27,7 +27,7 @@ def test_config_api_no_file(): config = {'test_conf': {'a': 'z'}} results, metadata = multiscanner.multiscan( filelist, config=config, - recursive=None, module_list=module_list)[0] + module_list=module_list)[0] assert metadata['conf'] == {'a': 'z', 'c': 'd'} @@ -38,7 +38,7 @@ def test_config_api_with_empty_file(): multiscanner.update_ms_config_file(config_file) results, metadata = multiscanner.multiscan( filelist, config=config, - recursive=None, module_list=module_list)[0] + module_list=module_list)[0] os.remove(config_file) assert metadata['conf'] == {'a': 'z', 'c': 'd'} @@ -51,6 +51,6 @@ def test_config_api_with_real_file(): multiscanner.update_ms_config_file(config_file) results, metadata = multiscanner.multiscan( filelist, config=config, - recursive=None, module_list=module_list)[0] + module_list=module_list)[0] os.remove(config_file) assert metadata['conf'] == {'a': 'z', 'c': 'd'} diff --git a/multiscanner/tests/test_module_interface.py b/multiscanner/tests/test_module_interface.py index d236695e..e3b26289 100644 --- a/multiscanner/tests/test_module_interface.py +++ 
b/multiscanner/tests/test_module_interface.py @@ -15,7 +15,7 @@ def add_int(x, y): @mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) def test_subscan(): m = multiscanner.multiscan( - ['fake.zip'], recursive=None, + ['fake.zip'], module_list=['test_subscan']) assert m == [([(u'fake.zip', 0)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0', u'fake.zip')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip', [u'fake.zip/0'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0', 1)], {'Type': 'Test', 'Name': 'test_subscan'}), ([(u'fake.zip/0/1', u'fake.zip/0')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}), ([(u'fake.zip/0', [u'fake.zip/0/1'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}), ([(u'fake.zip/0/1', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}), ([(u'fake.zip/0/1', 2)], {'Type': 'Test', 'Name': 'test_subscan'})] # noqa: E501 diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 614c0b50..b48ba8f0 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -26,8 +26,7 @@ def teardown_class(cls): class Test_multiscan(_runmulti_tests): def setup(self): - self.result = multiscanner.multiscan( - self.filelist, recursive=False) + self.result = multiscanner.multiscan(self.filelist) self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True) self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True) From ac90491fcee335e4b9579e9f50c3328fe23eb952 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 28 Mar 2019 10:26:15 -0400 Subject: [PATCH 17/38] Fix modules list - jQuery wasn't reading it correctly - ElasticSearch would break if SHA256 module wasn't enabled in the UI 
- Call multiscan() once per sample in non-distributed mode so we can select which modules to run per-sample --- multiscanner/distributed/api.py | 33 +++++++++++-------- multiscanner/ms.py | 2 +- multiscanner/storage/elasticsearch_storage.py | 1 + multiscanner/web/templates/index.html | 2 +- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 298c3016..d4bcc3c2 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -196,21 +196,18 @@ def multiscanner_process(work_queue, exit_signal): else: continue - filelist = [item[0] for item in metadata_list] - module_list = [item[5] for item in metadata_list] - resultlist = multiscan( - filelist, - module_list=module_list - ) - results = parse_reports(resultlist, python=True) - - scan_time = datetime.now().isoformat() + for item in metadata_list: + filelist = [item[0]] + module_list = item[5] + resultlist = multiscan( + filelist, + config=MS_CONFIG, + module_list=module_list + ) + results = parse_reports(resultlist, python=True) - if delete_after_scan: - for file_name in results: - os.remove(file_name) + scan_time = datetime.now().isoformat() - for item in metadata_list: # Use the original filename as the index instead of the full path results[item[1]] = results[item[0]] del results[item[0]] @@ -230,6 +227,10 @@ def multiscanner_process(work_queue, exit_signal): storage_handler.store(results, wait=False) + if delete_after_scan: + for file_name in results: + os.remove(file_name) + filelist = [] time_stamp = None storage_handler.close() @@ -477,6 +478,10 @@ def create_task(): rescan = True elif key == 'modules': module_names = request.form[key].split(',') + if 'SHA256' not in module_names: + # Elasticsearch won't work without it + # TODO: Don't let users enable/disable SHA256 module? 
+ module_names.append('SHA256') modules = list(set(module_names).intersection(MODULE_LIST.keys())) # files = utils.parse_dir(MODULES_DIR, True) @@ -558,7 +563,7 @@ def get_report(task_id): to the given task ID. ''' - download = request.args.get('d', default='False', type=str)[0].lower() + download = request.args.get('d', default='false', type=str)[0].lower() report_dict, success = get_report_dict(task_id) if success: diff --git a/multiscanner/ms.py b/multiscanner/ms.py index a3cd6f43..a52f6782 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -521,7 +521,7 @@ def multiscan(Files, config=None, module_list=None): Files - A list of files and dirs to be scanned config - A dictionary containing the configuration options to be used. - module_list - A list of file paths to be used as modules. Each string should end in .py + module_list - A list of the names of the modules to run on the files. """ # Init some vars filelist = Files diff --git a/multiscanner/storage/elasticsearch_storage.py b/multiscanner/storage/elasticsearch_storage.py index f19d2b73..a8ed89bf 100644 --- a/multiscanner/storage/elasticsearch_storage.py +++ b/multiscanner/storage/elasticsearch_storage.py @@ -170,6 +170,7 @@ def store(self, report): try: sample_id = report[filename]['SHA256'] except KeyError: + logger.debug('SHA256 not found in report; generating UUID') sample_id = uuid4() # Store metadata with the sample, not the report sample = {'doc_type': 'sample', 'filename': filename, 'tags': []} diff --git a/multiscanner/web/templates/index.html b/multiscanner/web/templates/index.html index 0b1fdadd..2395ddef 100644 --- a/multiscanner/web/templates/index.html +++ b/multiscanner/web/templates/index.html @@ -134,7 +134,7 @@ obj['duplicate'] = duplicate_action; // Modules options var moduleList = $("#module-opts input:checked").map(function(){return $(this).attr("name");}); - obj['modules'] = moduleList; + obj['modules'] = moduleList.toArray(); // Archive options if 
($('#archive-analyze').is(':checked')) { obj['archive-analyze'] = 'true'; From a5bbc89f167a6f54e5b53d2e144cd68bf4f6fa1a Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Mon, 1 Apr 2019 14:00:35 -0400 Subject: [PATCH 18/38] Simplify storage handler initialization --- multiscanner/analytics/ssdeep_analytics.py | 7 +--- multiscanner/common/dir_monitor.py | 5 +-- multiscanner/distributed/api.py | 3 +- multiscanner/storage/sql_driver.py | 4 +- multiscanner/storage/storage.py | 45 ++++++++++------------ 5 files changed, 28 insertions(+), 36 deletions(-) diff --git a/multiscanner/analytics/ssdeep_analytics.py b/multiscanner/analytics/ssdeep_analytics.py index ad5f95d1..50e00666 100644 --- a/multiscanner/analytics/ssdeep_analytics.py +++ b/multiscanner/analytics/ssdeep_analytics.py @@ -36,16 +36,13 @@ ssdeep = None -from multiscanner.config import get_config_path, read_config from multiscanner.storage import storage class SSDeepAnalytic: def __init__(self, debug=False): - storage_conf = get_config_path('storage') - conf = read_config(storage_conf) - storage_handler = storage.StorageHandler(configfile=storage_conf) + storage_handler = storage.StorageHandler() es_handler = storage_handler.load_required_module('ElasticSearchStorage') if not es_handler: @@ -54,7 +51,7 @@ def __init__(self, debug=False): # probably not ideal... 
self.es = es_handler.es - self.index = conf['ElasticSearchStorage']['index'] + self.index = es_handler.index self.doc_type = '_doc' self.debug = debug diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index 7bb9297c..8cdfa4d5 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -20,7 +20,7 @@ from watchdog.observers import Observer from multiscanner import multiscan, parse_reports -from multiscanner.config import CONFIG_FILE, MS_CONFIG, get_config_path, update_ms_config_file +from multiscanner.config import CONFIG_FILE, MS_CONFIG, update_ms_config_file from multiscanner.storage import storage logger = logging.getLogger(__name__) @@ -80,8 +80,7 @@ def start_observer(directory, work_queue, recursive=False): def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal): filelist = [] time_stamp = None - storage_conf = get_config_path('storage', config) - storage_handler = storage.StorageHandler(configfile=storage_conf) + storage_handler = storage.StorageHandler() while not exit_signal.value: time.sleep(1) try: diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index d4bcc3c2..2b3575c4 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -143,8 +143,7 @@ def default(self, obj): logger.error("Retrying...") time.sleep(db_sleep_time) -storage_conf = get_config_path('storage') -storage_handler = StorageHandler(configfile=storage_conf) +storage_handler = StorageHandler() handler = storage_handler.load_required_module('ElasticSearchStorage') ms_config_file = MS_CONFIG diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index 1e22fe0e..61b674c0 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -63,13 +63,15 @@ class Database(object): 'retry_num': 20, # Number of times to retry to connect to task database } - def __init__(self, config=None, 
configfile=CONFIG_FILE, regenconfig=False): + def __init__(self, config=None, configfile=None, regenconfig=False): self.db_connection_string = None self.db_engine = None # Configuration parsing config_parser = configparser.ConfigParser() config_parser.optionxform = str + if configfile is None: + configfile = CONFIG_FILE # (re)generate conf file if necessary if regenconfig or not os.path.isfile(configfile): diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index fa7eda29..5386454c 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -18,7 +18,7 @@ from multiscanner.common import utils -from multiscanner.config import MS_CONFIG, get_config_path, parse_config +from multiscanner.config import get_config_path, parse_config DEFAULTCONF = { @@ -95,34 +95,27 @@ def __init__(self, configfile=None, config=None): # Load all storage classes storage_classes = _get_storage_classes() if configfile is None: - configfile = MS_CONFIG + configfile = get_config_path('storage') # Read in config - if configfile: - configfile = get_config_path('storage') - config_object = configparser.ConfigParser() - config_object.optionxform = str - config_object.read(configfile) - if config: - file_conf = parse_config(config_object) - for key in config: - if key not in file_conf: - file_conf[key] = config[key] - file_conf[key]['_load_default'] = True - else: - file_conf[key].update(config[key]) - config = file_conf - else: - config = parse_config(config_object) + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read(configfile) + if config: + file_conf = parse_config(config_object) + for key in config: + if key not in file_conf: + file_conf[key] = config[key] + file_conf[key]['_load_default'] = True + else: + file_conf[key].update(config[key]) + config = file_conf else: - if config is None: - config = {} - for storage_name in storage_classes: - config[storage_name] = {} - config['_load_default'] 
= True + config = parse_config(config_object) - self.sleep_time = config.get('main', {}).get('retry_time', DEFAULTCONF['retry_time']) - self.num_retries = config.get('main', {}).get('retry_num', DEFAULTCONF['retry_num']) + config_main = config.get('main', {}) + self.sleep_time = config_main.get('retry_time', DEFAULTCONF['retry_time']) + self.num_retries = config_main.get('retry_num', DEFAULTCONF['retry_num']) # Set the config inside of the storage classes for storage_name in storage_classes: @@ -335,6 +328,8 @@ def _get_storage_classes(dir_path=STORAGE_DIR): dir_list = utils.parse_dir(dir_path, recursive=True) dir_list.remove(os.path.join(dir_path, 'storage.py')) # dir_list.remove(os.path.join(dir_path, '__init__.py')) + # sql_driver is not configurable in storage.ini, and is used by the api + # and celery workers rather than by the StorageHandler dir_list.remove(os.path.join(dir_path, 'sql_driver.py')) for filename in dir_list: if filename.endswith('.py'): From ae138e93eef04638910e8de72a6b49574d368b0e Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Wed, 3 Apr 2019 22:18:08 -0400 Subject: [PATCH 19/38] Consolidate _rewrite_config functions - Consolidated _rewrite_config functions from ms.py, storage.py and sql_driver.py into reset_config in config.py. - Renamed _update_DEFAULTCONF to update_paths_in_config and put it in config.py. - The sql_driver version of the function allowed passing in a configuration to override the default values. However it was only used by api.py, which creates a sql_driver.Database instance and passes in api_config.ini's Database section config. The consolidated reset_config doesn't have a parameter for overriding a DEFAULTCONF, so api.py uses regenconfig=False on the Database constructor so the Database config section will only be regenerated if it doesn't already exist. - We never used the module_list option for config_init() in ms.py, so I removed it to simplify things.
--- multiscanner/config.py | 62 +++++++++++++++++++++- multiscanner/distributed/api.py | 2 +- multiscanner/ms.py | 84 +++++++----------------------- multiscanner/storage/sql_driver.py | 34 ++++-------- multiscanner/storage/storage.py | 17 +----- 5 files changed, 92 insertions(+), 107 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index f6ed2a77..d6af9785 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -8,7 +8,7 @@ import os import sys -from six import PY3 # noqa F401 +from six import PY3, iteritems # noqa F401 from multiscanner.common.utils import parse_dir logger = logging.getLogger(__name__) @@ -196,3 +196,63 @@ def update_ms_config_file(config_file): global CONFIG_FILE, MS_CONFIG CONFIG_FILE = config_file MS_CONFIG = read_config(CONFIG_FILE) + + +def update_paths_in_config(conf, filepath): + """Rewrite config values containing paths to point to a new multiscanner config directory. + """ + base_dir = os.path.split(filepath)[0] + if 'storage-config' in conf: + conf['storage-config'] = os.path.join(base_dir, 'storage.ini') + if 'api-config' in conf: + conf['api-config'] = os.path.join(base_dir, 'api_config.ini') + if 'web-config' in conf: + conf['web-config'] = os.path.join(base_dir, 'web_config.ini') + if 'ruledir' in conf: + conf['ruledir'] = os.path.join(base_dir, "etc", "yarasigs") + if 'key' in conf: + conf['key'] = os.path.join(base_dir, 'etc', 'id_rsa') + if 'hash_list' in conf: + conf['hash_list'] = os.path.join(base_dir, 'etc', 'nsrl', 'hash_list') + if 'offsets' in conf: + conf['offsets'] = os.path.join(base_dir, 'etc', 'nsrl', 'offsets') + + +def reset_config(sections, config, filepath=None): + """ + Reset specific sections of a config file to their factory defaults. 
+ + sections - Dictionary mapping section names to the Python module containing its DEFAULTCONF + config - ConfigParser object in which to store config + filepath - Path to the config file + + Returns: + The ConfigParser object that was written to the file. + """ + if not filepath: + CONFIG_FILE + + # Read in the old config to preserve any sections not being reset + if os.path.isfile(filepath): + config.read(filepath) + + logger.info('Rewriting config at {}...'.format(filepath)) + + keys = list(sections.keys()) + keys.sort() + for section_name in keys: + try: + conf = sections[section_name].DEFAULTCONF + except Exception as e: + logger.warning(e) + continue + + update_paths_in_config(conf, filepath) + if not config.has_section(section_name): + config.add_section(section_name) + for key in conf: + config.set(section_name, key, str(conf[key])) + + with codecs.open(filepath, 'w', 'utf-8') as f: + config.write(f) + return config diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 2b3575c4..1c709246 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -112,7 +112,7 @@ def default(self, obj): from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic -db = database.Database(config=api_config.get('Database')) +db = database.Database(config=api_config.get('Database'), regenconfig=False) # To run under Apache, we need to set up the DB outside of __main__ # Sleep and retry until database connection is successful try: diff --git a/multiscanner/ms.py b/multiscanner/ms.py index a52f6782..84210237 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -32,9 +32,9 @@ from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_file_list, queue2list) from multiscanner.config import (CONFIG_FILE, MODULE_LIST, - MS_CONFIG, PY3, determine_configuration_path, - get_config_path, 
update_ms_config, - update_ms_config_file) + MS_CONFIG, PY3, get_config_path, + reset_config, update_ms_config, + update_ms_config_file, update_paths_in_config) from multiscanner.storage import storage @@ -251,23 +251,6 @@ def _run_module(modname, mod, filelist, threadDict, global_module_interface, con logger.debug("{} failed check()".format(modname)) -def _update_DEFAULTCONF(defaultconf, filepath): - if 'storage-config' in defaultconf: - defaultconf['storage-config'] = filepath.replace('config.ini', 'storage.ini') - if 'api-config' in defaultconf: - defaultconf['api-config'] = filepath.replace('config.ini', 'api_config.ini') - if 'web-config' in defaultconf: - defaultconf['web-config'] = filepath.replace('config.ini', 'web_config.ini') - if 'ruledir' in defaultconf: - defaultconf['ruledir'] = os.path.join(os.path.split(filepath)[0], "etc", "yarasigs") - if 'key' in defaultconf: - defaultconf['key'] = os.path.join(os.path.split(filepath)[0], 'etc', 'id_rsa') - if 'hash_list' in defaultconf: - defaultconf['hash_list'] = os.path.join(os.path.split(filepath)[0], 'etc', 'nsrl', 'hash_list') - if 'offsets' in defaultconf: - defaultconf['offsets'] = os.path.join(os.path.split(filepath)[0], 'etc', 'nsrl', 'offsets') - - def _copy_to_share(filelist, filedic, sharedir): """ Copies files from filelist to a share and populates the filedic. Returns a @@ -366,7 +349,7 @@ def _start_module_threads(filelist, module_list, config, global_module_interface return ThreadList -def _write_missing_module_configs(config, filepath=CONFIG_FILE): +def _write_missing_module_configs(config, filepath=None): """ Write in default config for modules not in config file. Returns True if config was written, False if not. 
@@ -375,7 +358,9 @@ def _write_missing_module_configs(config, filepath=CONFIG_FILE): module_list - The list of modules (filenames) config - The config object """ - filepath = determine_configuration_path(filepath) + if not filepath: + filepath = CONFIG_FILE + ConfNeedsWrite = False for modname, module in sorted(six.iteritems(MODULE_LIST)): if modname not in config.keys(): @@ -395,7 +380,7 @@ def _write_missing_module_configs(config, filepath=CONFIG_FILE): if 'main' not in config.keys(): ConfNeedsWrite = True - _update_DEFAULTCONF(DEFAULTCONF, filepath) + update_paths_in_config(DEFAULTCONF, filepath) config['main'] = {} for key in DEFAULTCONF: config['main'][key] = str(DEFAULTCONF[key]) @@ -410,57 +395,26 @@ def _write_missing_module_configs(config, filepath=CONFIG_FILE): return False -def _rewrite_config(module_list, config, filepath=CONFIG_FILE): +def config_init(filepath=None): """ - Write in default config for all modules. + Creates a new config file at filepath - module_list - The list of modules - config - The config object + filepath - The config file to create """ - filepath = determine_configuration_path(filepath) - logger.info('Rewriting config...') - for modname, module in sorted(six.iteritems(module_list)): + # Compile all the sections to go in the config + module_list = {} + module_list['main'] = sys.modules[__name__] # current module + for modname, module in sorted(six.iteritems(MODULE_LIST)): moddir = module[1] mod = load_module(modname, [moddir]) if mod: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue - _update_DEFAULTCONF(conf, filepath) - config.add_section(modname) - for key in conf: - config.set(modname, key, str(conf[key])) + module_list[modname] = mod - _update_DEFAULTCONF(DEFAULTCONF, filepath) - config.add_section('main') - for key in DEFAULTCONF: - config.set('main', key, str(DEFAULTCONF[key])) - - with codecs.open(filepath, 'w', 'utf-8') as f: - config.write(f) - - # Set global main config - 
update_ms_config(config) - - -def config_init(filepath, module_list=None): - """ - Creates a new config file at filepath - - filepath - The config file to create - """ - if module_list is None: - module_list = MODULE_LIST config = configparser.ConfigParser() config.optionxform = str - if filepath: - _rewrite_config(module_list, config, filepath) - else: - filepath = determine_configuration_path(filepath) - _rewrite_config(module_list, config, filepath) + reset_config(module_list, config, filepath) + update_ms_config(config) # Set global main config logger.info('Configuration file initialized at {}'.format(filepath)) @@ -848,7 +802,7 @@ def _main(): args.config = CONFIG_FILE else: update_ms_config_file(args.config) - _update_DEFAULTCONF(DEFAULTCONF, CONFIG_FILE) + update_paths_in_config(DEFAULTCONF, CONFIG_FILE) # Send all logs to stderr and set verbose if args.debug or args.verbose > 1: diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index 61b674c0..61d2e7c5 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import print_function -import codecs import configparser import json import logging @@ -17,7 +16,7 @@ from sqlalchemy.orm import aliased, sessionmaker from sqlalchemy_utils import create_database, database_exists -from multiscanner.config import get_config_path +from multiscanner.config import get_config_path, reset_config CONFIG_FILE = get_config_path('api') @@ -73,45 +72,30 @@ def __init__(self, config=None, configfile=None, regenconfig=False): if configfile is None: configfile = CONFIG_FILE + section_name = self.__class__.__name__ # (re)generate conf file if necessary if regenconfig or not os.path.isfile(configfile): - self._rewrite_config(config_parser, configfile, config) + sections = {section_name: self} + reset_config(sections, config_parser, configfile) + # now read in and parse the conf file config_parser.read(configfile) # 
If we didn't regen the config file in the above check, it's possible # that the file is missing our DB settings... - if not config_parser.has_section(self.__class__.__name__): - self._rewrite_config(config_parser, configfile, config) - config_parser.read(configfile) + if not config_parser.has_section(section_name): + sections = {section_name: self} + reset_config(sections, config_parser, configfile) # If configuration was specified, use what was stored in the config file # as a base and then override specific settings as contained in the user's # config. This allows the user to specify ONLY the config settings they want to # override - config_from_file = dict(config_parser.items(self.__class__.__name__)) + config_from_file = dict(config_parser.items(section_name)) if config: for key_ in config: config_from_file[key_] = config[key_] self.config = config_from_file - def _rewrite_config(self, config_parser, configfile, usr_override_config): - """ - Regenerates the Database-specific part of the API config file - """ - if os.path.isfile(configfile): - # Read in the old config - config_parser.read(configfile) - if not config_parser.has_section(self.__class__.__name__): - config_parser.add_section(self.__class__.__name__) - if not usr_override_config: - usr_override_config = self.DEFAULTCONF - # Update config - for key_ in usr_override_config: - config_parser.set(self.__class__.__name__, key_, str(usr_override_config[key_])) - - with codecs.open(configfile, 'w', 'utf-8') as conffile: - config_parser.write(conffile) - def init_db(self): """ Initializes the database connection based on the configuration parameters diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 5386454c..7a1fd565 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -18,7 +18,7 @@ from multiscanner.common import utils -from multiscanner.config import get_config_path, parse_config +from multiscanner.config import get_config_path, 
parse_config, reset_config DEFAULTCONF = { @@ -261,7 +261,7 @@ def config_init(filepath, overwrite=False, storage_classes=None): config_object.optionxform = str if overwrite or not os.path.isfile(filepath): _write_main_config(config_object) - _rewrite_config(storage_classes, config_object, filepath) + reset_config(storage_classes, config_object, filepath) else: config_object.read(filepath) _write_main_config(config_object) @@ -277,19 +277,6 @@ def _write_main_config(config_object): config_object.set('main', key, str(DEFAULTCONF[key])) -def _rewrite_config(storage_classes, config_object, filepath): - keys = list(storage_classes.keys()) - keys.sort() - for class_name in keys: - conf = storage_classes[class_name].DEFAULTCONF - config_object.add_section(class_name) - for key in conf: - config_object.set(class_name, key, str(conf[key])) - - with codecs.open(filepath, 'w', 'utf-8') as f: - config_object.write(f) - - def _write_missing_config(config_object, filepath, storage_classes=None): """ Write in default config for modules not in config file. Returns True if config was written, False if not. From 6745bb8238982c077ae83c3a88d8e96d89bb16af Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 4 Apr 2019 17:25:09 -0400 Subject: [PATCH 20/38] Fix config globals imports Have to import config globals like CONFIG_FILE as qualified names because importing them by name copies them so we end up not modifying the global. Also moved config_init and _write_missing_module_configs to make it easier to see how they were called, in preparation for consolidating them into config.py. 
--- multiscanner/common/dir_monitor.py | 10 +- multiscanner/distributed/api.py | 20 +- multiscanner/distributed/celery_worker.py | 14 +- multiscanner/modules/Antivirus/AVGScan.py | 4 +- multiscanner/modules/Antivirus/MSEScan.py | 4 +- multiscanner/modules/Antivirus/McAfeeScan.py | 4 +- multiscanner/modules/Database/NSRL.py | 6 +- .../modules/MachineLearning/EndgameEmber.py | 4 +- .../modules/Metadata/ExifToolsScan.py | 4 +- multiscanner/modules/Metadata/TrID.py | 4 +- multiscanner/modules/Signature/YaraScan.py | 4 +- multiscanner/ms.py | 184 +++++++++--------- multiscanner/tests/test_configs.py | 8 +- multiscanner/tests/test_module_interface.py | 2 +- 14 files changed, 138 insertions(+), 134 deletions(-) diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index 8cdfa4d5..f6255e83 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -20,7 +20,7 @@ from watchdog.observers import Observer from multiscanner import multiscan, parse_reports -from multiscanner.config import CONFIG_FILE, MS_CONFIG, update_ms_config_file +from multiscanner import config as msconf from multiscanner.storage import storage logger = logging.getLogger(__name__) @@ -116,8 +116,8 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e def _main(): args = _parse_args() - if args.config != CONFIG_FILE: - update_ms_config_file(args.config) + if args.config != msconf.CONFIG_FILE: + msconf.update_ms_config_file(args.config) work_queue = multiprocessing.Queue() exit_signal = multiprocessing.Value('b') @@ -125,7 +125,7 @@ def _main(): observer = start_observer(args.Directory, work_queue, args.recursive) ms_process = multiprocessing.Process( target=multiscanner_process, - args=(work_queue, MS_CONFIG, args.batch, args.seconds, args.delete, exit_signal)) + args=(work_queue, msconf.MS_CONFIG, args.batch, args.seconds, args.delete, exit_signal)) ms_process.start() try: while True: @@ -142,7 +142,7 @@ def 
_main(): def _parse_args(): parser = argparse.ArgumentParser(description='Monitor a directory and submit new files to MultiScanner') parser.add_argument("-c", "--config", help="The config file to use", required=False, - default=CONFIG_FILE) + default=msconf.CONFIG_FILE) parser.add_argument("-s", "--seconds", help="The number of seconds to wait for additional files", required=False, default=120, type=int) parser.add_argument("-b", "--batch", help="The max number of files per batch", required=False, diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 1c709246..0989af41 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -60,7 +60,7 @@ from flask_cors import CORS from jinja2 import Markup -from multiscanner import CONFIG_FILE, MODULE_LIST, MS_CONFIG, MS_WD, multiscan, parse_reports +import multiscanner as ms from multiscanner.common import pdf_generator, stix2_generator from multiscanner.config import PY3, get_config_path, read_config from multiscanner.storage import StorageHandler @@ -146,7 +146,7 @@ def default(self, obj): storage_handler = StorageHandler() handler = storage_handler.load_required_module('ElasticSearchStorage') -ms_config_file = MS_CONFIG +ms_config_file = ms.MS_CONFIG ms_config = read_config(ms_config_file) try: @@ -198,12 +198,12 @@ def multiscanner_process(work_queue, exit_signal): for item in metadata_list: filelist = [item[0]] module_list = item[5] - resultlist = multiscan( + resultlist = ms.multiscan( filelist, - config=MS_CONFIG, + config=ms.MS_CONFIG, module_list=module_list ) - results = parse_reports(resultlist, python=True) + results = ms.parse_reports(resultlist, python=True) scan_time = datetime.now().isoformat() @@ -262,7 +262,7 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. 
''' - return jsonify({name: mod[0] for (name, mod) in MODULE_LIST.items()}) + return jsonify({name: mod[0] for (name, mod) in ms.MODULE_LIST.items()}) @app.route('/api/v1/tasks', methods=['GET']) @@ -369,7 +369,7 @@ def save_hashed_filename(f, zipped=False): # TODO: should we check if the file is already there # and skip this step if it is? file_path = os.path.join(api_config['api']['upload_folder'], f_name) - full_path = os.path.join(MS_WD, file_path) + full_path = os.path.join(ms.MS_WD, file_path) if zipped: shutil.copy2(f.name, full_path) else: @@ -421,7 +421,7 @@ def queue_task(original_filename, f_name, full_path, metadata, rescan=False, mod # Publish the task to Celery multiscanner_celery.delay(full_path, original_filename, task_id, f_name, metadata, - config=MS_CONFIG, module_list=module_list) + config=ms.MS_CONFIG, module_list=module_list) else: # Put the task on the queue work_queue.put((full_path, original_filename, task_id, f_name, metadata, module_list)) @@ -481,7 +481,7 @@ def create_task(): # Elasticsearch won't work without it # TODO: Don't let users enable/disable SHA256 module? 
module_names.append('SHA256') - modules = list(set(module_names).intersection(MODULE_LIST.keys())) + modules = list(set(module_names).intersection(ms.MODULE_LIST.keys())) # files = utils.parse_dir(MODULES_DIR, True) # modules = [] @@ -995,7 +995,7 @@ def get_pdf_report(task_id): if not success: return jsonify(report_dict) - pdf = pdf_generator.create_pdf_document(CONFIG_FILE, report_dict) + pdf = pdf_generator.create_pdf_document(ms.CONFIG_FILE, report_dict) response = make_response(pdf) response.headers['Content-Type'] = 'application/pdf' response.headers['Content-Disposition'] = 'attachment; filename=%s.pdf' % task_id diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index f949b399..ae29949e 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -12,7 +12,7 @@ from celery.utils.log import get_task_logger from multiscanner import multiscan, parse_reports -from multiscanner.config import MS_CONFIG, get_config_path, read_config +from multiscanner import config as msconf from multiscanner.storage import elasticsearch_storage, storage from multiscanner.storage import sql_driver as database from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic @@ -31,13 +31,13 @@ 'tz': 'US/Eastern', } -configfile = get_config_path('api') -config = read_config(configfile, 'celery', DEFAULTCONF) +configfile = msconf.get_config_path('api') +config = msconf.read_config(configfile, 'celery', DEFAULTCONF) worker_config = config.get('celery') db_config = config.get('Database') -storage_configfile = get_config_path('storage') -storage_config = read_config(storage_configfile) +storage_configfile = msconf.get_config_path('storage') +storage_config = msconf.read_config(storage_configfile) es_storage_config = storage_config.get('ElasticSearchStorage') app = Celery(broker='{0}://{1}:{2}@{3}/{4}'.format( @@ -115,8 +115,8 @@ def multiscanner_celery(file_, original_filename, task_id, 
file_hash, metadata, # Get the storage config if config is None: - config = MS_CONFIG - storage_conf = get_config_path('storage', config) + config = msconf.MS_CONFIG + storage_conf = msconf.get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf) resultlist = multiscan( diff --git a/multiscanner/modules/Antivirus/AVGScan.py b/multiscanner/modules/Antivirus/AVGScan.py index 90d18c6b..36f982d2 100644 --- a/multiscanner/modules/Antivirus/AVGScan.py +++ b/multiscanner/modules/Antivirus/AVGScan.py @@ -7,7 +7,7 @@ import subprocess import re -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -21,7 +21,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/Antivirus/MSEScan.py b/multiscanner/modules/Antivirus/MSEScan.py index 2187070b..f242a989 100644 --- a/multiscanner/modules/Antivirus/MSEScan.py +++ b/multiscanner/modules/Antivirus/MSEScan.py @@ -6,7 +6,7 @@ import os import subprocess -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import list2cmdline, sshconnect, SSH subprocess.list2cmdline = list2cmdline @@ -18,7 +18,7 @@ NAME = "Microsoft Security Essentials" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/Antivirus/McAfeeScan.py b/multiscanner/modules/Antivirus/McAfeeScan.py index ddc85a79..dc6b6e44 100644 
--- a/multiscanner/modules/Antivirus/McAfeeScan.py +++ b/multiscanner/modules/Antivirus/McAfeeScan.py @@ -7,7 +7,7 @@ import subprocess import re -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -19,7 +19,7 @@ NAME = "McAfee" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/Database/NSRL.py b/multiscanner/modules/Database/NSRL.py index 4e244472..1341bb44 100755 --- a/multiscanner/modules/Database/NSRL.py +++ b/multiscanner/modules/Database/NSRL.py @@ -8,7 +8,7 @@ import os import struct -from multiscanner.config import CONFIG_FILE +import multiscanner as ms __author__ = "Drew Bonasera" __license__ = "MPL 2.0" @@ -19,8 +19,8 @@ REQUIRES = ["SHA1", "MD5"] DEFAULTCONF = { - 'hash_list': os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), - 'offsets': os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), + 'hash_list': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), + 'offsets': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), 'ENABLED': True } diff --git a/multiscanner/modules/MachineLearning/EndgameEmber.py b/multiscanner/modules/MachineLearning/EndgameEmber.py index 48b8aaba..79e2cf98 100644 --- a/multiscanner/modules/MachineLearning/EndgameEmber.py +++ b/multiscanner/modules/MachineLearning/EndgameEmber.py @@ -19,7 +19,7 @@ import os from pathlib import Path -from multiscanner import CONFIG_FILE +import multiscanner as ms __authors__ = "Patrick Copeland" @@ -30,7 +30,7 @@ REQUIRES = ['libmagic'] DEFAULTCONF = { 'ENABLED': False, - 'path-to-model': 
os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), + 'path-to-model': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), } LGBM_MODEL = None diff --git a/multiscanner/modules/Metadata/ExifToolsScan.py b/multiscanner/modules/Metadata/ExifToolsScan.py index acf59e0c..d5a4f273 100644 --- a/multiscanner/modules/Metadata/ExifToolsScan.py +++ b/multiscanner/modules/Metadata/ExifToolsScan.py @@ -8,7 +8,7 @@ import subprocess import re -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import list2cmdline, sshexec, SSH subprocess.list2cmdline = list2cmdline @@ -20,7 +20,7 @@ NAME = "ExifTool" # These are overwritten by the config file HOST = ("MultiScanner", 22, "User") -KEY = os.path.join(os.path.split(CONFIG_FILE)[0], "etc", "id_rsa") +KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], "etc", "id_rsa") PATHREPLACE = "X:\\" # Entries to be removed from the final results REMOVEENTRY = ["ExifTool Version Number", "File Name", "Directory", "File Modification Date/Time", diff --git a/multiscanner/modules/Metadata/TrID.py b/multiscanner/modules/Metadata/TrID.py index ac3197b9..3973a94a 100644 --- a/multiscanner/modules/Metadata/TrID.py +++ b/multiscanner/modules/Metadata/TrID.py @@ -8,7 +8,7 @@ import subprocess import re -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import list2cmdline, sshexec, SSH logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/Signature/YaraScan.py b/multiscanner/modules/Signature/YaraScan.py index a657c764..3c554665 100644 --- 
a/multiscanner/modules/Signature/YaraScan.py +++ b/multiscanner/modules/Signature/YaraScan.py @@ -7,7 +7,7 @@ import os import time -from multiscanner.config import CONFIG_FILE +import multiscanner as ms from multiscanner.common.utils import parse_dir @@ -17,7 +17,7 @@ TYPE = "Signature" NAME = "Yara" DEFAULTCONF = { - "ruledir": os.path.join(os.path.split(CONFIG_FILE)[0], "etc", "yarasigs"), + "ruledir": os.path.join(os.path.split(ms.CONFIG_FILE)[0], "etc", "yarasigs"), "fileextensions": [".yar", ".yara", ".sig"], "ignore-tags": ["TLPRED"], "includes": False, diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 84210237..8a002436 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -31,8 +31,8 @@ from multiscanner.version import __version__ as MS_VERSION from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_file_list, queue2list) -from multiscanner.config import (CONFIG_FILE, MODULE_LIST, - MS_CONFIG, PY3, get_config_path, +from multiscanner import config as msconf +from multiscanner.config import (PY3, get_config_path, reset_config, update_ms_config, update_ms_config_file, update_paths_in_config) from multiscanner.storage import storage @@ -42,9 +42,9 @@ DEFAULTCONF = { "copyfilesto": False, "group-types": ["Antivirus"], - "storage-config": CONFIG_FILE.replace('config.ini', 'storage.ini'), - "api-config": CONFIG_FILE.replace('config.ini', 'api_config.ini'), - "web-config": CONFIG_FILE.replace('config.ini', 'web_config.ini'), + "storage-config": msconf.CONFIG_FILE.replace('config.ini', 'storage.ini'), + "api-config": msconf.CONFIG_FILE.replace('config.ini', 'api_config.ini'), + "web-config": msconf.CONFIG_FILE.replace('config.ini', 'web_config.ini'), } logger = logging.getLogger(__name__) @@ -305,9 +305,9 @@ def _start_module_threads(filelist, module_list, config, global_module_interface # TODO: What if the module isn't specified in the config try: - moddir = MODULE_LIST[modname][1] + moddir = 
msconf.MODULE_LIST[modname][1] except KeyError: - logger.warning(MODULE_LIST) + logger.warning(msconf.MODULE_LIST) logger.warning("{} not a valid module...".format(modname)) continue @@ -349,75 +349,6 @@ def _start_module_threads(filelist, module_list, config, global_module_interface return ThreadList -def _write_missing_module_configs(config, filepath=None): - """ - Write in default config for modules not in config file. Returns True if config was written, False if not. - - Also adds a '[main]' section if not present. - - module_list - The list of modules (filenames) - config - The config object - """ - if not filepath: - filepath = CONFIG_FILE - - ConfNeedsWrite = False - for modname, module in sorted(six.iteritems(MODULE_LIST)): - if modname not in config.keys(): - moddir = module[1] - mod = load_module(modname, [moddir]) - if mod: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue - if modname not in config.keys(): - ConfNeedsWrite = True - config[modname] = {} - for key in conf: - config[modname][key] = str(conf[key]) - - if 'main' not in config.keys(): - ConfNeedsWrite = True - update_paths_in_config(DEFAULTCONF, filepath) - config['main'] = {} - for key in DEFAULTCONF: - config['main'][key] = str(DEFAULTCONF[key]) - - if ConfNeedsWrite: - config_object = configparser.ConfigParser() - config_object.optionxform = str - config_object.read_dict(config) - with codecs.open(filepath, 'w', 'utf-8') as f: - config_object.write(f) - return True - return False - - -def config_init(filepath=None): - """ - Creates a new config file at filepath - - filepath - The config file to create - """ - # Compile all the sections to go in the config - module_list = {} - module_list['main'] = sys.modules[__name__] # current module - for modname, module in sorted(six.iteritems(MODULE_LIST)): - moddir = module[1] - mod = load_module(modname, [moddir]) - if mod: - module_list[modname] = mod - - config = configparser.ConfigParser() - 
config.optionxform = str - - reset_config(module_list, config, filepath) - update_ms_config(config) # Set global main config - logger.info('Configuration file initialized at {}'.format(filepath)) - - def parse_reports(resultlist, groups=None, ugly=True, includeMetadata=False, python=False): """Turn report dictionaries into json output. Returns a string. @@ -481,7 +412,7 @@ def multiscan(Files, config=None, module_list=None): filelist = Files # A list of files in the module dir if module_list is None: - module_list = [modname for modname in MODULE_LIST] + module_list = [modname for modname in msconf.MODULE_LIST] # A dictionary used for the copyfileto parameter filedic = {} @@ -721,7 +652,7 @@ def _parse_args(): Parses arguments """ # argparse stuff - desc = "MultiScanner v{} - Analyse files against multiple engines" + desc = "MultiScanner v{} - Analyze files against multiple engines" parser = argparse.ArgumentParser(description=desc.format(MS_VERSION)) parser.add_argument("-c", "--config", required=False, default=None, help="The config file to use") @@ -750,11 +681,83 @@ def _parse_args(): parser.add_argument("--resume", action="store_true", help="Read in the report file and continue where we left off") parser.add_argument('Files', nargs='+', - help="Files and Directories to analyse") + help="Files and Directories to analyze") return parser.parse_args() +def _write_missing_module_configs(config, filepath=None): + """ + Write in default config for modules not in config file. Returns True if config was written, False if not. + + Also adds a '[main]' section if not present. 
+ + module_list - The list of modules (filenames) + config - The config object + """ + if not filepath: + filepath = msconf.CONFIG_FILE + + ConfNeedsWrite = False + for modname, module in sorted(six.iteritems(msconf.MODULE_LIST)): + if modname not in config.keys(): + moddir = module[1] + mod = load_module(modname, [moddir]) + if mod: + try: + conf = mod.DEFAULTCONF + except Exception as e: + logger.warning(e) + continue + if modname not in config.keys(): + ConfNeedsWrite = True + config[modname] = {} + for key in conf: + config[modname][key] = str(conf[key]) + + if 'main' not in config.keys(): + ConfNeedsWrite = True + update_paths_in_config(DEFAULTCONF, filepath) + config['main'] = {} + for key in DEFAULTCONF: + config['main'][key] = str(DEFAULTCONF[key]) + + if ConfNeedsWrite: + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read_dict(config) + with codecs.open(filepath, 'w', 'utf-8') as f: + config_object.write(f) + return True + return False + + +def config_init(filepath=None): + """ + Creates a new config file at filepath + + filepath - The config file to create + """ + # Compile all the sections to go in the config + module_list = {} + module_list['main'] = sys.modules[__name__] # current module + for modname, module in sorted(six.iteritems(msconf.MODULE_LIST)): + moddir = module[1] + mod = load_module(modname, [moddir]) + if mod: + module_list[modname] = mod + + config = configparser.ConfigParser() + config.optionxform = str + + reset_config(module_list, config, filepath) + update_ms_config(config) # Set global main config + logger.info('Configuration file initialized at {}'.format(filepath)) + + def _init(args): + if args.config is None: + args.config = msconf.CONFIG_FILE + # Initialize configuration file if os.path.isfile(args.config): logger.warning('{} already exists, overwriting will destroy changes'.format(args.config)) @@ -767,7 +770,7 @@ def _init(args): config_init(args.config) else: 
logger.info('Checking for missing modules in configuration...') - config = MS_CONFIG # MS_CONFIG will already have been set in main() + config = msconf.MS_CONFIG # MS_CONFIG will already have been set in main() _write_missing_module_configs(config, filepath=args.config) else: config_init(args.config) @@ -797,12 +800,6 @@ def _init(args): def _main(): # Get args args = _parse_args() - # Set config or update locations - if args.config is None: - args.config = CONFIG_FILE - else: - update_ms_config_file(args.config) - update_paths_in_config(DEFAULTCONF, CONFIG_FILE) # Send all logs to stderr and set verbose if args.debug or args.verbose > 1: @@ -819,15 +816,22 @@ def _main(): logging.basicConfig(format="%(asctime)s [%(module)s] %(levelname)s: %(message)s", stream=sys.stderr, level=log_lvl) - # Checks if user is trying to initialize + # Check if user is trying to initialize if str(args.Files) == "['init']" and not os.path.isfile('init'): _init(args) + # Set config or update locations + if args.config is None: + args.config = msconf.CONFIG_FILE + else: + update_ms_config_file(args.config) + update_paths_in_config(DEFAULTCONF, msconf.CONFIG_FILE) + if not os.path.isfile(args.config): config_init(args.config) else: # Write the default configure settings for any missing modules - _write_missing_module_configs(MS_CONFIG, filepath=CONFIG_FILE) + _write_missing_module_configs(msconf.MS_CONFIG, filepath=msconf.CONFIG_FILE) # Make sure report is not a dir if args.json: @@ -887,13 +891,13 @@ def _main(): starttime = str(datetime.datetime.now()) # Run the multiscan - results = multiscan(filelist, config=MS_CONFIG) + results = multiscan(filelist, config=msconf.MS_CONFIG) # We need to read in the config for the parseReports call config = configparser.ConfigParser() config.optionxform = str config.read(args.config) - config = MS_CONFIG['main'] + config = msconf.MS_CONFIG['main'] # Make sure we have a group-types if "group-types" not in config or not config["group-types"]: 
config["group-types"] = [] diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index 41b2bbb2..8242b2a8 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -14,7 +14,7 @@ module_list = ['test_conf'] -@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_no_config(): results, metadata = multiscanner.multiscan( filelist, config=None, @@ -22,7 +22,7 @@ def test_no_config(): assert metadata['conf'] == {'a': 'b', 'c': 'd'} -@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_no_file(): config = {'test_conf': {'a': 'z'}} results, metadata = multiscanner.multiscan( @@ -31,7 +31,7 @@ def test_config_api_no_file(): assert metadata['conf'] == {'a': 'z', 'c': 'd'} -@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_with_empty_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] @@ -43,7 +43,7 @@ def test_config_api_with_empty_file(): assert metadata['conf'] == {'a': 'z', 'c': 'd'} -@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_with_real_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] diff --git a/multiscanner/tests/test_module_interface.py b/multiscanner/tests/test_module_interface.py index e3b26289..df5882af 100644 --- a/multiscanner/tests/test_module_interface.py +++ b/multiscanner/tests/test_module_interface.py @@ -12,7 +12,7 @@ def add_int(x, y): return x + y -@mock.patch('multiscanner.ms.MODULE_LIST', mock_modlist) +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_subscan(): m = multiscanner.multiscan( ['fake.zip'], From ced6d757d29490d5b422f5fd0dc53499e21e00d9 Mon Sep 17 00:00:00 2001 
From: Chris Lenk Date: Thu, 11 Apr 2019 13:01:05 -0400 Subject: [PATCH 21/38] Consolidate config functions into config.py Namely, _get_main_config, _write_missing_module_config, _rewrite_config. --- multiscanner/__init__.py | 6 +- multiscanner/config.py | 52 ++++++++++++++ multiscanner/ms.py | 96 +++++++------------------ multiscanner/storage/__init__.py | 2 +- multiscanner/storage/storage.py | 59 +-------------- multiscanner/tests/test_configs.py | 3 +- multiscanner/tests/test_multiscanner.py | 2 +- 7 files changed, 84 insertions(+), 136 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index 61d2c1d7..770c52ce 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -5,12 +5,12 @@ from .config import ( # noqa F401 CONFIG_FILE, MS_WD, MS_CONFIG, MODULES_DIR, MODULE_LIST, PY3, - update_ms_config, update_ms_config_file + config_init, update_ms_config, update_ms_config_file ) from .ms import ( # noqa F401 - config_init, multiscan, parse_reports, _ModuleInterface, - _GlobalModuleInterface, _Thread, _run_module, _main + multiscan, parse_reports, _ModuleInterface, + _GlobalModuleInterface, _Thread, _run_module, _main, _get_main_modules ) from .version import __version__ # noqa F401 diff --git a/multiscanner/config.py b/multiscanner/config.py index d6af9785..b6f2ebc8 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -218,6 +218,58 @@ def update_paths_in_config(conf, filepath): conf['offsets'] = os.path.join(base_dir, 'etc', 'nsrl', 'offsets') +def config_init(filepath, sections, overwrite=False): + """ + Creates a new config file at filepath + + filepath - The config file to create + sections - Dictionary mapping section names to the Python module containing its DEFAULTCONF + overwrite - Whether to overwrite the config file at filepath, if it already exists + """ + + config = configparser.ConfigParser() + config.optionxform = str + + if overwrite or not os.path.isfile(filepath): + return reset_config(sections, 
config, filepath) + else: + config.read(filepath) + write_missing_config(sections, config, filepath) + return config + + +def write_missing_config(sections, config_object, filepath): + """ + Write in default config for modules not in config file. Returns True if config was written, False if not. + + config_object - The config object + filepath - The path to the config file + sections - Dictionary mapping section names to the Python module containing its DEFAULTCONF + """ + ConfNeedsWrite = False + keys = list(sections.keys()) + keys.sort() + for section_name in keys: + if section_name in config_object: + continue + try: + conf = sections[section_name].DEFAULTCONF + except Exception as e: + logger.warning(e) + continue + ConfNeedsWrite = True + update_paths_in_config(conf, filepath) + config_object.add_section(section_name) + for key in conf: + config_object.set(section_name, key, str(conf[key])) + + if ConfNeedsWrite: + with codecs.open(filepath, 'w', 'utf-8') as f: + config_object.write(f) + return True + return False + + def reset_config(sections, config, filepath=None): """ Reset specific sections of a config file to their factory defaults. diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 8a002436..acfd8c43 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -32,9 +32,9 @@ from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_file_list, queue2list) from multiscanner import config as msconf -from multiscanner.config import (PY3, get_config_path, - reset_config, update_ms_config, - update_ms_config_file, update_paths_in_config) +from multiscanner.config import (PY3, config_init, get_config_path, + update_ms_config, update_ms_config_file, + update_paths_in_config, write_missing_config) from multiscanner.storage import storage @@ -685,59 +685,7 @@ def _parse_args(): return parser.parse_args() -def _write_missing_module_configs(config, filepath=None): - """ - Write in default config for modules not in config file. 
Returns True if config was written, False if not. - - Also adds a '[main]' section if not present. - - module_list - The list of modules (filenames) - config - The config object - """ - if not filepath: - filepath = msconf.CONFIG_FILE - - ConfNeedsWrite = False - for modname, module in sorted(six.iteritems(msconf.MODULE_LIST)): - if modname not in config.keys(): - moddir = module[1] - mod = load_module(modname, [moddir]) - if mod: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue - if modname not in config.keys(): - ConfNeedsWrite = True - config[modname] = {} - for key in conf: - config[modname][key] = str(conf[key]) - - if 'main' not in config.keys(): - ConfNeedsWrite = True - update_paths_in_config(DEFAULTCONF, filepath) - config['main'] = {} - for key in DEFAULTCONF: - config['main'][key] = str(DEFAULTCONF[key]) - - if ConfNeedsWrite: - config_object = configparser.ConfigParser() - config_object.optionxform = str - config_object.read_dict(config) - with codecs.open(filepath, 'w', 'utf-8') as f: - config_object.write(f) - return True - return False - - -def config_init(filepath=None): - """ - Creates a new config file at filepath - - filepath - The config file to create - """ - # Compile all the sections to go in the config +def _get_main_modules(): module_list = {} module_list['main'] = sys.modules[__name__] # current module for modname, module in sorted(six.iteritems(msconf.MODULE_LIST)): @@ -745,20 +693,17 @@ def config_init(filepath=None): mod = load_module(modname, [moddir]) if mod: module_list[modname] = mod - - config = configparser.ConfigParser() - config.optionxform = str - - reset_config(module_list, config, filepath) - update_ms_config(config) # Set global main config - logger.info('Configuration file initialized at {}'.format(filepath)) + return module_list def _init(args): + # Initialize configuration file if args.config is None: args.config = msconf.CONFIG_FILE - # Initialize configuration file + # Compile all 
the sections to go in the config + module_list = _get_main_modules() + if os.path.isfile(args.config): logger.warning('{} already exists, overwriting will destroy changes'.format(args.config)) try: @@ -767,16 +712,22 @@ def _init(args): logger.warn(e) answer = 'N' if answer == 'y': - config_init(args.config) + config = config_init(args.config, module_list) + update_ms_config(config) # Set global main config + logger.info('Main configuration file initialized at {}'.format(args.config)) else: - logger.info('Checking for missing modules in configuration...') + logger.info('Checking for missing modules in main configuration...') config = msconf.MS_CONFIG # MS_CONFIG will already have been set in main() - _write_missing_module_configs(config, filepath=args.config) + write_missing_config(module_list, config, args.config) else: - config_init(args.config) + config = config_init(args.config, module_list) + update_ms_config(config) # Set global main config + logger.info('Main configuration file initialized at {}'.format(args.config)) # Init storage storage_config = get_config_path('storage') + storage_classes = storage._get_storage_classes() + storage_classes['main'] = sys.modules[storage.__name__] if os.path.isfile(storage_config): logger.warning('{} already exists, overwriting will destroy changes'.format(storage_config)) try: @@ -785,13 +736,13 @@ def _init(args): logger.warn(e) answer = 'N' if answer == 'y': - storage.config_init(storage_config, overwrite=True) + config_init(storage_config, storage_classes, overwrite=True) logger.info('Storage configuration file initialized at {}'.format(storage_config)) else: logger.info('Checking for missing modules in storage configuration...') - storage.config_init(storage_config, overwrite=False) + config_init(storage_config, storage_classes, overwrite=False) else: - storage.config_init(storage_config) + config_init(storage_config, storage_classes) logger.info('Storage configuration file initialized at {}'.format(storage_config)) 
exit(0) @@ -831,7 +782,8 @@ def _main(): config_init(args.config) else: # Write the default configure settings for any missing modules - _write_missing_module_configs(msconf.MS_CONFIG, filepath=msconf.CONFIG_FILE) + module_list = _get_main_modules() + write_missing_config(module_list, msconf.MS_CONFIG, msconf.CONFIG_FILE) # Make sure report is not a dir if args.json: diff --git a/multiscanner/storage/__init__.py b/multiscanner/storage/__init__.py index 30410fc0..40ce5430 100644 --- a/multiscanner/storage/__init__.py +++ b/multiscanner/storage/__init__.py @@ -1,3 +1,3 @@ -from .storage import config_init, Storage, StorageHandler +from .storage import Storage, StorageHandler __all__ = ['config_init', 'Storage', 'StorageHandler', ] diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 7a1fd565..be5471fd 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -4,7 +4,6 @@ from __future__ import (absolute_import, division, unicode_literals, with_statement) -import codecs import configparser import inspect import logging @@ -18,7 +17,7 @@ from multiscanner.common import utils -from multiscanner.config import get_config_path, parse_config, reset_config +from multiscanner.config import get_config_path, parse_config DEFAULTCONF = { @@ -254,62 +253,6 @@ def is_done(self, wait=False): return self.storage_counter.is_done() -def config_init(filepath, overwrite=False, storage_classes=None): - if storage_classes is None: - storage_classes = _get_storage_classes() - config_object = configparser.ConfigParser() - config_object.optionxform = str - if overwrite or not os.path.isfile(filepath): - _write_main_config(config_object) - reset_config(storage_classes, config_object, filepath) - else: - config_object.read(filepath) - _write_main_config(config_object) - _write_missing_config(config_object, filepath, storage_classes=storage_classes) - - -def _write_main_config(config_object): - """Write default config for storage 
config's [main] section - """ - if not config_object.has_section('main'): - config_object.add_section('main') - for key in DEFAULTCONF: - config_object.set('main', key, str(DEFAULTCONF[key])) - - -def _write_missing_config(config_object, filepath, storage_classes=None): - """ - Write in default config for modules not in config file. Returns True if config was written, False if not. - - config_object - The config object - filepath - The path to the config file - storage_classes - The dictionary object from _get_storage_classes. If None we call _get_storage_classes() - """ - if storage_classes is None: - storage_classes = _get_storage_classes() - ConfNeedsWrite = False - keys = list(storage_classes.keys()) - keys.sort() - for module in keys: - if module in config_object: - continue - try: - conf = module.DEFAULTCONF - except Exception as e: - logger.warning(e) - continue - ConfNeedsWrite = True - config_object.add_section(module) - for key in conf: - config_object.set(module, key, str(conf[key])) - - if ConfNeedsWrite: - with codecs.open(filepath, 'w', 'utf-8') as f: - config_object.write(f) - return True - return False - - def _get_storage_classes(dir_path=STORAGE_DIR): storage_classes = {} dir_list = utils.parse_dir(dir_path, recursive=True) diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index 8242b2a8..45f27364 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -47,7 +47,8 @@ def test_config_api_with_empty_file(): def test_config_api_with_real_file(): config = {'test_conf': {'a': 'z'}} config_file = tempfile.mkstemp()[1] - multiscanner.config_init(config_file) + module_list = multiscanner._get_main_modules() + multiscanner.config_init(config_file, module_list) multiscanner.update_ms_config_file(config_file) results, metadata = multiscanner.multiscan( filelist, config=config, diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 
b48ba8f0..3b0819d8 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -16,7 +16,7 @@ def setup_class(cls): multiscanner.MODULES_DIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) config_file = '.tmpfile.ini' - multiscanner.config_init(config_file) + multiscanner.config_init(config_file, multiscanner._get_main_modules()) multiscanner.update_ms_config_file(config_file) @classmethod From 798369d9ad9d6abca2391c963933e78e8cabc58c Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Fri, 12 Apr 2019 14:11:05 -0400 Subject: [PATCH 22/38] Fix API test due to filemeta module --- multiscanner/tests/test_api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/multiscanner/tests/test_api.py b/multiscanner/tests/test_api.py index 66086361..d6480256 100644 --- a/multiscanner/tests/test_api.py +++ b/multiscanner/tests/test_api.py @@ -89,9 +89,8 @@ def test_create_first_task(self): def test_get_modules(self): resp = self.app.get('/api/v1/modules').get_data().decode('utf-8') self.assertIn('AVGScan', resp) - self.assertIn('MD5', resp) - self.assertIn('SHA256', resp) - self.assertIn('libmagic', resp) + self.assertIn('filemeta', resp) + self.assertIn('Cuckoo', resp) class TestTaskCreateCase(APITestCase): From d526d36fbe5a3b038694638ed5e271bd8d526e90 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 23 Apr 2019 11:00:10 -0400 Subject: [PATCH 23/38] Update tests --- multiscanner/__init__.py | 3 +- multiscanner/modules/antivirus/AVGScan.py | 2 +- multiscanner/modules/antivirus/MSEScan.py | 2 +- multiscanner/modules/antivirus/McAfeeScan.py | 2 +- multiscanner/modules/database/NSRL.py | 4 +- .../modules/machinelearning/EndgameEmber.py | 2 +- .../modules/metadata/ExifToolsScan.py | 2 +- multiscanner/modules/metadata/TrID.py | 2 +- multiscanner/modules/signature/YaraScan.py | 2 +- multiscanner/ms.py | 3 +- multiscanner/tests/modules/test_1.py | 7 +- 
multiscanner/tests/modules/test_subscan.py | 7 +- multiscanner/tests/test_multiscanner.py | 67 +++++++++++++++---- 13 files changed, 77 insertions(+), 28 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index 770c52ce..b7e8e9ad 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -4,8 +4,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from .config import ( # noqa F401 - CONFIG_FILE, MS_WD, MS_CONFIG, MODULES_DIR, MODULE_LIST, PY3, - config_init, update_ms_config, update_ms_config_file + MS_WD, PY3, config_init, update_ms_config, update_ms_config_file ) from .ms import ( # noqa F401 diff --git a/multiscanner/modules/antivirus/AVGScan.py b/multiscanner/modules/antivirus/AVGScan.py index eb613944..67b25ff2 100644 --- a/multiscanner/modules/antivirus/AVGScan.py +++ b/multiscanner/modules/antivirus/AVGScan.py @@ -21,7 +21,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/antivirus/MSEScan.py b/multiscanner/modules/antivirus/MSEScan.py index 63349d49..0489c2b0 100644 --- a/multiscanner/modules/antivirus/MSEScan.py +++ b/multiscanner/modules/antivirus/MSEScan.py @@ -18,7 +18,7 @@ NAME = "Microsoft Security Essentials" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/antivirus/McAfeeScan.py b/multiscanner/modules/antivirus/McAfeeScan.py index 9224ec43..4256601b 100644 --- a/multiscanner/modules/antivirus/McAfeeScan.py +++ 
b/multiscanner/modules/antivirus/McAfeeScan.py @@ -19,7 +19,7 @@ NAME = "McAfee" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/database/NSRL.py b/multiscanner/modules/database/NSRL.py index f066c8c0..712b2434 100755 --- a/multiscanner/modules/database/NSRL.py +++ b/multiscanner/modules/database/NSRL.py @@ -19,8 +19,8 @@ REQUIRES = ["filemeta"] DEFAULTCONF = { - 'hash_list': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), - 'offsets': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), + 'hash_list': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), + 'offsets': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), 'ENABLED': True } diff --git a/multiscanner/modules/machinelearning/EndgameEmber.py b/multiscanner/modules/machinelearning/EndgameEmber.py index 79e2cf98..4f3e9da4 100644 --- a/multiscanner/modules/machinelearning/EndgameEmber.py +++ b/multiscanner/modules/machinelearning/EndgameEmber.py @@ -30,7 +30,7 @@ REQUIRES = ['libmagic'] DEFAULTCONF = { 'ENABLED': False, - 'path-to-model': os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), + 'path-to-model': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), } LGBM_MODEL = None diff --git a/multiscanner/modules/metadata/ExifToolsScan.py b/multiscanner/modules/metadata/ExifToolsScan.py index 7b32d768..bf4b875a 100644 --- a/multiscanner/modules/metadata/ExifToolsScan.py +++ b/multiscanner/modules/metadata/ExifToolsScan.py @@ -20,7 +20,7 @@ NAME = "ExifTool" # These are overwritten by the config file HOST = ("MultiScanner", 22, "User") -KEY = 
os.path.join(os.path.split(ms.CONFIG_FILE)[0], "etc", "id_rsa") +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], "etc", "id_rsa") PATHREPLACE = "X:\\" # Entries to be removed from the final results REMOVEENTRY = ["ExifTool Version Number", "File Name", "Directory", "File Modification Date/Time", diff --git a/multiscanner/modules/metadata/TrID.py b/multiscanner/modules/metadata/TrID.py index 375ab933..c4103c32 100644 --- a/multiscanner/modules/metadata/TrID.py +++ b/multiscanner/modules/metadata/TrID.py @@ -24,7 +24,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(ms.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/signature/YaraScan.py b/multiscanner/modules/signature/YaraScan.py index e468aebc..09729e9f 100644 --- a/multiscanner/modules/signature/YaraScan.py +++ b/multiscanner/modules/signature/YaraScan.py @@ -18,7 +18,7 @@ TYPE = "Signature" NAME = "Yara" DEFAULTCONF = { - "ruledir": os.path.join(os.path.split(ms.CONFIG_FILE)[0], "etc", "yarasigs"), + "ruledir": os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], "etc", "yarasigs"), "fileextensions": [".yar", ".yara", ".sig"], "ignore-tags": ["TLPRED"], "string-threshold": 30, diff --git a/multiscanner/ms.py b/multiscanner/ms.py index acfd8c43..1f243f74 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -779,7 +779,8 @@ def _main(): update_paths_in_config(DEFAULTCONF, msconf.CONFIG_FILE) if not os.path.isfile(args.config): - config_init(args.config) + module_list = _get_main_modules() + config_init(args.config, module_list) else: # Write the default configure settings for any missing modules module_list = _get_main_modules() diff --git a/multiscanner/tests/modules/test_1.py b/multiscanner/tests/modules/test_1.py index 88569fcb..c4699dd5 100644 --- 
a/multiscanner/tests/modules/test_1.py +++ b/multiscanner/tests/modules/test_1.py @@ -3,13 +3,16 @@ """ TYPE = "Test" NAME = "test_1" +DEFAULTCONF = { + 'ENABLED': True +} -def check(): +def check(conf=DEFAULTCONF): return True -def scan(filelist): +def scan(filelist, conf=DEFAULTCONF): results = [] for fname in filelist: diff --git a/multiscanner/tests/modules/test_subscan.py b/multiscanner/tests/modules/test_subscan.py index 8786999a..4dccff11 100644 --- a/multiscanner/tests/modules/test_subscan.py +++ b/multiscanner/tests/modules/test_subscan.py @@ -3,16 +3,19 @@ """ TYPE = "Test" NAME = "test_subscan" +DEFAULTCONF = { + 'ENABLED': True +} # Overwritten in multiscanner multiscanner = None -def check(): +def check(conf=DEFAULTCONF): return True -def scan(filelist): +def scan(filelist, conf=DEFAULTCONF): results = [] for f in filelist: diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 3b0819d8..d46a6e41 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -1,4 +1,5 @@ from __future__ import division, absolute_import, print_function, unicode_literals +import configparser import os import sys @@ -8,30 +9,35 @@ # Makes sure we use the multiscanner in ../ CWD = os.path.dirname(os.path.abspath(__file__)) +TEST_CONFIG_FILE = '.tmpfile.ini' +TEST_REPORT = 'tmp_report.json' + class _runmulti_tests(object): @classmethod def setup_class(cls): - cls.real_mod_dir = multiscanner.MODULES_DIR - multiscanner.MODULES_DIR = os.path.join(CWD, "modules") + cls.real_mod_dir = multiscanner.config.MODULES_DIR + cls.real_mod_list = multiscanner.config.MODULE_LIST + multiscanner.config.MODULES_DIR = os.path.join(CWD, "modules") + multiscanner.config.MODULE_LIST = multiscanner.config.get_modules() cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) - config_file = '.tmpfile.ini' - multiscanner.config_init(config_file, multiscanner._get_main_modules()) - 
multiscanner.update_ms_config_file(config_file) @classmethod def teardown_class(cls): - multiscanner.MODULES_DIR = cls.real_mod_dir + multiscanner.config.MODULES_DIR = cls.real_mod_dir + multiscanner.config.MODULE_LIST = cls.real_mod_list -class Test_multiscan(_runmulti_tests): +class TestMultiscan(_runmulti_tests): def setup(self): + multiscanner.config_init(TEST_CONFIG_FILE, multiscanner._get_main_modules()) + multiscanner.update_ms_config_file(TEST_CONFIG_FILE) self.result = multiscanner.multiscan(self.filelist) self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True) self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True) def teardown(self): - os.remove('.tmpfile.ini') + os.remove(TEST_CONFIG_FILE) def test_multiscan_results(self): for f in self.filelist: @@ -39,19 +45,56 @@ def test_multiscan_results(self): assert f in self.report_m['Files'] -class Test_main(_runmulti_tests): +class TestMain(_runmulti_tests): def setup(self): + multiscanner.config_init(TEST_CONFIG_FILE, multiscanner._get_main_modules()) + multiscanner.update_ms_config_file(TEST_CONFIG_FILE) sys.argv = [''] def teardown(self): try: - os.remove('.tmpfile.ini') - os.remove('tmp_report.json') + os.remove(TEST_CONFIG_FILE) + os.remove(TEST_REPORT) except Exception as e: # TODO: log exception pass def test_basic_main(self): - sys.argv = ['-z', '-j', 'tmp_report.json'] + sys.argv = ['-z', '-j', TEST_REPORT] sys.argv.extend(self.filelist) multiscanner._main() + + +class TestMissingConfig(_runmulti_tests): + def setup(self): + sys.argv = ['-c', TEST_CONFIG_FILE, 'init'] + multiscanner._main() + + def test_config_init(self): + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read(TEST_CONFIG_FILE) + + assert config_object.has_section('main') + assert config_object.has_section('test_1') + assert not config_object.has_section('Cuckoo') + + def test_fill_in_missing_config_sections(self): + # 
Simulate a section missing from config file before multiscanner is imported/run + config_object = configparser.ConfigParser() + config_object.optionxform = str + config_object.read(TEST_CONFIG_FILE) + config_object.remove_section('main') + config_object.remove_section('test_1') + with open(TEST_CONFIG_FILE, 'w') as conf_file: + config_object.write(conf_file) + + # Run MultiScanner + sys.argv = ['-c', TEST_CONFIG_FILE, os.path.join(CWD, 'files')] + multiscanner._main() + with open(TEST_CONFIG_FILE, 'r') as conf_file: + conf = conf_file.read() + assert 'test_1' in conf + + def teardown(self): + os.remove(TEST_CONFIG_FILE) From 508b7f4a59135ddf95ecb15fb2dc749ccd2d3d7d Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Mon, 29 Apr 2019 12:02:58 -0400 Subject: [PATCH 24/38] Switch to using ConfigParser objects ...instead of dicts for main configs. Needed because write_missing_config() needs a ConfigParser object and it was being called from different places, sometimes as a dict, sometimes ConfigParser. --- multiscanner/config.py | 39 ++++++++++++++++++++++++------ multiscanner/distributed/api.py | 4 +-- multiscanner/ms.py | 3 ++- multiscanner/storage/sql_driver.py | 4 +-- multiscanner/storage/storage.py | 22 +++++++---------- multiscanner/web/app.py | 2 +- 6 files changed, 47 insertions(+), 27 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index b6f2ebc8..d1d03964 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -26,13 +26,22 @@ # The default config file CONFIG_FILE = None -# Main MultiScanner config, as a dictionary +# Main MultiScanner config, as a ConfigParser object MS_CONFIG = None # The dictionary of modules and whether they're enabled or not MODULE_LIST = None +def get_with_default(config, section, option, default): + """Get 'option' from the named 'section' in given ConfigParser object. 
+ If option is not present, returns the provided default.""" + if config.has_option(section, option): + return config.get(section, option) + else: + return default + + def get_configuration_paths(): # Possible paths for the configuration file. # This should go in order from local to global. @@ -68,7 +77,7 @@ def determine_configuration_path(filepath): def parse_config(config_object): - """Converts a config object to a dictionary""" + """Converts a ConfigParser object to a dictionary""" return_var = {} for section in config_object.sections(): section_dict = dict(config_object.items(section)) @@ -84,6 +93,20 @@ def parse_config(config_object): return return_var +def dict_to_config(dictionary): + """Converts a dictionary to a ConfigParser object""" + config = configparser.ConfigParser() + config.optionxform = str + + for name, section in dictionary.items(): + if name == '_load_default': + continue + config.add_section(name) + for key in section.keys(): + config.set(name, key, str(section[key])) + return config + + def write_config(config_object, config_file, section_name, default_config): """Write the default configuration to the given config file @@ -101,7 +124,7 @@ def write_config(config_object, config_file, section_name, default_config): def read_config(config_file, section_name=None, default_config=None): - """Parse a config file into a dictionary + """Parse a config file into a ConfigParser object Can optionally set a default configuration by providing 'section_name' and 'default_config' arguments. @@ -117,7 +140,7 @@ def read_config(config_file, section_name=None, default_config=None): (not config_object.has_section(section_name) or not os.path.isfile(config_file)): # Write default config write_config(config_object, config_file, section_name, default_config) - return parse_config(config_object) + return config_object MS_CONFIG = read_config(CONFIG_FILE) @@ -177,15 +200,15 @@ def get_modules(): def update_ms_config(config): - """Update global config dictionary. 
+ """Update global config object. config - the ConfigParser object or dictionary to replace MS_CONFIG with """ global MS_CONFIG if isinstance(config, configparser.ConfigParser): - MS_CONFIG = parse_config(config) - else: MS_CONFIG = config + else: + MS_CONFIG = dict_to_config(config) def update_ms_config_file(config_file): @@ -242,7 +265,7 @@ def write_missing_config(sections, config_object, filepath): """ Write in default config for modules not in config file. Returns True if config was written, False if not. - config_object - The config object + config_object - The ConfigParser object filepath - The path to the config file sections - Dictionary mapping section names to the Python module containing its DEFAULTCONF """ diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 4f34542a..3b701d0c 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -121,7 +121,7 @@ def default(self, obj): from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic -db = database.Database(config=api_config.get('Database'), regenconfig=False) +db = database.Database(config=api_config['Database'], regenconfig=False) # To run under Apache, we need to set up the DB outside of __main__ # Sleep and retry until database connection is successful try: @@ -861,7 +861,7 @@ def get_maec_report(task_id): # Get the MAEC report from Cuckoo try: maec_report = requests.get( - '{}/v1/tasks/report/{}/maec'.format(ms_config.get('Cuckoo', {}).get('API URL', ''), cuckoo_task_id) + '{}/v1/tasks/report/{}/maec'.format(ms_config['Cuckoo'].get('API URL', ''), cuckoo_task_id) ) except Exception as e: logger.warning('No MAEC report found for that task! 
- {}'.format(e)) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 1f243f74..9d0b6158 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -416,6 +416,7 @@ def multiscan(Files, config=None, module_list=None): # A dictionary used for the copyfileto parameter filedic = {} + print(dict(config.items())) # Read in config if config is None: config = {} @@ -782,7 +783,7 @@ def _main(): module_list = _get_main_modules() config_init(args.config, module_list) else: - # Write the default configure settings for any missing modules + # Write the default config settings for any missing modules module_list = _get_main_modules() write_missing_config(module_list, msconf.MS_CONFIG, msconf.CONFIG_FILE) diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index fbff4da6..d9f25158 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -17,7 +17,7 @@ from sqlalchemy.pool import QueuePool from sqlalchemy_utils import create_database, database_exists -from multiscanner.config import get_config_path, reset_config +from multiscanner.config import dict_to_config, get_config_path, reset_config CONFIG_FILE = get_config_path('api') @@ -101,7 +101,7 @@ def __init__(self, config=None, configfile=None, regenconfig=False): if config: for key_ in config: config_from_file[key_] = config[key_] - self.config = config_from_file + self.config = dict_to_config(config_from_file) def init_db(self): """ diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index be5471fd..6dd4ffa6 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -17,7 +17,7 @@ from multiscanner.common import utils -from multiscanner.config import get_config_path, parse_config +from multiscanner.config import get_config_path, get_with_default DEFAULTCONF = { @@ -101,20 +101,16 @@ def __init__(self, configfile=None, config=None): config_object.optionxform = str config_object.read(configfile) if 
config: - file_conf = parse_config(config_object) for key in config: - if key not in file_conf: - file_conf[key] = config[key] - file_conf[key]['_load_default'] = True + if key not in config_object: + config_object[key] = config[key] + config_object[key]['_load_default'] = True else: - file_conf[key].update(config[key]) - config = file_conf - else: - config = parse_config(config_object) + config_object[key].update(config[key]) + config = config_object - config_main = config.get('main', {}) - self.sleep_time = config_main.get('retry_time', DEFAULTCONF['retry_time']) - self.num_retries = config_main.get('retry_num', DEFAULTCONF['retry_num']) + self.sleep_time = get_with_default(config, 'main', 'retry_time', DEFAULTCONF['retry_time']) + self.num_retries = get_with_default(config, 'main', 'retry_num', DEFAULTCONF['retry_num']) # Set the config inside of the storage classes for storage_name in storage_classes: @@ -125,7 +121,7 @@ def __init__(self, configfile=None, config=None): del config[storage_name]['_load_default'] # Update the default storage config storage_classes[storage_name].config = storage_classes[storage_name].DEFAULTCONF - storage_classes[storage_name].config.update(config[storage_name]) + storage_classes[storage_name].config.read_dict(config[storage_name]) else: storage_classes[storage_name].config = config[storage_name] diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index ea98bfb5..9e110d16 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -29,7 +29,7 @@ # Finagle Flask to read config from .ini file instead of .py file web_config_file = get_config_path('web') -web_config = read_config(web_config_file, 'web', DEFAULTCONF).get('web') +web_config = dict(read_config(web_config_file, 'web', DEFAULTCONF).items('web')) conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From c3586f7caa77dfcf0af262849bd6c78a2ce64177 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: 
Mon, 29 Apr 2019 14:01:13 -0400 Subject: [PATCH 25/38] Remove _load_default handling from ms.py We can always start with the default config and then update it based on the config passed in. --- multiscanner/ms.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 9d0b6158..83234ec1 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -315,27 +315,14 @@ def _start_module_threads(filelist, module_list, config, global_module_interface if not mod: logger.warning("{} not a valid module...".format(modname)) continue - conf = None + try: + conf = mod.DEFAULTCONF + except Exception as e: + logger.error(e) + conf = {} if modname in config: - if '_load_default' in config or '_load_default' in config[modname]: - try: - conf = mod.DEFAULTCONF - conf.update(config[modname]) - except Exception as e: - logger.warning(e) - conf = config[modname] - # Remove _load_default from config - if '_load_default' in conf: - del conf['_load_default'] - else: - conf = config[modname] + conf.update(config[modname]) - # Try and read in the default conf if one was not passed - if not conf: - try: - conf = mod.DEFAULTCONF - except Exception as e: - logger.error(e) thread = _Thread( target=_run_module, args=(modname, mod, filelist, ThreadDict, global_module_interface, conf)) @@ -416,12 +403,9 @@ def multiscan(Files, config=None, module_list=None): # A dictionary used for the copyfileto parameter filedic = {} - print(dict(config.items())) # Read in config if config is None: config = {} - else: - config['_load_default'] = True if 'main' in config: main_config = config['main'] else: From e6eaad2db476cc6354eb0881ea39211586ca463d Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Wed, 1 May 2019 07:54:19 -0400 Subject: [PATCH 26/38] Use custom ConfigParser class to convert config values to Python literals (eg. 
"True" to a Python boolean) --- multiscanner/__init__.py | 2 +- multiscanner/config.py | 34 +++++++++++++---------- multiscanner/distributed/api.py | 8 +++--- multiscanner/distributed/celery_worker.py | 24 ++++++++-------- multiscanner/ms.py | 9 +++--- multiscanner/storage/sql_driver.py | 8 ++---- multiscanner/storage/storage.py | 11 +++----- multiscanner/tests/test_celery_worker.py | 2 +- multiscanner/tests/test_multiscanner.py | 7 ++--- requirements.txt | 2 +- 10 files changed, 54 insertions(+), 53 deletions(-) diff --git a/multiscanner/__init__.py b/multiscanner/__init__.py index b7e8e9ad..89606184 100644 --- a/multiscanner/__init__.py +++ b/multiscanner/__init__.py @@ -4,7 +4,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from .config import ( # noqa F401 - MS_WD, PY3, config_init, update_ms_config, update_ms_config_file + MSConfigParser, MS_WD, PY3, config_init, update_ms_config, update_ms_config_file ) from .ms import ( # noqa F401 diff --git a/multiscanner/config.py b/multiscanner/config.py index d1d03964..91073cb6 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -33,13 +33,22 @@ MODULE_LIST = None -def get_with_default(config, section, option, default): - """Get 'option' from the named 'section' in given ConfigParser object. 
- If option is not present, returns the provided default.""" - if config.has_option(section, option): - return config.get(section, option) - else: - return default +class MSConfigParser(configparser.ConfigParser): + def __init__(self, *args, **kwargs): + super(MSConfigParser, self).__init__(*args, **kwargs) + self.optionxform = str # Preserve case + + def __getitem__(self, key): + """Attempts to convert value to a Python literal if possible.""" + value = super(MSConfigParser, self).__getitem__(key) + try: + return ast.literal_eval(value) + except (SyntaxError, ValueError) as e: + # Ignore if config value isn't convertible to a Python literal + pass + except Exception as e: + logger.debug(e) + return value def get_configuration_paths(): @@ -95,8 +104,7 @@ def parse_config(config_object): def dict_to_config(dictionary): """Converts a dictionary to a ConfigParser object""" - config = configparser.ConfigParser() - config.optionxform = str + config = MSConfigParser() for name, section in dictionary.items(): if name == '_load_default': @@ -133,8 +141,7 @@ def read_config(config_file, section_name=None, default_config=None): section_name - the name of the section of defaults to be added default_config - values to set this configuration to """ - config_object = configparser.ConfigParser() - config_object.optionxform = str + config_object = MSConfigParser() config_object.read(config_file) if section_name is not None and default_config is not None and \ (not config_object.has_section(section_name) or not os.path.isfile(config_file)): @@ -205,7 +212,7 @@ def update_ms_config(config): config - the ConfigParser object or dictionary to replace MS_CONFIG with """ global MS_CONFIG - if isinstance(config, configparser.ConfigParser): + if isinstance(config, MSConfigParser): MS_CONFIG = config else: MS_CONFIG = dict_to_config(config) @@ -250,8 +257,7 @@ def config_init(filepath, sections, overwrite=False): overwrite - Whether to overwrite the config file at filepath, if it already 
exists """ - config = configparser.ConfigParser() - config.optionxform = str + config = MSConfigParser() if overwrite or not os.path.isfile(filepath): return reset_config(sections, config, filepath) diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 3b701d0c..c1cfa7e9 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -121,7 +121,7 @@ def default(self, obj): from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic -db = database.Database(config=api_config['Database'], regenconfig=False) +db = database.Database(config=dict(api_config.items('Database')), regenconfig=False) # To run under Apache, we need to set up the DB outside of __main__ # Sleep and retry until database connection is successful try: @@ -155,7 +155,7 @@ def default(self, obj): storage_handler = StorageHandler() handler = storage_handler.load_required_module('ElasticSearchStorage') -ms_config_file = ms.MS_CONFIG +ms_config_file = ms.config.MS_CONFIG ms_config = read_config(ms_config_file) try: @@ -439,7 +439,7 @@ def queue_task(original_filename, f_name, full_path, metadata, rescan=False, # Publish the task to Celery multiscanner_celery.apply_async( args=(full_path, original_filename, task_id, f_name, metadata), - kwargs=dict(config=ms.MS_CONFIG, module_list=module_list), + kwargs=dict(config=ms.config.MS_CONFIG, module_list=module_list), **{'queue': queue_name, 'priority': priority, 'routing_key': routing_key} ) else: @@ -861,7 +861,7 @@ def get_maec_report(task_id): # Get the MAEC report from Cuckoo try: maec_report = requests.get( - '{}/v1/tasks/report/{}/maec'.format(ms_config['Cuckoo'].get('API URL', ''), cuckoo_task_id) + '{}/v1/tasks/report/{}/maec'.format(ms_config.get('Cuckoo', 'API URL', fallback=''), cuckoo_task_id) ) except Exception as e: logger.warning('No MAEC report found for that task! 
- {}'.format(e)) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index ad00d021..b4b400f2 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -35,23 +35,25 @@ configfile = msconf.get_config_path('api') config = msconf.read_config(configfile, 'celery', DEFAULTCONF) -worker_config = config.get('celery') -db_config = config.get('Database') +db_config = dict(config.items('Database')) storage_configfile = msconf.get_config_path('storage') storage_config = msconf.read_config(storage_configfile) -es_storage_config = storage_config.get('ElasticSearchStorage') +try: + es_storage_config = storage_config['ElasticSearchStorage'] +except KeyError: + es_storage_config = {} default_exchange = Exchange('celery', type='direct') app = Celery(broker='{0}://{1}:{2}@{3}/{4}'.format( - worker_config.get('protocol'), - worker_config.get('user'), - worker_config.get('password'), - worker_config.get('host'), - worker_config.get('vhost'), + config.get('celery', 'protocol'), + config.get('celery', 'user'), + config.get('celery', 'password'), + config.get('celery', 'host'), + config.get('celery', 'vhost'), )) -app.conf.timezone = worker_config.get('tz') +app.conf.timezone = config.get('celery', 'tz') app.conf.task_queues = [ Queue('low_tasks', default_exchange, routing_key='tasks.low', queue_arguments={'x-max-priority': 10}), Queue('medium_tasks', default_exchange, routing_key='tasks.medium', queue_arguments={'x-max-priority': 10}), @@ -80,7 +82,7 @@ def setup_periodic_tasks(sender, **kwargs): sender.add_periodic_task( crontab(hour=3, minute=0), metricbeat_rollover_celery.s(), - args=(es_storage_config.get('metricbeat_rollover_days')), + args=(es_storage_config.get('metricbeat_rollover_days'), 7), kwargs=dict(config=msconf.MS_CONFIG), **{ 'queue': 'low_tasks', @@ -241,7 +243,7 @@ def metricbeat_rollover_celery(days): return if not days: - days = 
es_storage_config.get('metricbeat_rollover_days') + days = es_storage_config.get('metricbeat_rollover_days', 7) if not days: raise NameError("name 'days' is not defined, check storage.ini for 'metricbeat_rollover_days' setting") diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 83234ec1..b6ba3f45 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -7,7 +7,6 @@ import argparse import codecs -import configparser import datetime import json import logging @@ -32,7 +31,7 @@ from multiscanner.common.utils import (basename, convert_encoding, load_module, parse_file_list, queue2list) from multiscanner import config as msconf -from multiscanner.config import (PY3, config_init, get_config_path, +from multiscanner.config import (MSConfigParser, PY3, config_init, get_config_path, update_ms_config, update_ms_config_file, update_paths_in_config, write_missing_config) from multiscanner.storage import storage @@ -415,6 +414,9 @@ def multiscan(Files, config=None, module_list=None): if "copyfilesto" not in main_config: main_config["copyfilesto"] = False if main_config["copyfilesto"]: + print(str(type(config))) + print(str(type(main_config))) + print(str(type(main_config['copyfilesto']))) if os.path.isdir(main_config["copyfilesto"]): filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"]) else: @@ -832,8 +834,7 @@ def _main(): results = multiscan(filelist, config=msconf.MS_CONFIG) # We need to read in the config for the parseReports call - config = configparser.ConfigParser() - config.optionxform = str + config = MSConfigParser() config.read(args.config) config = msconf.MS_CONFIG['main'] # Make sure we have a group-types diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index d9f25158..386bbe2c 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import print_function -import configparser import json import logging import os 
@@ -17,7 +16,7 @@ from sqlalchemy.pool import QueuePool from sqlalchemy_utils import create_database, database_exists -from multiscanner.config import dict_to_config, get_config_path, reset_config +from multiscanner.config import MSConfigParser, get_config_path, reset_config CONFIG_FILE = get_config_path('api') @@ -74,8 +73,7 @@ def __init__(self, config=None, configfile=None, regenconfig=False): self.db_engine = None # Configuration parsing - config_parser = configparser.ConfigParser() - config_parser.optionxform = str + config_parser = MSConfigParser() if configfile is None: configfile = CONFIG_FILE @@ -101,7 +99,7 @@ def __init__(self, config=None, configfile=None, regenconfig=False): if config: for key_ in config: config_from_file[key_] = config[key_] - self.config = dict_to_config(config_from_file) + self.config = config_from_file def init_db(self): """ diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 6dd4ffa6..3596a67c 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -4,7 +4,6 @@ from __future__ import (absolute_import, division, unicode_literals, with_statement) -import configparser import inspect import logging import os @@ -17,7 +16,7 @@ from multiscanner.common import utils -from multiscanner.config import get_config_path, get_with_default +from multiscanner.config import MSConfigParser, get_config_path DEFAULTCONF = { @@ -97,8 +96,7 @@ def __init__(self, configfile=None, config=None): configfile = get_config_path('storage') # Read in config - config_object = configparser.ConfigParser() - config_object.optionxform = str + config_object = MSConfigParser() config_object.read(configfile) if config: for key in config: @@ -109,9 +107,8 @@ def __init__(self, configfile=None, config=None): config_object[key].update(config[key]) config = config_object - self.sleep_time = get_with_default(config, 'main', 'retry_time', DEFAULTCONF['retry_time']) - self.num_retries = get_with_default(config, 
'main', 'retry_num', DEFAULTCONF['retry_num']) - + self.sleep_time = config.get('main', 'retry_time', fallback=DEFAULTCONF['retry_time']) + self.num_retries = config.get('main', 'retry_num', fallback=DEFAULTCONF['retry_num']) # Set the config inside of the storage classes for storage_name in storage_classes: if storage_name in config: diff --git a/multiscanner/tests/test_celery_worker.py b/multiscanner/tests/test_celery_worker.py index b3e7ee1b..fa76db24 100644 --- a/multiscanner/tests/test_celery_worker.py +++ b/multiscanner/tests/test_celery_worker.py @@ -38,7 +38,7 @@ with open(TEST_FULL_PATH, 'r') as f: TEST_FILE_HASH = hashlib.sha256(f.read().encode('utf-8')).hexdigest() TEST_METADATA = {} -TEST_CONFIG = multiscanner.MS_CONFIG +TEST_CONFIG = multiscanner.config.MS_CONFIG TEST_REPORT = { 'filemeta': { diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index d46a6e41..7bc091bf 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -1,5 +1,4 @@ from __future__ import division, absolute_import, print_function, unicode_literals -import configparser import os import sys @@ -71,8 +70,7 @@ def setup(self): multiscanner._main() def test_config_init(self): - config_object = configparser.ConfigParser() - config_object.optionxform = str + config_object = multiscanner.MSConfigParser() config_object.read(TEST_CONFIG_FILE) assert config_object.has_section('main') @@ -81,8 +79,7 @@ def test_config_init(self): def test_fill_in_missing_config_sections(self): # Simulate a section missing from config file before multiscanner is imported/run - config_object = configparser.ConfigParser() - config_object.optionxform = str + config_object = multiscanner.MSConfigParser() config_object.read(TEST_CONFIG_FILE) config_object.remove_section('main') config_object.remove_section('test_1') diff --git a/requirements.txt b/requirements.txt index f1a052ae..97f3f5f0 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -22,7 +22,7 @@ stix2 reportlab #Required by API flask -psycopg2 +psycopg2-binary rarfile sqlalchemy sqlalchemy-utils From b5e2c2ed2851c5a39a8f7a87fe60886a152ec955 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 7 May 2019 11:04:40 -0400 Subject: [PATCH 27/38] Don't pass entire main config to multiscan --- multiscanner/ms.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index b6ba3f45..be3085ff 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -405,22 +405,14 @@ def multiscan(Files, config=None, module_list=None): # Read in config if config is None: config = {} - if 'main' in config: - main_config = config['main'] - else: - main_config = DEFAULTCONF # Copy files to a share if configured - if "copyfilesto" not in main_config: - main_config["copyfilesto"] = False - if main_config["copyfilesto"]: - print(str(type(config))) - print(str(type(main_config))) - print(str(type(main_config['copyfilesto']))) - if os.path.isdir(main_config["copyfilesto"]): - filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"]) + copyfilesto = config.get('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) + if copyfilesto: + if os.path.isdir(copyfilesto): + filelist = _copy_to_share(filelist, filedic, copyfilesto) else: - raise IOError('The copyfilesto dir "' + main_config["copyfilesto"] + '" is not a valid dir') + raise IOError('The copyfilesto dir "' + copyfilesto + '" is not a valid dir') # Create the global module interface global_module_interface = _GlobalModuleInterface() @@ -456,7 +448,7 @@ def multiscan(Files, config=None, module_list=None): time.sleep(1) # Delete copied files - if main_config["copyfilesto"]: + if copyfilesto: for item in filelist: try: os.remove(item) @@ -498,20 +490,20 @@ def multiscan(Files, config=None, module_list=None): from_filename = filedic[base] subscan_list[i] = (file_path, from_filename, 
module_name) - results.extend(_subscan(subscan_list, config, main_config, module_list, global_module_interface)) + results.extend(_subscan(subscan_list, config, copyfilesto, module_list, global_module_interface)) global_module_interface._cleanup() return results -def _subscan(subscan_list, config, main_config, module_list, global_module_interface): +def _subscan(subscan_list, config, copyfilesto, module_list, global_module_interface): """ Scans files created by modules subscan_list - The result of _get_subscan_list() from the global module interface config - The configuration dictionary - main_config - A dictionary of the configuration for main + copyfilesto - Directory to copy files to; if False files will not be copied module_list - The list of modules global_module_interface - The global module interface """ @@ -560,10 +552,8 @@ def _subscan(subscan_list, config, main_config, module_list, global_module_inter del subscan_list, subfiles_dict # Copy files to a share if configured - if "copyfilesto" not in main_config: - main_config["copyfilesto"] = False - if main_config["copyfilesto"]: - filelist = _copy_to_share(filelist, filedic, main_config["copyfilesto"]) + if copyfilesto: + filelist = _copy_to_share(filelist, filedic, copyfilesto) # Start a thread for each module thread_list = _start_module_threads(filelist, module_list, config, global_module_interface) @@ -590,7 +580,7 @@ def _subscan(subscan_list, config, main_config, module_list, global_module_inter time.sleep(1) # Delete copied files - if main_config["copyfilesto"]: + if copyfilesto: for item in filelist: os.remove(item) @@ -629,7 +619,7 @@ def _subscan(subscan_list, config, main_config, module_list, global_module_inter null, from_filename = file_mapping[from_filename] subscan_list[i] = (file_path, from_filename, module_name) - results.extend(_subscan(subscan_list, config, main_config, module_list, global_module_interface)) + results.extend(_subscan(subscan_list, config, copyfilesto, module_list, 
global_module_interface)) return results From fa43f6850470ff2eddb1c8f5d0222d4f4646beaa Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 9 May 2019 19:10:21 -0400 Subject: [PATCH 28/38] Fix tests, including... - Removed _load_defalt from storage.py - Changed write_config and read_config to use a dict for the default config instead of a section name and dict of the values in that section. This allows passing in more than one section at once. - Fixed some misnamed parameters. - Fixed bug where Mock couldn't find the Metadefender module. --- multiscanner/common/dir_monitor.py | 2 +- multiscanner/config.py | 30 +++++++++---------- multiscanner/distributed/api.py | 5 ++-- multiscanner/distributed/celery_worker.py | 2 +- .../distributed/distributed_worker.py | 2 +- multiscanner/ms.py | 7 ++--- multiscanner/storage/storage.py | 12 ++------ .../Metadefender/test_metadefender_module.py | 22 +++++++------- multiscanner/tests/test_configs.py | 6 ++-- multiscanner/tests/test_modules.py | 10 +++---- multiscanner/web/app.py | 2 +- 11 files changed, 46 insertions(+), 54 deletions(-) diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index f6255e83..df00c0ee 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -100,7 +100,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e else: continue - resultlist = multiscan(filelist, configfile=config) + resultlist = multiscan(filelist, config=config) results = parse_reports(resultlist, python=True) if delete: for file_name in results: diff --git a/multiscanner/config.py b/multiscanner/config.py index 91073cb6..0c1da99c 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -107,46 +107,44 @@ def dict_to_config(dictionary): config = MSConfigParser() for name, section in dictionary.items(): - if name == '_load_default': - continue config.add_section(name) for key in section.keys(): config.set(name, key, str(section[key])) 
return config -def write_config(config_object, config_file, section_name, default_config): +def write_config(config_object, config_file, default_config): """Write the default configuration to the given config file config_object - the ConfigParser object config_file - the filename of the config file - section_name - the name of the section of defaults to be added - default_config - values to set this configuration to + default_config - dictionary of section names and values to set within this configuration """ - if section_name not in config_object.sections(): - config_object.add_section(section_name) - for key in default_config: - config_object.set(section_name, key, str(default_config[key])) + for section_name, section in default_config.items(): + if section_name not in config_object.sections(): + config_object.add_section(section_name) + for key in section: + config_object.set(section_name, key, str(default_config[section_name][key])) with codecs.open(config_file, 'w', 'utf-8') as conffile: config_object.write(conffile) -def read_config(config_file, section_name=None, default_config=None): +def read_config(config_file, default_config=None): """Parse a config file into a ConfigParser object Can optionally set a default configuration by providing 'section_name' and 'default_config' arguments. 
config_file - the filename of the config file - section_name - the name of the section of defaults to be added - default_config - values to set this configuration to + default_config - dictionary of section names and values to set within this configuration """ config_object = MSConfigParser() config_object.read(config_file) - if section_name is not None and default_config is not None and \ - (not config_object.has_section(section_name) or not os.path.isfile(config_file)): - # Write default config - write_config(config_object, config_file, section_name, default_config) + if default_config is not None: + contains_sections = set(default_config.keys()).issubset(config_object.sections()) + if not contains_sections or not os.path.isfile(config_file): + # Write default config + write_config(config_object, config_file, default_config) return config_object diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index c1cfa7e9..82e700d2 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -114,9 +114,10 @@ def default(self, obj): app = Flask(__name__) app.json_encoder = CustomJSONEncoder api_config_file = get_config_path('api') -api_config = read_config(api_config_file, 'api', DEFAULTCONF) +api_config = read_config(api_config_file, {'api': DEFAULTCONF, 'Database': database.Database.DEFAULTCONF}) # TODO: fix this mess +# TODO: test moving these imports up with the others # Needs api_config in order to function properly from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic @@ -271,7 +272,7 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. 
''' - return jsonify({name: mod[0] for (name, mod) in ms.MODULE_LIST.items()}) + return jsonify({name: mod[0] for (name, mod) in ms.config.MODULE_LIST.items()}) @app.route('/api/v1/tasks', methods=['GET']) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index b4b400f2..216012a2 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -34,7 +34,7 @@ } configfile = msconf.get_config_path('api') -config = msconf.read_config(configfile, 'celery', DEFAULTCONF) +config = msconf.read_config(configfile, {'celery': DEFAULTCONF, 'Database': database.Database.DEFAULTCONF}) db_config = dict(config.items('Database')) storage_configfile = msconf.get_config_path('storage') diff --git a/multiscanner/distributed/distributed_worker.py b/multiscanner/distributed/distributed_worker.py index b648ee7f..b8b66010 100755 --- a/multiscanner/distributed/distributed_worker.py +++ b/multiscanner/distributed/distributed_worker.py @@ -52,7 +52,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e else: continue - resultlist = multiscan(filelist, configfile=config) + resultlist = multiscan(filelist, config=config) results = parse_reports(resultlist, python=True) if delete: for file_name in results: diff --git a/multiscanner/ms.py b/multiscanner/ms.py index be3085ff..684b39d4 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -391,7 +391,7 @@ def multiscan(Files, config=None, module_list=None): The meat and potatoes. Returns the list of module results Files - A list of files and dirs to be scanned - config - A dictionary containing the configuration options to be used. + config - ConfigParser object containing the configuration options to be used. module_list - A list of the names of the modules to run on the files. 
""" # Init some vars @@ -402,12 +402,11 @@ def multiscan(Files, config=None, module_list=None): # A dictionary used for the copyfileto parameter filedic = {} - # Read in config if config is None: - config = {} + config = MSConfigParser() # Copy files to a share if configured - copyfilesto = config.get('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) + copyfilesto = config.getboolean('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) if copyfilesto: if os.path.isdir(copyfilesto): filelist = _copy_to_share(filelist, filedic, copyfilesto) diff --git a/multiscanner/storage/storage.py b/multiscanner/storage/storage.py index 3596a67c..93670e71 100644 --- a/multiscanner/storage/storage.py +++ b/multiscanner/storage/storage.py @@ -102,7 +102,6 @@ def __init__(self, configfile=None, config=None): for key in config: if key not in config_object: config_object[key] = config[key] - config_object[key]['_load_default'] = True else: config_object[key].update(config[key]) config = config_object @@ -112,15 +111,8 @@ def __init__(self, configfile=None, config=None): # Set the config inside of the storage classes for storage_name in storage_classes: if storage_name in config: - if '_load_default' in config or '_load_default' in config[storage_name]: - # Remove _load_default from config - if '_load_default' in config[storage_name]: - del config[storage_name]['_load_default'] - # Update the default storage config - storage_classes[storage_name].config = storage_classes[storage_name].DEFAULTCONF - storage_classes[storage_name].config.read_dict(config[storage_name]) - else: - storage_classes[storage_name].config = config[storage_name] + storage_classes[storage_name].config = storage_classes[storage_name].DEFAULTCONF + storage_classes[storage_name].config.update(config[storage_name]) self.storage_classes = storage_classes self.loaded_storage = {} diff --git a/multiscanner/tests/module_tests/Metadefender/test_metadefender_module.py 
b/multiscanner/tests/module_tests/Metadefender/test_metadefender_module.py index 4c2d95fa..13d6fe1f 100644 --- a/multiscanner/tests/module_tests/Metadefender/test_metadefender_module.py +++ b/multiscanner/tests/module_tests/Metadefender/test_metadefender_module.py @@ -20,6 +20,8 @@ MSG_SERVER_UNAVAILABLE = 'Server unavailable, try again later' FILE_200_COMPLETE_REPORT = 'retrieval_responses/200_found_complete.json' FILE_200_INCOMPLETE_REPORT = 'retrieval_responses/200_found_incomplete.json' +MDF_GET = 'multiscanner.modules.antivirus.Metadefender.requests.get' +MDF_POST = 'multiscanner.modules.antivirus.Metadefender.requests.post' class MockResponse(object): @@ -146,7 +148,7 @@ def create_conf_short_timeout(self): # possible responses to sample submission requests # --------------------------------------------------------------------- - @mock.patch('Metadefender.requests.post', side_effect=mocked_requests_post_sample_submitted) + @mock.patch(MDF_POST, side_effect=mocked_requests_post_sample_submitted) def test_submit_sample_success(self, mock_get): ''' Tests Metadefender._submit_sample()'s handling of a successful response from @@ -158,7 +160,7 @@ def test_submit_sample_success(self, mock_get): self.assertEqual(submit_resp['error'], None) self.assertEqual(submit_resp['scan_id'], generate_scan_id(RANDOM_INPUT_FILES[0])) - @mock.patch('Metadefender.requests.post', side_effect=mocked_requests_post_sample_failed_w_msg) + @mock.patch(MDF_POST, side_effect=mocked_requests_post_sample_failed_w_msg) def test_submit_sample_fail_unavailable(self, mock_get): ''' Tests Metadefender._submit_sample()'s handling of a submission that fails due to @@ -170,7 +172,7 @@ def test_submit_sample_fail_unavailable(self, mock_get): self.assertEqual(submit_resp['error'], MSG_SERVER_UNAVAILABLE) self.assertEqual(submit_resp['scan_id'], None) - @mock.patch('Metadefender.requests.post', side_effect=mocked_requests_post_sample_failed_no_msg) + @mock.patch(MDF_POST, 
side_effect=mocked_requests_post_sample_failed_no_msg) def test_submit_sample_fail_unavailable_no_msg(self, mock_get): ''' Tests Metadefender._submit_sample()'s handling of a submission that fails due to @@ -186,7 +188,7 @@ def test_submit_sample_fail_unavailable_no_msg(self, mock_get): # This section tests the logic for parsing Metadefender's responses # to requests for analysis results # --------------------------------------------------------------------- - @mock.patch('Metadefender.requests.get', side_effect=mocked_requests_get_sample_200_success) + @mock.patch(MDF_GET, side_effect=mocked_requests_get_sample_200_success) def test_get_results_200_success(self, mock_get): ''' Tests Metadefender._parse_scan_result()'s handling of a complete @@ -218,7 +220,7 @@ def test_get_results_200_success(self, mock_get): else: self.fail('Unexpected Engine: %s' % engine_name) - @mock.patch('Metadefender.requests.get', side_effect=mocked_requests_get_sample_200_not_found) + @mock.patch(MDF_GET, side_effect=mocked_requests_get_sample_200_not_found) def test_get_results_200_not_found(self, mock_get): ''' Tests Metadefender._parse_scan_result()'s handling of a 200 response @@ -235,7 +237,7 @@ def test_get_results_200_not_found(self, mock_get): if len(engine_results) != 0: self.fail('Engine result list should be empty') - @mock.patch('Metadefender.requests.get', side_effect=mocked_requests_get_sample_200_in_progress) + @mock.patch(MDF_GET, side_effect=mocked_requests_get_sample_200_in_progress) def test_get_results_200_succes_in_progress(self, mock_get): ''' Tests Metadefender._parse_scan_result()'s handling of a 200 response @@ -256,8 +258,8 @@ def test_get_results_200_succes_in_progress(self, mock_get): # --------------------------------------------------------------------- # This section tests the entire scan() method # --------------------------------------------------------------------- - @mock.patch('Metadefender.requests.get', 
side_effect=mocked_requests_get_sample_200_success) - @mock.patch('Metadefender.requests.post', side_effect=mocked_requests_post_sample_submitted) + @mock.patch(MDF_GET, side_effect=mocked_requests_get_sample_200_success) + @mock.patch(MDF_POST, side_effect=mocked_requests_post_sample_submitted) def test_scan_complete_success(self, mock_post, mock_get): ''' Test for a perfect scan. No submission errors, no retrieval errors @@ -269,8 +271,8 @@ def test_scan_complete_success(self, mock_post, mock_get): for scan_res in resultlist: self.assertEqual(scan_res[1]['overall_status'], Metadefender.STATUS_SUCCESS) - @mock.patch('Metadefender.requests.get', side_effect=mocked_requests_get_sample_200_in_progress) - @mock.patch('Metadefender.requests.post', side_effect=mocked_requests_post_sample_submitted) + @mock.patch(MDF_GET, side_effect=mocked_requests_get_sample_200_in_progress) + @mock.patch(MDF_POST, side_effect=mocked_requests_post_sample_submitted) def test_scan_timeout_scan_in_progress(self, mock_post, mock_get): ''' Test for a scan where analysis time exceeds timeout period diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index 45f27364..a9d88752 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -24,7 +24,7 @@ def test_no_config(): @mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_no_file(): - config = {'test_conf': {'a': 'z'}} + config = multiscanner.config.dict_to_config({'test_conf': {'a': 'z'}}) results, metadata = multiscanner.multiscan( filelist, config=config, module_list=module_list)[0] @@ -33,7 +33,7 @@ def test_config_api_no_file(): @mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_with_empty_file(): - config = {'test_conf': {'a': 'z'}} + config = multiscanner.config.dict_to_config({'test_conf': {'a': 'z'}}) config_file = tempfile.mkstemp()[1] multiscanner.update_ms_config_file(config_file) results, metadata = 
multiscanner.multiscan( @@ -45,7 +45,7 @@ def test_config_api_with_empty_file(): @mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_config_api_with_real_file(): - config = {'test_conf': {'a': 'z'}} + config = multiscanner.config.dict_to_config({'test_conf': {'a': 'z'}}) config_file = tempfile.mkstemp()[1] module_list = multiscanner._get_main_modules() multiscanner.config_init(config_file, module_list) diff --git a/multiscanner/tests/test_modules.py b/multiscanner/tests/test_modules.py index 5a0509b4..43a71e4c 100644 --- a/multiscanner/tests/test_modules.py +++ b/multiscanner/tests/test_modules.py @@ -26,20 +26,20 @@ def test_fail_loadModule(): class _runmod_tests(object): @classmethod def setup_class(cls): - cls.real_mod_dir = multiscanner.MODULES_DIR - multiscanner.MODULES_DIR = os.path.join(CWD, "modules") + cls.real_mod_dir = multiscanner.config.MODULES_DIR + multiscanner.config.MODULES_DIR = os.path.join(CWD, "modules") cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) cls.files = ['a', 'b', 'C:\\c', '/d/d'] cls.threadDict = {} @classmethod def teardown_class(cls): - multiscanner.MODULES_DIR = cls.real_mod_dir + multiscanner.config.MODULES_DIR = cls.real_mod_dir class Test_runModule_test_1(_runmod_tests): def setup(self): - m = utils.load_module('test_1', [multiscanner.MODULES_DIR]) + m = utils.load_module('test_1', [multiscanner.config.MODULES_DIR]) global_module_interface = multiscanner._GlobalModuleInterface() self.result = multiscanner._run_module('test_1', m, self.filelist, self.threadDict, global_module_interface) global_module_interface._cleanup() @@ -55,7 +55,7 @@ def test_runModule_results(self): class Test_runModule_test_2(_runmod_tests): def setup(self): - self.m = utils.load_module('test_2', [multiscanner.MODULES_DIR]) + self.m = utils.load_module('test_2', [multiscanner.config.MODULES_DIR]) self.threadDict['test_2'] = mock.Mock() self.threadDict['test_1'] = mock.Mock() self.threadDict['test_1'].ret = ([('a', 'a'), 
('C:\\c', 'c')], {}) diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index 9e110d16..751173e1 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -29,7 +29,7 @@ # Finagle Flask to read config from .ini file instead of .py file web_config_file = get_config_path('web') -web_config = dict(read_config(web_config_file, 'web', DEFAULTCONF).items('web')) +web_config = dict(read_config(web_config_file, {'web': DEFAULTCONF}).items('web')) conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) From 4a7a731d86b1680d41c5fe95ce27a93742c29160 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Mon, 13 May 2019 15:19:32 -0400 Subject: [PATCH 29/38] Fix a test, remove unnecessary lines --- multiscanner/ms.py | 5 +---- multiscanner/tests/test_multiscanner.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 684b39d4..aa41aa90 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -754,12 +754,11 @@ def _main(): update_ms_config_file(args.config) update_paths_in_config(DEFAULTCONF, msconf.CONFIG_FILE) + module_list = _get_main_modules() if not os.path.isfile(args.config): - module_list = _get_main_modules() config_init(args.config, module_list) else: # Write the default config settings for any missing modules - module_list = _get_main_modules() write_missing_config(module_list, msconf.MS_CONFIG, msconf.CONFIG_FILE) # Make sure report is not a dir @@ -823,8 +822,6 @@ def _main(): results = multiscan(filelist, config=msconf.MS_CONFIG) # We need to read in the config for the parseReports call - config = MSConfigParser() - config.read(args.config) config = msconf.MS_CONFIG['main'] # Make sure we have a group-types if "group-types" not in config or not config["group-types"]: diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 7bc091bf..130cd7a2 100644 --- 
a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -85,6 +85,7 @@ def test_fill_in_missing_config_sections(self): config_object.remove_section('test_1') with open(TEST_CONFIG_FILE, 'w') as conf_file: config_object.write(conf_file) + multiscanner.update_ms_config_file(TEST_CONFIG_FILE) # Run MultiScanner sys.argv = ['-c', TEST_CONFIG_FILE, os.path.join(CWD, 'files')] From 3a0165576a83d95877b6002f74ea7e7ca5491a80 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Mon, 13 May 2019 16:22:38 -0400 Subject: [PATCH 30/38] Convert config vals in sections to Python literals --- multiscanner/config.py | 26 +++++++++++++++++--------- multiscanner/ms.py | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index 0c1da99c..2e23a24b 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -39,16 +39,24 @@ def __init__(self, *args, **kwargs): self.optionxform = str # Preserve case def __getitem__(self, key): - """Attempts to convert value to a Python literal if possible.""" value = super(MSConfigParser, self).__getitem__(key) - try: - return ast.literal_eval(value) - except (SyntaxError, ValueError) as e: - # Ignore if config value isn't convertible to a Python literal - pass - except Exception as e: - logger.debug(e) - return value + return _convert_to_literal(value) + + def get(self, *args, **kwargs): + value = super(MSConfigParser, self).get(*args, **kwargs) + return _convert_to_literal(value) + + +def _convert_to_literal(value): + """Attempts to convert value to a Python literal if possible.""" + try: + return ast.literal_eval(value) + except (SyntaxError, ValueError) as e: + # Ignore if config value isn't convertible to a Python literal + pass + except Exception as e: + logger.debug(e) + return value def get_configuration_paths(): diff --git a/multiscanner/ms.py b/multiscanner/ms.py index aa41aa90..979e30c7 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py 
@@ -406,7 +406,7 @@ def multiscan(Files, config=None, module_list=None): config = MSConfigParser() # Copy files to a share if configured - copyfilesto = config.getboolean('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) + copyfilesto = config.get('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) if copyfilesto: if os.path.isdir(copyfilesto): filelist = _copy_to_share(filelist, filedic, copyfilesto) From 6817f4bf4fa94e20fa4b4390c3500369407c00f6 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 16 May 2019 17:22:42 -0400 Subject: [PATCH 31/38] Improve some tests We were incorrectly replacing sys.argv, omitting the first term. It doesn't matter what we set it to, as argparse won't care. The try/catches are needed because 'init' calls `exit()`. --- multiscanner/config.py | 6 ++--- multiscanner/tests/test_configs.py | 8 +++++++ multiscanner/tests/test_multiscanner.py | 30 ++++++++++++++++++------- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/multiscanner/config.py b/multiscanner/config.py index 2e23a24b..0cc5a34f 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -69,7 +69,7 @@ def get_configuration_paths(): ] -def determine_configuration_path(filepath): +def determine_configuration_path(filepath=None): if filepath: return filepath @@ -90,7 +90,7 @@ def determine_configuration_path(filepath): return config_file -CONFIG_FILE = determine_configuration_path(None) +CONFIG_FILE = determine_configuration_path() def parse_config(config_object): @@ -317,7 +317,7 @@ def reset_config(sections, config, filepath=None): The ConfigParser object that was written to the file. 
""" if not filepath: - CONFIG_FILE + filepath = CONFIG_FILE # Read in the old config to preserve any sections not being reset if os.path.isfile(filepath): diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index a9d88752..ac151357 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -14,6 +14,14 @@ module_list = ['test_conf'] +def test_config_parse_round_trip(): + conf_dict = {'test': {'a': 'b', 'c': 'd'}} + conf_parser = multiscanner.config.dict_to_config(conf_dict) + assert conf_parser.get('test', 'a') == 'b' + conf_dict2 = multiscanner.config.parse_config(conf_parser) + assert conf_dict == conf_dict2 + + @mock.patch('multiscanner.config.MODULE_LIST', mock_modlist) def test_no_config(): results, metadata = multiscanner.multiscan( diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 130cd7a2..04d8b60d 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -1,4 +1,5 @@ from __future__ import division, absolute_import, print_function, unicode_literals +import mock import os import sys @@ -10,6 +11,7 @@ TEST_CONFIG_FILE = '.tmpfile.ini' TEST_REPORT = 'tmp_report.json' +TEST_FILES = os.path.join(CWD, 'files') class _runmulti_tests(object): @@ -19,7 +21,7 @@ def setup_class(cls): cls.real_mod_list = multiscanner.config.MODULE_LIST multiscanner.config.MODULES_DIR = os.path.join(CWD, "modules") multiscanner.config.MODULE_LIST = multiscanner.config.get_modules() - cls.filelist = utils.parse_dir(os.path.join(CWD, 'files')) + cls.filelist = utils.parse_dir(TEST_FILES) @classmethod def teardown_class(cls): @@ -59,15 +61,21 @@ def teardown(self): pass def test_basic_main(self): - sys.argv = ['-z', '-j', TEST_REPORT] - sys.argv.extend(self.filelist) - multiscanner._main() + with mock.patch.object(sys, 'argv', ['ms.py', '-z', '-j', TEST_REPORT] + self.filelist): + try: + multiscanner._main() + except SystemExit: 
+ pass class TestMissingConfig(_runmulti_tests): def setup(self): - sys.argv = ['-c', TEST_CONFIG_FILE, 'init'] - multiscanner._main() + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, 'init']), \ + mock.patch('multiscanner.ms.input', return_value='y'): + try: + multiscanner._main() + except SystemExit: + pass def test_config_init(self): config_object = multiscanner.MSConfigParser() @@ -88,11 +96,17 @@ def test_fill_in_missing_config_sections(self): multiscanner.update_ms_config_file(TEST_CONFIG_FILE) # Run MultiScanner - sys.argv = ['-c', TEST_CONFIG_FILE, os.path.join(CWD, 'files')] - multiscanner._main() + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, TEST_FILES]): + multiscanner._main() with open(TEST_CONFIG_FILE, 'r') as conf_file: conf = conf_file.read() assert 'test_1' in conf + def test_read_config_with_default(self): + multiscanner.config.read_config(TEST_CONFIG_FILE, {'test': {'foo': 'bar'}}) + with open(TEST_CONFIG_FILE, 'r') as conf_file: + conf = conf_file.read() + assert 'foo' in conf + def teardown(self): os.remove(TEST_CONFIG_FILE) From 9edd05052dc9eed82232405897d4261a46f183bd Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 23 May 2019 23:57:53 -0400 Subject: [PATCH 32/38] Add/improve config tests --- multiscanner/ms.py | 6 +-- multiscanner/tests/modules/test_2.py | 6 ++- multiscanner/tests/test_configs.py | 25 +++++++++ multiscanner/tests/test_multiscanner.py | 71 ++++++++++++++++++++++++- 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 979e30c7..b88ce7bc 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -688,13 +688,13 @@ def _init(args): logger.warn(e) answer = 'N' if answer == 'y': - config = config_init(args.config, module_list) + config = config_init(args.config, module_list, overwrite=True) update_ms_config(config) # Set global main config logger.info('Main configuration file initialized at {}'.format(args.config)) else: 
logger.info('Checking for missing modules in main configuration...') - config = msconf.MS_CONFIG # MS_CONFIG will already have been set in main() - write_missing_config(module_list, config, args.config) + config = config_init(args.config, module_list, overwrite=False) + update_ms_config(config) # Set global main config else: config = config_init(args.config, module_list) update_ms_config(config) # Set global main config diff --git a/multiscanner/tests/modules/test_2.py b/multiscanner/tests/modules/test_2.py index 57cd7611..71e16663 100644 --- a/multiscanner/tests/modules/test_2.py +++ b/multiscanner/tests/modules/test_2.py @@ -4,7 +4,11 @@ TYPE = "Test" NAME = "test_2" REQUIRES = ["test_1"] -DEFAULTCONF = {'a': 1, 'b': 2} +DEFAULTCONF = { + 'ENABLED': True, + 'a': 1, + 'b': 2 +} def check(conf=DEFAULTCONF): diff --git a/multiscanner/tests/test_configs.py b/multiscanner/tests/test_configs.py index ac151357..59a43512 100644 --- a/multiscanner/tests/test_configs.py +++ b/multiscanner/tests/test_configs.py @@ -10,6 +10,9 @@ CWD = os.path.dirname(os.path.abspath(__file__)) mock_modlist = {'test_conf': [True, os.path.join(CWD, 'modules')]} +mock_modlist2 = {'test_conf': [True, os.path.join(CWD, 'modules')], + 'test_1': [True, os.path.join(CWD, 'modules')], + 'test_2': [True, os.path.join(CWD, 'modules')]} filelist = utils.parse_dir(os.path.join(CWD, 'files')) module_list = ['test_conf'] @@ -63,3 +66,25 @@ def test_config_api_with_real_file(): module_list=module_list)[0] os.remove(config_file) assert metadata['conf'] == {'a': 'z', 'c': 'd'} + + +@mock.patch('multiscanner.config.MODULE_LIST', mock_modlist2) +def test_config_reset_not_overwrite(): + config_file = tempfile.mkstemp()[1] + module_list = multiscanner._get_main_modules() + multiscanner.config_init(config_file, module_list) + multiscanner.update_ms_config_file(config_file) + + # Change a config val from default + config_object = multiscanner.MSConfigParser() + config_object.read(config_file) + 
config_object.set('test_2', 'ENABLED', 'False') + with open(config_file, 'w') as conf_file: + config_object.write(conf_file) + + # call config_init with overwrite=true, but since test_2 isn't in the module list it won't be overwritten + del module_list['test_2'] + multiscanner.config.config_init(config_file, module_list, True) + multiscanner.update_ms_config_file(config_file) + os.remove(config_file) + assert multiscanner.config.MS_CONFIG.get('test_2', 'ENABLED') is False diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 04d8b60d..63506d6f 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -50,7 +50,6 @@ class TestMain(_runmulti_tests): def setup(self): multiscanner.config_init(TEST_CONFIG_FILE, multiscanner._get_main_modules()) multiscanner.update_ms_config_file(TEST_CONFIG_FILE) - sys.argv = [''] def teardown(self): try: @@ -61,13 +60,38 @@ def teardown(self): pass def test_basic_main(self): - with mock.patch.object(sys, 'argv', ['ms.py', '-z', '-j', TEST_REPORT] + self.filelist): + with mock.patch.object(sys, 'argv', ['ms.py', '-j', TEST_REPORT] + self.filelist): try: multiscanner._main() except SystemExit: pass +@mock.patch.object(multiscanner.config, 'CONFIG_FILE', TEST_CONFIG_FILE) +class TestInitNoConfig(_runmulti_tests): + def test_basic_main(self): + with mock.patch.object(sys, 'argv', ['ms.py', 'init']), \ + mock.patch('multiscanner.ms.input', return_value='y'): + try: + multiscanner._main() + except SystemExit: + pass + + with mock.patch.object(sys, 'argv', ['ms.py', '-j', TEST_REPORT] + self.filelist): + try: + multiscanner._main() + except SystemExit: + pass + + def teardown(self): + try: + os.remove(TEST_CONFIG_FILE) + os.remove(TEST_REPORT) + except Exception as e: + # TODO: log exception + pass + + class TestMissingConfig(_runmulti_tests): def setup(self): with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, 'init']), \ @@ -108,5 
+132,48 @@ def test_read_config_with_default(self): conf = conf_file.read() assert 'foo' in conf + def test_overwriting_config_on_reset(self): + # Change a config val from default + config_object = multiscanner.MSConfigParser() + config_object.read(TEST_CONFIG_FILE) + config_object.set('test_2', 'ENABLED', 'False') + with open(TEST_CONFIG_FILE, 'w') as conf_file: + config_object.write(conf_file) + + # Trigger reset_config and it gets overwritten + self.setup() + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, '-j', TEST_REPORT, self.filelist[0]]): + try: + multiscanner._main() + except SystemExit: + pass + + with open(TEST_REPORT, 'r') as report_file: + report = report_file.read() + assert 'test_2' in report + + # teardown + os.remove(TEST_REPORT) + + def test_config_init_no_overwrite(self): + # Remove a section from config file + config_object = multiscanner.MSConfigParser() + config_object.read(TEST_CONFIG_FILE) + config_object.remove_section('test_1') + with open(TEST_CONFIG_FILE, 'w') as conf_file: + config_object.write(conf_file) + + # this time we answer 'no' so config won't be overwritten, but missing modules' configs will be regenerated + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, 'init']), \ + mock.patch('multiscanner.ms.input', return_value='n'): + try: + multiscanner._main() + except SystemExit: + pass + + with open(TEST_CONFIG_FILE, 'r') as conf_file: + conf = conf_file.read() + assert 'test_1' in conf + def teardown(self): os.remove(TEST_CONFIG_FILE) From 0f8cd117940dd90b4f05d84fc73b5d535cacf8bc Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 11 Jun 2019 13:17:54 -0400 Subject: [PATCH 33/38] Always run filemeta and ssdeeper modules Also fix a couple config bugs --- multiscanner/config.py | 7 +++++ multiscanner/distributed/api.py | 31 +++++++------------ multiscanner/storage/elasticsearch_storage.py | 7 ++++- multiscanner/web/app.py | 6 ++-- 4 files changed, 27 insertions(+), 24 deletions(-) diff 
--git a/multiscanner/config.py b/multiscanner/config.py index 0cc5a34f..51397f23 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -46,6 +46,10 @@ def get(self, *args, **kwargs): value = super(MSConfigParser, self).get(*args, **kwargs) return _convert_to_literal(value) + def get_section(self, section_name): + section = self.items(section_name) + return {k: _convert_to_literal(v) for k, v in section} + def _convert_to_literal(value): """Attempts to convert value to a Python literal if possible.""" @@ -201,6 +205,9 @@ def get_modules(): if filename[1] == '.py': module = filename[0] + if module == 'filemeta' or module == 'ssdeeper': + modules[module] = [True, folder] + continue try: modules[module] = [MS_CONFIG[module]['ENABLED'], folder] except KeyError as e: diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 82e700d2..14f16fcc 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -156,9 +156,6 @@ def default(self, obj): storage_handler = StorageHandler() handler = storage_handler.load_required_module('ElasticSearchStorage') -ms_config_file = ms.config.MS_CONFIG -ms_config = read_config(ms_config_file) - try: DISTRIBUTED = api_config['api']['distributed'] except KeyError as e: @@ -210,7 +207,7 @@ def multiscanner_process(work_queue, exit_signal): module_list = item[5] resultlist = ms.multiscan( filelist, - config=ms.MS_CONFIG, + config=ms.config.MS_CONFIG, module_list=module_list ) results = ms.parse_reports(resultlist, python=True) @@ -272,7 +269,10 @@ def modules(): Return a list of module names available for MultiScanner to use, and whether or not they are enabled in the config. 
''' - return jsonify({name: mod[0] for (name, mod) in ms.config.MODULE_LIST.items()}) + modlist = {name: mod[0] for (name, mod) in ms.config.MODULE_LIST.items()} + del modlist['filemeta'] + del modlist['ssdeeper'] + return jsonify(modlist) @app.route('/api/v1/tasks', methods=['GET']) @@ -438,9 +438,10 @@ def queue_task(original_filename, f_name, full_path, metadata, rescan=False, if DISTRIBUTED: # Publish the task to Celery + tmp_config = ms.config.parse_config(ms.config.MS_CONFIG) multiscanner_celery.apply_async( args=(full_path, original_filename, task_id, f_name, metadata), - kwargs=dict(config=ms.config.MS_CONFIG, module_list=module_list), + kwargs=dict(config=tmp_config, module_list=module_list), **{'queue': queue_name, 'priority': priority, 'routing_key': routing_key} ) else: @@ -505,18 +506,9 @@ def create_task(): rescan = True elif key == 'modules': module_names = request.form[key].split(',') - if 'SHA256' not in module_names: - # Elasticsearch won't work without it - # TODO: Don't let users enable/disable SHA256 module? 
- module_names.append('SHA256') - modules = list(set(module_names).intersection(ms.MODULE_LIST.keys())) - - # files = utils.parse_dir(MODULES_DIR, True) - # modules = [] - # for f in files: - # split = os.path.splitext(os.path.basename(f)) - # if split[0] in module_names and split[1] == '.py': - # modules.append(f) + modules = list(set(module_names).intersection(ms.config.MODULE_LIST.keys())) + modules.append('filemeta') + modules.append('ssdeeper') elif key == 'archive-analyze' and request.form[key] == 'true': extract_dir = api_config['api']['upload_folder'] if not os.path.isdir(extract_dir): @@ -861,8 +853,9 @@ def get_maec_report(task_id): # Get the MAEC report from Cuckoo try: + cuckoo_report = ms.config.MS_CONFIG.get('Cuckoo', 'API URL', fallback='') maec_report = requests.get( - '{}/v1/tasks/report/{}/maec'.format(ms_config.get('Cuckoo', 'API URL', fallback=''), cuckoo_task_id) + '{}/v1/tasks/report/{}/maec'.format(cuckoo_report, cuckoo_task_id) ) except Exception as e: logger.warning('No MAEC report found for that task! 
- {}'.format(e)) diff --git a/multiscanner/storage/elasticsearch_storage.py b/multiscanner/storage/elasticsearch_storage.py index bb43e110..71b8c119 100644 --- a/multiscanner/storage/elasticsearch_storage.py +++ b/multiscanner/storage/elasticsearch_storage.py @@ -164,10 +164,15 @@ def store(self, report): except KeyError: logger.warn("Unable to find sha256 hash for sample_id; generating UUID") sample_id = uuid4() + try: + ssdeep = report[filename]['ssdeep'] + except KeyError: + logger.warn("Unable to find ssdeep hash for sample_id") + ssdeep = None # Store metadata with the sample, not the report sample = { 'doc_type': 'sample', - 'ssdeep': report[filename]['ssdeep'], + 'ssdeep': ssdeep, 'tags': [], } diff --git a/multiscanner/web/app.py b/multiscanner/web/app.py index 751173e1..64b76d6b 100644 --- a/multiscanner/web/app.py +++ b/multiscanner/web/app.py @@ -1,4 +1,3 @@ -from collections import namedtuple from flask import Flask, render_template, request import re @@ -29,9 +28,8 @@ # Finagle Flask to read config from .ini file instead of .py file web_config_file = get_config_path('web') -web_config = dict(read_config(web_config_file, {'web': DEFAULTCONF}).items('web')) -conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) -app.config.from_object(conf_tuple) +web_config = read_config(web_config_file, {'web': DEFAULTCONF}).get_section('web') +app.config.update(**web_config) @app.context_processor From db1ca0d4076d4f0ad275dc1f7fc47155f0c56672 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 18 Jun 2019 16:44:18 -0400 Subject: [PATCH 34/38] Change pdf config to .ini and use the same config handling code as the other configs --- multiscanner/common/pdf_generator/__init__.py | 9 ++++----- multiscanner/distributed/api.py | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/multiscanner/common/pdf_generator/__init__.py b/multiscanner/common/pdf_generator/__init__.py index f0bc130c..4604aded 100644 --- 
a/multiscanner/common/pdf_generator/__init__.py +++ b/multiscanner/common/pdf_generator/__init__.py @@ -1,13 +1,13 @@ from __future__ import (division, absolute_import, with_statement, print_function, unicode_literals) -import json import os from reportlab.lib import colors, units from reportlab.platypus import TableStyle from multiscanner.common.pdf_generator import generic_pdf +from multiscanner import config as msconf def create_pdf_document(DIR, report): @@ -15,13 +15,12 @@ def create_pdf_document(DIR, report): Method to create a PDF report based of a multiscanner JSON report. Args: - DIR: Represents the a directory containing the 'pdf_config.json' file. + DIR: Represents the a directory containing the 'pdf_config.ini' file. report: A JSON object. ''' - with open(os.path.join(os.path.split(DIR)[0], 'pdf_config.json')) as data_file: - pdf_components = json.load(data_file) - + pdf_config = os.path.join(DIR, 'pdf_config.ini') + pdf_components = msconf.read_config(pdf_config).get_section('pdf') gen_pdf = generic_pdf.GenericPDF(pdf_components) notice = [] diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index 5ff29c8b..ae7b92e9 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -1105,7 +1105,8 @@ def generate_pdf_report(task_id): if report_dict == TASK_STILL_PROCESSING: return make_response(jsonify(TASK_STILL_PROCESSING), HTTP_STILL_PROCESSING) - pdf = pdf_generator.create_pdf_document(ms.CONFIG_FILE, report_dict) + config_dir = os.path.split(ms.config.CONFIG_FILE)[0] + pdf = pdf_generator.create_pdf_document(config_dir, report_dict) response = make_response(pdf) response.headers['Content-Type'] = 'application/pdf' response.headers['Content-Disposition'] = 'attachment; filename=%s.pdf' % task_id From d66ed9cdfd5f3d02ec5eb071b9548aa5e39757c7 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 20 Jun 2019 16:02:53 -0400 Subject: [PATCH 35/38] Rename CONFIG_FILE -> CONFIG_FILEPATH and fix a couple other 
minor things found in code review --- docker_utils/api_config.ini | 1 + multiscanner/common/dir_monitor.py | 4 +- multiscanner/config.py | 17 +++--- multiscanner/distributed/api.py | 4 +- multiscanner/modules/antivirus/AVGScan.py | 2 +- multiscanner/modules/antivirus/MSEScan.py | 2 +- multiscanner/modules/antivirus/McAfeeScan.py | 2 +- multiscanner/modules/database/NSRL.py | 4 +- .../modules/machinelearning/EndgameEmber.py | 2 +- .../modules/metadata/ExifToolsScan.py | 2 +- multiscanner/modules/metadata/TrID.py | 2 +- multiscanner/modules/signature/YaraScan.py | 2 +- multiscanner/ms.py | 17 +++--- multiscanner/storage/sql_driver.py | 6 +-- multiscanner/tests/test_multiscanner.py | 52 +++++++++---------- 15 files changed, 60 insertions(+), 59 deletions(-) diff --git a/docker_utils/api_config.ini b/docker_utils/api_config.ini index 02213546..84bb3f17 100644 --- a/docker_utils/api_config.ini +++ b/docker_utils/api_config.ini @@ -16,6 +16,7 @@ password = vhost = / flush_every = 100 flush_interval = 10 +tz = US\Eastern [Database] db_type = sqlite diff --git a/multiscanner/common/dir_monitor.py b/multiscanner/common/dir_monitor.py index df00c0ee..5c57d662 100755 --- a/multiscanner/common/dir_monitor.py +++ b/multiscanner/common/dir_monitor.py @@ -116,7 +116,7 @@ def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, e def _main(): args = _parse_args() - if args.config != msconf.CONFIG_FILE: + if args.config != msconf.CONFIG_FILEPATH: msconf.update_ms_config_file(args.config) work_queue = multiprocessing.Queue() @@ -142,7 +142,7 @@ def _main(): def _parse_args(): parser = argparse.ArgumentParser(description='Monitor a directory and submit new files to MultiScanner') parser.add_argument("-c", "--config", help="The config file to use", required=False, - default=msconf.CONFIG_FILE) + default=msconf.CONFIG_FILEPATH) parser.add_argument("-s", "--seconds", help="The number of seconds to wait for additional files", required=False, default=120, type=int) 
parser.add_argument("-b", "--batch", help="The max number of files per batch", required=False, diff --git a/multiscanner/config.py b/multiscanner/config.py index fb296a31..69ed354b 100644 --- a/multiscanner/config.py +++ b/multiscanner/config.py @@ -24,7 +24,7 @@ MODULES_DIR = os.path.join(MS_WD, 'modules') # The default config file -CONFIG_FILE = None +CONFIG_FILEPATH = None # Main MultiScanner config, as a ConfigParser object MS_CONFIG = None @@ -83,6 +83,7 @@ def determine_configuration_path(filepath=None): for config_path in config_paths: if os.path.exists(config_path): config_file = config_path + break if not config_file: # If the local storage folder doesn't exist, we create it. @@ -94,7 +95,7 @@ def determine_configuration_path(filepath=None): return config_file -CONFIG_FILE = determine_configuration_path() +CONFIG_FILEPATH = determine_configuration_path() def parse_config(config_object): @@ -160,7 +161,7 @@ def read_config(config_file, default_config=None): return config_object -MS_CONFIG = read_config(CONFIG_FILE) +MS_CONFIG = read_config(CONFIG_FILEPATH) def get_config_path(component, config=None): @@ -235,11 +236,11 @@ def update_ms_config(config): def update_ms_config_file(config_file): """Update config globals to a different file than the default. - config_file - the file to be assigned to CONFIG_FILE and read into MS_CONFIG + config_file - the file to be assigned to CONFIG_FILEPATH and read into MS_CONFIG """ - global CONFIG_FILE, MS_CONFIG - CONFIG_FILE = config_file - MS_CONFIG = read_config(CONFIG_FILE) + global CONFIG_FILEPATH, MS_CONFIG + CONFIG_FILEPATH = config_file + MS_CONFIG = read_config(CONFIG_FILEPATH) def update_paths_in_config(conf, filepath): @@ -325,7 +326,7 @@ def reset_config(sections, config, filepath=None): The ConfigParser object that was written to the file. 
""" if not filepath: - filepath = CONFIG_FILE + filepath = CONFIG_FILEPATH # Read in the old config to preserve any sections not being reset if os.path.isfile(filepath): diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index ae7b92e9..a9c0a58c 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -123,7 +123,7 @@ def default(self, obj): from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic -db = database.Database(config=dict(api_config.items('Database')), regenconfig=False) +db = database.Database(config=api_config.get_section('Database'), regenconfig=False) # To run under Apache, we need to set up the DB outside of __main__ # Sleep and retry until database connection is successful try: @@ -1105,7 +1105,7 @@ def generate_pdf_report(task_id): if report_dict == TASK_STILL_PROCESSING: return make_response(jsonify(TASK_STILL_PROCESSING), HTTP_STILL_PROCESSING) - config_dir = os.path.split(ms.config.CONFIG_FILE)[0] + config_dir = os.path.split(ms.config.CONFIG_FILEPATH)[0] pdf = pdf_generator.create_pdf_document(config_dir, report_dict) response = make_response(pdf) response.headers['Content-Type'] = 'application/pdf' diff --git a/multiscanner/modules/antivirus/AVGScan.py b/multiscanner/modules/antivirus/AVGScan.py index 67b25ff2..09b81406 100644 --- a/multiscanner/modules/antivirus/AVGScan.py +++ b/multiscanner/modules/antivirus/AVGScan.py @@ -21,7 +21,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/antivirus/MSEScan.py b/multiscanner/modules/antivirus/MSEScan.py index 0489c2b0..2e263a7e 100644 --- 
a/multiscanner/modules/antivirus/MSEScan.py +++ b/multiscanner/modules/antivirus/MSEScan.py @@ -18,7 +18,7 @@ NAME = "Microsoft Security Essentials" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/antivirus/McAfeeScan.py b/multiscanner/modules/antivirus/McAfeeScan.py index 4256601b..3baa953d 100644 --- a/multiscanner/modules/antivirus/McAfeeScan.py +++ b/multiscanner/modules/antivirus/McAfeeScan.py @@ -19,7 +19,7 @@ NAME = "McAfee" # These are overwritten by the config file # SSH Key -KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" HOST = ("MultiScanner", 22, "User") diff --git a/multiscanner/modules/database/NSRL.py b/multiscanner/modules/database/NSRL.py index 712b2434..05c5f242 100755 --- a/multiscanner/modules/database/NSRL.py +++ b/multiscanner/modules/database/NSRL.py @@ -19,8 +19,8 @@ REQUIRES = ["filemeta"] DEFAULTCONF = { - 'hash_list': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'nsrl', 'hash_list'), - 'offsets': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'nsrl', 'offsets'), + 'hash_list': os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'nsrl', 'hash_list'), + 'offsets': os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'nsrl', 'offsets'), 'ENABLED': True } diff --git a/multiscanner/modules/machinelearning/EndgameEmber.py b/multiscanner/modules/machinelearning/EndgameEmber.py index 4f3e9da4..a5a837d1 100644 --- a/multiscanner/modules/machinelearning/EndgameEmber.py +++ b/multiscanner/modules/machinelearning/EndgameEmber.py @@ -30,7 
+30,7 @@ REQUIRES = ['libmagic'] DEFAULTCONF = { 'ENABLED': False, - 'path-to-model': os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'ember', 'ember_model_2017.txt'), + 'path-to-model': os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'ember', 'ember_model_2017.txt'), } LGBM_MODEL = None diff --git a/multiscanner/modules/metadata/ExifToolsScan.py b/multiscanner/modules/metadata/ExifToolsScan.py index bf4b875a..47aa3b3d 100644 --- a/multiscanner/modules/metadata/ExifToolsScan.py +++ b/multiscanner/modules/metadata/ExifToolsScan.py @@ -20,7 +20,7 @@ NAME = "ExifTool" # These are overwritten by the config file HOST = ("MultiScanner", 22, "User") -KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], "etc", "id_rsa") +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], "etc", "id_rsa") PATHREPLACE = "X:\\" # Entries to be removed from the final results REMOVEENTRY = ["ExifTool Version Number", "File Name", "Directory", "File Modification Date/Time", diff --git a/multiscanner/modules/metadata/TrID.py b/multiscanner/modules/metadata/TrID.py index c4103c32..344bc868 100644 --- a/multiscanner/modules/metadata/TrID.py +++ b/multiscanner/modules/metadata/TrID.py @@ -24,7 +24,7 @@ # Hostname, port, username HOST = ("MultiScanner", 22, "User") # SSH Key -KEY = os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], 'etc', 'id_rsa') +KEY = os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], 'etc', 'id_rsa') # Replacement path for SSH connections PATHREPLACE = "X:\\" DEFAULTCONF = { diff --git a/multiscanner/modules/signature/YaraScan.py b/multiscanner/modules/signature/YaraScan.py index 09729e9f..31ed5f85 100644 --- a/multiscanner/modules/signature/YaraScan.py +++ b/multiscanner/modules/signature/YaraScan.py @@ -18,7 +18,7 @@ TYPE = "Signature" NAME = "Yara" DEFAULTCONF = { - "ruledir": os.path.join(os.path.split(ms.config.CONFIG_FILE)[0], "etc", "yarasigs"), + "ruledir": 
os.path.join(os.path.split(ms.config.CONFIG_FILEPATH)[0], "etc", "yarasigs"), "fileextensions": [".yar", ".yara", ".sig"], "ignore-tags": ["TLPRED"], "string-threshold": 30, diff --git a/multiscanner/ms.py b/multiscanner/ms.py index 88a424e9..de311313 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -41,9 +41,9 @@ DEFAULTCONF = { "copyfilesto": False, "group-types": ["Antivirus"], - "storage-config": msconf.CONFIG_FILE.replace('config.ini', 'storage.ini'), - "api-config": msconf.CONFIG_FILE.replace('config.ini', 'api_config.ini'), - "web-config": msconf.CONFIG_FILE.replace('config.ini', 'web_config.ini'), + "storage-config": msconf.CONFIG_FILEPATH.replace('config.ini', 'storage.ini'), + "api-config": msconf.CONFIG_FILEPATH.replace('config.ini', 'api_config.ini'), + "web-config": msconf.CONFIG_FILEPATH.replace('config.ini', 'web_config.ini'), } logger = logging.getLogger(__name__) @@ -675,7 +675,7 @@ def _get_main_modules(): def _init(args): # Initialize configuration file if args.config is None: - args.config = msconf.CONFIG_FILE + args.config = msconf.CONFIG_FILEPATH # Compile all the sections to go in the config module_list = _get_main_modules() @@ -744,23 +744,22 @@ def _main(): stream=sys.stderr, level=log_lvl) # Check if user is trying to initialize - # if str(args.Files) == "['init']" and not os.path.isfile('init'): if args.Files == ['init'] and not os.path.isfile('init'): _init(args) # Set config or update locations if args.config is None: - args.config = msconf.CONFIG_FILE + args.config = msconf.CONFIG_FILEPATH else: update_ms_config_file(args.config) - update_paths_in_config(DEFAULTCONF, msconf.CONFIG_FILE) + update_paths_in_config(DEFAULTCONF, msconf.CONFIG_FILEPATH) module_list = _get_main_modules() if not os.path.isfile(args.config): config_init(args.config, module_list) else: # Write the default config settings for any missing modules - write_missing_config(module_list, msconf.MS_CONFIG, msconf.CONFIG_FILE) + 
write_missing_config(module_list, msconf.MS_CONFIG, msconf.CONFIG_FILEPATH) # Make sure report is not a dir if args.json: @@ -823,7 +822,7 @@ def _main(): results = multiscan(filelist, config=msconf.MS_CONFIG) # We need to read in the config for the parseReports call - config = msconf.MS_CONFIG['main'] + config = msconf.MS_CONFIG.get_section('main') # Make sure we have a group-types if "group-types" not in config or not config["group-types"]: config["group-types"] = [] diff --git a/multiscanner/storage/sql_driver.py b/multiscanner/storage/sql_driver.py index 386bbe2c..23546447 100644 --- a/multiscanner/storage/sql_driver.py +++ b/multiscanner/storage/sql_driver.py @@ -18,7 +18,7 @@ from multiscanner.config import MSConfigParser, get_config_path, reset_config -CONFIG_FILE = get_config_path('api') +CONFIG_FILEPATH = get_config_path('api') Base = declarative_base() logger = logging.getLogger(__name__) @@ -75,7 +75,7 @@ def __init__(self, config=None, configfile=None, regenconfig=False): # Configuration parsing config_parser = MSConfigParser() if configfile is None: - configfile = CONFIG_FILE + configfile = CONFIG_FILEPATH section_name = self.__class__.__name__ # (re)generate conf file if necessary @@ -109,7 +109,7 @@ def init_db(self): db_name = self.config['db_name'] if db_type == 'sqlite': # we can ignore host, username, password, etc - sql_lite_db_path = os.path.join(os.path.split(CONFIG_FILE)[0], db_name) + sql_lite_db_path = os.path.join(os.path.split(CONFIG_FILEPATH)[0], db_name) self.db_connection_string = 'sqlite:///{}'.format(sql_lite_db_path) else: username = self.config['username'] diff --git a/multiscanner/tests/test_multiscanner.py b/multiscanner/tests/test_multiscanner.py index 63506d6f..8ac437c5 100644 --- a/multiscanner/tests/test_multiscanner.py +++ b/multiscanner/tests/test_multiscanner.py @@ -9,7 +9,7 @@ # Makes sure we use the multiscanner in ../ CWD = os.path.dirname(os.path.abspath(__file__)) -TEST_CONFIG_FILE = '.tmpfile.ini' 
+TEST_CONFIG_FILEPATH = '.tmpfile.ini' TEST_REPORT = 'tmp_report.json' TEST_FILES = os.path.join(CWD, 'files') @@ -31,14 +31,14 @@ def teardown_class(cls): class TestMultiscan(_runmulti_tests): def setup(self): - multiscanner.config_init(TEST_CONFIG_FILE, multiscanner._get_main_modules()) - multiscanner.update_ms_config_file(TEST_CONFIG_FILE) + multiscanner.config_init(TEST_CONFIG_FILEPATH, multiscanner._get_main_modules()) + multiscanner.update_ms_config_file(TEST_CONFIG_FILEPATH) self.result = multiscanner.multiscan(self.filelist) self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True) self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True) def teardown(self): - os.remove(TEST_CONFIG_FILE) + os.remove(TEST_CONFIG_FILEPATH) def test_multiscan_results(self): for f in self.filelist: @@ -48,12 +48,12 @@ def test_multiscan_results(self): class TestMain(_runmulti_tests): def setup(self): - multiscanner.config_init(TEST_CONFIG_FILE, multiscanner._get_main_modules()) - multiscanner.update_ms_config_file(TEST_CONFIG_FILE) + multiscanner.config_init(TEST_CONFIG_FILEPATH, multiscanner._get_main_modules()) + multiscanner.update_ms_config_file(TEST_CONFIG_FILEPATH) def teardown(self): try: - os.remove(TEST_CONFIG_FILE) + os.remove(TEST_CONFIG_FILEPATH) os.remove(TEST_REPORT) except Exception as e: # TODO: log exception @@ -67,7 +67,7 @@ def test_basic_main(self): pass -@mock.patch.object(multiscanner.config, 'CONFIG_FILE', TEST_CONFIG_FILE) +@mock.patch.object(multiscanner.config, 'CONFIG_FILEPATH', TEST_CONFIG_FILEPATH) class TestInitNoConfig(_runmulti_tests): def test_basic_main(self): with mock.patch.object(sys, 'argv', ['ms.py', 'init']), \ @@ -85,7 +85,7 @@ def test_basic_main(self): def teardown(self): try: - os.remove(TEST_CONFIG_FILE) + os.remove(TEST_CONFIG_FILEPATH) os.remove(TEST_REPORT) except Exception as e: # TODO: log exception @@ -94,7 +94,7 @@ def teardown(self): class 
TestMissingConfig(_runmulti_tests): def setup(self): - with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, 'init']), \ + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILEPATH, 'init']), \ mock.patch('multiscanner.ms.input', return_value='y'): try: multiscanner._main() @@ -103,7 +103,7 @@ def setup(self): def test_config_init(self): config_object = multiscanner.MSConfigParser() - config_object.read(TEST_CONFIG_FILE) + config_object.read(TEST_CONFIG_FILEPATH) assert config_object.has_section('main') assert config_object.has_section('test_1') @@ -112,37 +112,37 @@ def test_config_init(self): def test_fill_in_missing_config_sections(self): # Simulate a section missing from config file before multiscanner is imported/run config_object = multiscanner.MSConfigParser() - config_object.read(TEST_CONFIG_FILE) + config_object.read(TEST_CONFIG_FILEPATH) config_object.remove_section('main') config_object.remove_section('test_1') - with open(TEST_CONFIG_FILE, 'w') as conf_file: + with open(TEST_CONFIG_FILEPATH, 'w') as conf_file: config_object.write(conf_file) - multiscanner.update_ms_config_file(TEST_CONFIG_FILE) + multiscanner.update_ms_config_file(TEST_CONFIG_FILEPATH) # Run MultiScanner - with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, TEST_FILES]): + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILEPATH, TEST_FILES]): multiscanner._main() - with open(TEST_CONFIG_FILE, 'r') as conf_file: + with open(TEST_CONFIG_FILEPATH, 'r') as conf_file: conf = conf_file.read() assert 'test_1' in conf def test_read_config_with_default(self): - multiscanner.config.read_config(TEST_CONFIG_FILE, {'test': {'foo': 'bar'}}) - with open(TEST_CONFIG_FILE, 'r') as conf_file: + multiscanner.config.read_config(TEST_CONFIG_FILEPATH, {'test': {'foo': 'bar'}}) + with open(TEST_CONFIG_FILEPATH, 'r') as conf_file: conf = conf_file.read() assert 'foo' in conf def test_overwriting_config_on_reset(self): # Change a config val from 
default config_object = multiscanner.MSConfigParser() - config_object.read(TEST_CONFIG_FILE) + config_object.read(TEST_CONFIG_FILEPATH) config_object.set('test_2', 'ENABLED', 'False') - with open(TEST_CONFIG_FILE, 'w') as conf_file: + with open(TEST_CONFIG_FILEPATH, 'w') as conf_file: config_object.write(conf_file) # Trigger reset_config and it gets overwritten self.setup() - with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, '-j', TEST_REPORT, self.filelist[0]]): + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILEPATH, '-j', TEST_REPORT, self.filelist[0]]): try: multiscanner._main() except SystemExit: @@ -158,22 +158,22 @@ def test_overwriting_config_on_reset(self): def test_config_init_no_overwrite(self): # Remove a section from config file config_object = multiscanner.MSConfigParser() - config_object.read(TEST_CONFIG_FILE) + config_object.read(TEST_CONFIG_FILEPATH) config_object.remove_section('test_1') - with open(TEST_CONFIG_FILE, 'w') as conf_file: + with open(TEST_CONFIG_FILEPATH, 'w') as conf_file: config_object.write(conf_file) # this time we answer 'no' so config won't be overwritten, but missing modules' configs will be regenerated - with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILE, 'init']), \ + with mock.patch.object(sys, 'argv', ['ms.py', '-c', TEST_CONFIG_FILEPATH, 'init']), \ mock.patch('multiscanner.ms.input', return_value='n'): try: multiscanner._main() except SystemExit: pass - with open(TEST_CONFIG_FILE, 'r') as conf_file: + with open(TEST_CONFIG_FILEPATH, 'r') as conf_file: conf = conf_file.read() assert 'test_1' in conf def teardown(self): - os.remove(TEST_CONFIG_FILE) + os.remove(TEST_CONFIG_FILEPATH) From 625b5fab3febe5583c8caaf5693fb88f03e4289e Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Sat, 29 Jun 2019 15:59:05 -0400 Subject: [PATCH 36/38] move imports, metadata_list is now cleared after the loop completes - Similarly we now need to submit each item 
individually. --- multiscanner/distributed/api.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/multiscanner/distributed/api.py b/multiscanner/distributed/api.py index a9c0a58c..43a5595f 100755 --- a/multiscanner/distributed/api.py +++ b/multiscanner/distributed/api.py @@ -67,13 +67,14 @@ from sqlalchemy.exc import SQLAlchemyError import multiscanner as ms +from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic from multiscanner.common import pdf_generator, stix2_generator from multiscanner.config import PY3, get_config_path, read_config +from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery from multiscanner.storage import StorageHandler from multiscanner.storage import sql_driver as database from multiscanner.storage.storage import StorageNotLoadedError - TASK_NOT_FOUND = {'Message': 'No task with that ID found!'} INVALID_REQUEST = {'Message': 'Invalid request parameters'} TASK_STILL_PROCESSING = {'Message': 'Task still pending'} @@ -117,12 +118,6 @@ def default(self, obj): api_config_file = get_config_path('api') api_config = read_config(api_config_file, {'api': DEFAULTCONF, 'Database': database.Database.DEFAULTCONF}) -# TODO: fix this mess -# TODO: test moving these imports up with the others -# Needs api_config in order to function properly -from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery -from multiscanner.analytics.ssdeep_analytics import SSDeepAnalytic - db = database.Database(config=api_config.get_section('Database'), regenconfig=False) # To run under Apache, we need to set up the DB outside of __main__ # Sleep and retry until database connection is successful @@ -230,16 +225,15 @@ def multiscanner_process(work_queue, exit_signal): task_status='Complete', timestamp=scan_time, ) - metadata_list = [] - storage_handler.store(results, wait=False) + storage_handler.store(results, wait=False) - if delete_after_scan: - for 
file_name in results: - os.remove(file_name) + if delete_after_scan: + for file_name in results: + os.remove(file_name) - filelist = [] time_stamp = None + metadata_list = [] storage_handler.close() From ddc4451362314112de44e2bdcf41eacb7dbc0454 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 15 Jul 2019 15:10:31 -0400 Subject: [PATCH 37/38] change slash for correct one and convert dict into object In the distributed case, we are obligated to pass a dict, but later we need to convert it back into a MSConfigParser. Essentially, for line 411 fallback="..." --- docker_utils/api_config.ini | 2 +- multiscanner/ms.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docker_utils/api_config.ini b/docker_utils/api_config.ini index 84bb3f17..0b52d404 100644 --- a/docker_utils/api_config.ini +++ b/docker_utils/api_config.ini @@ -16,7 +16,7 @@ password = vhost = / flush_every = 100 flush_interval = 10 -tz = US\Eastern +tz = US/Eastern [Database] db_type = sqlite diff --git a/multiscanner/ms.py b/multiscanner/ms.py index de311313..03c3803e 100644 --- a/multiscanner/ms.py +++ b/multiscanner/ms.py @@ -404,6 +404,8 @@ def multiscan(Files, config=None, module_list=None): if config is None: config = MSConfigParser() + elif isinstance(config, dict): + config = msconf.dict_to_config(config) # Copy files to a share if configured copyfilesto = config.get('main', 'copyfilesto', fallback=DEFAULTCONF['copyfilesto']) @@ -411,7 +413,7 @@ def multiscan(Files, config=None, module_list=None): if os.path.isdir(copyfilesto): filelist = _copy_to_share(filelist, filedic, copyfilesto) else: - raise IOError('The copyfilesto dir "' + copyfilesto + '" is not a valid dir') + raise IOError('The copyfilesto dir "{}" is not a valid dir'.format(copyfilesto)) # Create the global module interface global_module_interface = _GlobalModuleInterface() From 8dc9732de8eecce218b61c2ea51ec4a7670e4518 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Tue, 6 Aug 2019 16:05:01 
-0400 Subject: [PATCH 38/38] Convert config dict to object for distributed mode --- multiscanner/distributed/celery_worker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/multiscanner/distributed/celery_worker.py b/multiscanner/distributed/celery_worker.py index 216012a2..3ea6d58f 100644 --- a/multiscanner/distributed/celery_worker.py +++ b/multiscanner/distributed/celery_worker.py @@ -154,6 +154,8 @@ def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, # Get the storage config if config is None: config = msconf.MS_CONFIG + elif isinstance(config, dict): + config = msconf.dict_to_config(config) storage_conf = msconf.get_config_path('storage', config) storage_handler = storage.StorageHandler(configfile=storage_conf)