Skip to content

Commit

Permalink
Updates in response to feedback.
Browse files Browse the repository at this point in the history
  • Loading branch information
elsif2 committed Jul 27, 2023
1 parent b279b69 commit 3d6a87e
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,19 @@ def init(self):

if self.file_format is not None:
if not (self.file_format == 'csv'):
raise ValueError('Invalid file_format')
raise ValueError("Invalid file_format '%s'. Must be 'csv'." % self.file_format)
else:
self.file_format = 'csv'

self.preamble = f'{{ "apikey": "{self.api_key}" '

def check(parameters: dict):
    """
    Validate the configured bot parameters.

    The function takes no ``self``/``cls`` argument.
    NOTE(review): presumably it is invoked on the class by the configuration
    checker - confirm that no ``@staticmethod`` decorator is required.

    Parameters:
        parameters (dict): the bot parameters, keyed by parameter name

    Returns:
        A list of ``[level, message]`` pairs, errors first, with one entry per
        finding, or ``None`` when there is nothing to report.
    """
    findings = []

    # init() accepts an unset (None) file_format and falls back to 'csv', so
    # only an explicitly configured value other than 'csv' is an error.
    file_format = parameters.get('file_format')
    if file_format is not None and file_format != 'csv':
        findings.append(["error", "Invalid file_format '%s'. Must be 'csv'." % (file_format,)])

    if 'country' in parameters:
        findings.append(["warning", "Deprecated parameter 'country' found. Please use 'reports' instead. The backwards-compatibility will be removed in IntelMQ version 4.0.0."])

    # Report every finding rather than stopping at the first matching key.
    return findings or None

def _headers(self, data):
return {'HMAC2': hmac.new(self.secret.encode(), data.encode('utf-8'), digestmod=hashlib.sha256).hexdigest()}

Expand Down
21 changes: 17 additions & 4 deletions intelmq/bots/parsers/shadowserver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,28 @@ This module is maintained by [The Shadowserver Foundation](https://www.shadowser

Please contact [email protected] with any issues or concerns.

The report configuration is now stored in a _schema.json_ file downloaded from https://interchange.shadowserver.org/intelmq/v1/schema.
The report configuration is now stored in a _shadowserver-schema.json_ file downloaded from https://interchange.shadowserver.org/intelmq/v1/schema.

For environments that have internet connectivity the `update_schema.py` script should be called from a cron job to obtain the latest revision.
The parser will attempt to download a schema update on startup unless INTELMQ_SKIP_INTERNET is set.
The parser will attempt to download a schema update on startup when the *auto_update* option is enabled.

For air-gapped systems automation will be required to download and copy the _schema.json_ file into this directory.
Schema downloads can also be scheduled as a cron job:

```
02 01 * * * intelmq.bots.parsers.shadowserver.parser --update-schema
```

For air-gapped systems, automation will be required to download the schema and copy it to `VAR_STATE_PATH/shadowserver-schema.json`.

The parser will automatically reload the configuration when the file changes.


## Schema contract

Once set, the `classification.identifier`, `classification.taxonomy`, and `classification.type` fields will remain static.

Once set, report fields will not be deleted.


## Sample configuration:

```
Expand Down Expand Up @@ -46,6 +58,7 @@ shadowserver-parser:
parameters:
destination_queues:
_default: [file-output-queue]
auto_update: true
run_mode: continuous
```

53 changes: 38 additions & 15 deletions intelmq/bots/parsers/shadowserver/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,23 @@
import base64
import binascii
import json
import urllib.request
import tempfile
from typing import Optional, Dict, Tuple, Any

import intelmq.lib.harmonization as harmonization
from intelmq.lib.utils import create_request_session
from intelmq import VAR_STATE_PATH


class __Container:
    """Empty attribute holder; the module keeps its configuration state on an instance of it."""
    pass


__config = __Container()
__config.schema_file = os.path.join(os.path.dirname(__file__), 'schema.json')
__config.schema_file = os.path.join(VAR_STATE_PATH, 'shadowserver-schema.json')
__config.schema_base = os.path.join(os.path.dirname(__file__), 'schema.json.test')
__config.schema_mtime = 0.0
__config.auto_update = False
__config.feedname_mapping = {}
__config.filename_mapping = {}

Expand All @@ -105,13 +108,16 @@ def set_logger(logger):
__config.logger = logger


def enable_auto_update(enable):
    """
    Enable automatic schema update.

    Stores *enable* in the module configuration's ``auto_update`` attribute,
    which ``reload()`` checks before it calls ``update_schema()``.

    Parameters:
        enable: stored as-is.
            NOTE(review): presumably a boolean - confirm against callers.
    """
    __config.auto_update = enable


def get_feed_by_feedname(given_feedname: str) -> Optional[Dict[str, Any]]:
reload()
return __config.feedname_mapping.get(given_feedname, None)


def get_feed_by_filename(given_filename: str) -> Optional[Tuple[str, Dict[str, Any]]]:
reload()
return __config.filename_mapping.get(given_filename, None)


Expand Down Expand Up @@ -289,19 +295,18 @@ def reload():
else:
__config.logger.info("The schema file does not exist.")

if __config.schema_mtime == 0.0 and mtime == 0.0 and not os.environ.get('INTELMQ_SKIP_INTERNET'):
__config.logger.info("Attempting to download schema.")
if __config.schema_mtime == 0.0 and mtime == 0.0 and __config.auto_update:
update_schema()

__config.feedname_mapping.clear()
__config.filename_mapping.clear()
for schema_file in [__config.schema_file, ".".join([__config.schema_file, 'test'])]:
for schema_file in [__config.schema_file, __config.schema_base]:
if os.path.isfile(schema_file):
with open(schema_file) as fh:
schema = json.load(fh)
for report in schema:
if report == "_meta":
__config.logger.info("Loading schema %s." % schema[report]['date_created'])
__config.logger.info("Loading schema %r." % schema[report]['date_created'])
for msg in schema[report]['change_log']:
__config.logger.info(msg)
else:
Expand All @@ -313,37 +318,55 @@ def reload():
def update_schema():
    """
    Download the latest report schema and install it when its version differs.

    The schema is fetched into a temporary file inside VAR_STATE_PATH,
    validated as JSON carrying ``_meta.date_created`` and compared with the
    installed schema file. When the versions differ the installed file is
    renamed to ``<schema_file>.bak`` and the download takes its place.

    Returns:
        bool: True when a new schema version was installed. False when
        INTELMQ_SKIP_INTERNET is set, when the download or the validation
        failed, when the installed file could not be read or renamed, or
        when the downloaded version equals the installed one.
    """
    if os.environ.get('INTELMQ_SKIP_INTERNET'):
        return False

    # download the schema to a temp file
    (th, tmp) = tempfile.mkstemp(dir=VAR_STATE_PATH)
    # mkstemp() hands back an already-open descriptor; the file is reopened by
    # name below, so close the descriptor now instead of leaking it.
    os.close(th)
    url = 'https://interchange.shadowserver.org/intelmq/v1/schema'
    __config.logger.info("Attempting to download schema from %r" % url)
    __config.logger.debug("Using temp file %r for the download." % tmp)
    try:
        with create_request_session() as session:
            with session.get(url, stream=True) as r:
                r.raise_for_status()
                with open(tmp, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
    except Exception:
        __config.logger.error("Failed to download %r" % url)
        # A failed download leaves at best a partial file; remove it so that
        # repeated attempts do not pile up temp files (best effort only).
        try:
            os.unlink(tmp)
        except OSError:
            pass
        return False
    __config.logger.info("Download successful.")

    new_version = ''
    old_version = ''

    try:
        # validate the downloaded file
        with open(tmp) as fh:
            schema = json.load(fh)
            new_version = schema['_meta']['date_created']
    except Exception:
        # leave tempfile behind for diagnosis
        __config.logger.error("Failed to validate %r" % tmp)
        return False

    if os.path.exists(__config.schema_file):
        # compare the new version against the old; rename the existing file
        try:
            with open(__config.schema_file) as fh:
                schema = json.load(fh)
                old_version = schema['_meta']['date_created']
            if new_version != old_version:
                os.replace(__config.schema_file, ".".join([__config.schema_file, 'bak']))
        except Exception as e:
            __config.logger.error("Unable to replace schema file: %s" % str(e))
            return False

    if new_version != old_version:
        os.replace(tmp, __config.schema_file)
        __config.logger.info("New schema version is %r." % new_version)
        return True
    else:
        # same version as the installed schema: discard the download
        os.unlink(tmp)

    return False
45 changes: 39 additions & 6 deletions intelmq/bots/parsers/shadowserver/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

from intelmq.lib.bot import ParserBot
from intelmq.lib.exceptions import InvalidKey, InvalidValue
from intelmq.bin.intelmqctl import IntelMQController
import intelmq.lib.utils as utils
import intelmq.bots.parsers.shadowserver._config as config


Expand All @@ -34,8 +36,7 @@ class ShadowserverParserBot(ParserBot):
Parse all ShadowServer feeds
Parameters:
schema_file (str): Path to the report schema file
auto_update (boolean): Enable automatic schema download
"""

recover_line = ParserBot.recover_line_csv_dict
Expand All @@ -45,13 +46,15 @@ class ShadowserverParserBot(ParserBot):
feedname = None
_mode = None
overwrite = False
auto_update = False

def init(self):
config.set_logger(self.logger)
try:
config.update_schema()
except Exception as e:
self.logger.warning("Schema update failed: %s." % e)
if self.auto_update:
config.enable_auto_update(True)
self.logger.debug("Feature 'auto_update' is enabled.")
config.reload()

if self.feedname is not None:
self._sparser_config = config.get_feed_by_feedname(self.feedname)
if self._sparser_config:
Expand Down Expand Up @@ -228,5 +231,35 @@ def parse_line(self, row, report):
def shutdown(self):
    """Reset ``self.feedname`` to ``None``."""
    self.feedname = None

@classmethod
def _create_argparser(cls):
    """
    Extend the inherited argument parser with the schema-update switches.

    Returns:
        The parser produced by the parent class with the boolean flags
        ``--update-schema`` and ``--verbose`` added.
    """
    parser = super()._create_argparser()
    extra_flags = (
        ("--update-schema", 'downloads latest report schema'),
        ("--verbose", 'be verbose'),
    )
    for flag, description in extra_flags:
        parser.add_argument(flag, action='store_true', help=description)
    return parser

@classmethod
def run(cls, parsed_args=None):
    """
    Entry point: either update the schema or start the bot as usual.

    Without ``--update-schema`` the call is handed to the parent class
    unchanged. With it, the schema is downloaded through
    ``config.update_schema()``. When that reports a new schema, every
    configured bot whose module equals this module's name is reloaded,
    unless its ``auto_update`` parameter is falsy; a missing parameter
    counts as True here.

    Parameters:
        parsed_args: already parsed command line arguments; when falsy the
            command line is parsed with the parser from _create_argparser()
    """
    args = parsed_args if parsed_args else cls._create_argparser().parse_args()

    if not args.update_schema:
        super().run(parsed_args=args)
        return

    logger = utils.log(__name__, log_path=None)
    # only errors are shown unless --verbose was requested
    logger.setLevel('INFO' if args.verbose else 'ERROR')
    config.set_logger(logger)

    if not config.update_schema():
        return

    runtime_conf = utils.get_bots_settings()
    try:
        ctl = IntelMQController()
        for bot in runtime_conf:
            settings = runtime_conf[bot]
            if settings["module"] != __name__:
                continue
            if settings['parameters'].get('auto_update', True):
                ctl.bot_reload(bot)
    except Exception as e:
        logger.error("Failed to signal bot: %r" % str(e))


BOT = ShadowserverParserBot
11 changes: 0 additions & 11 deletions intelmq/bots/parsers/shadowserver/update_schema.py

This file was deleted.

28 changes: 28 additions & 0 deletions intelmq/tests/bots/parsers/shadowserver/test_download_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2023 The Shadowserver Foundation
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
"""
Created on Thu Jul 27 19:44:44 2023
"""

import unittest
import os
import logging
from intelmq import VAR_STATE_PATH
import intelmq.bots.parsers.shadowserver._config as config
import intelmq.lib.utils as utils
import intelmq.lib.test as test

@test.skip_internet()
class TestShadowserverSchemaDownload(unittest.TestCase):
    """
    Exercise the schema download against the live service.

    NOTE(review): test.skip_internet() presumably skips this class when
    internet access is disabled - confirm.
    """

    def test_download(self):
        """update_schema() reports success and leaves the schema file in place."""
        config.set_logger(utils.log('test-bot', log_path=None))
        target = os.path.join(VAR_STATE_PATH, 'shadowserver-schema.json')

        # remove any installed schema so that the download has to create it
        if os.path.exists(target):
            os.unlink(target)

        downloaded = config.update_schema()
        self.assertEqual(True, downloaded)
        self.assertEqual(True, os.path.exists(target))

0 comments on commit 3d6a87e

Please sign in to comment.