From 134ca6573fdb729683fee8b62c3b47a668a5108d Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 3 Jul 2024 15:46:59 +0200 Subject: [PATCH] Add documentation. Fix code compatibility --- CHANGELOG.md | 4 +- docs/user/bots.md | 7 ++ intelmq/bots/outputs/misp/output_feed.py | 10 ++- intelmq/lib/mixins/cache.py | 11 +++ .../bots/outputs/misp/test_output_feed.py | 76 ++++++++++--------- 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bc8d47b3..cbf96573a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,9 @@ #### Experts #### Outputs -- `intelmq.bots.outputs.misp.output_feed`: Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). +- `intelmq.bots.outputs.misp.output_feed`: + - Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). + - Allow saving messages in bulk instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 7b2d3fd9a..bc7dcec21 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4591,6 +4591,13 @@ The PyMISP library >= 2.4.119.1 is required, see () The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 hour", string. +**`bulk_save_count`** + +(optional, int) If set to a non-0 value, the bot won't refresh the MISP feed immediately, but will cache +incoming messages until the given number of them is reached. Use it if your bot processes a high number of messages +and constant saving to the disk is a problem. Reloading or restarting bot as well as generating +a new MISP event based on `interval_event` triggers saving regardless of the cache size. 
+ **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 49829d9ed..53c655679 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -27,7 +27,7 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): """Generate an output in the MISP Feed format""" interval_event: str = "1 hour" - delay_save_event_count: int = None + bulk_save_count: int = None misp_org_name = None misp_org_uuid = None output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path @@ -115,12 +115,12 @@ def process(self): event = self.receive_message().to_dict(jsondict_as_string=True) cache_size = None - if self.delay_save_event_count: + if self.bulk_save_count: cache_size = self.cache_put(event) if cache_size is None: self._generate_feed(event) - elif cache_size >= self.delay_save_event_count: + elif cache_size >= self.bulk_save_count: self._generate_feed() self.acknowledge_message() @@ -138,8 +138,10 @@ def _generate_feed(self, message: dict = None): if message: self._add_message_to_feed(message) - while message := self.cache_pop(): + message = self.cache_pop() + while message: self._add_message_to_feed(message) + message = self.cache_pop() feed_output = self.current_event.to_feed(with_meta=False) with self.current_file.open("w") as f: diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index 956517540..ee945fbb5 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -13,6 +13,17 @@ class CacheMixin: + """Provides caching possibilities for bots + + For key-value cache, use methods: + cache_exists + cache_get + cache_set + + To store dict elements in a cache queue named after bot id, use methods: + cache_put + cache_pop + """ __redis: redis.Redis = None redis_cache_host: str = "127.0.0.1" redis_cache_port: 
int = 6379 diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 1627e29c4..631b7b7bd 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -11,29 +11,30 @@ import intelmq.lib.test as test from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot -EXAMPLE_EVENT = {"classification.type": "infected-system", - "destination.port": 9796, - "feed.accuracy": 100.0, - "destination.ip": "52.18.196.169", - "malware.name": "salityp2p", - "event_description.text": "Sinkhole attempted connection", - "time.source": "2016-04-19T23:16:08+00:00", - "source.ip": "152.166.119.2", - "feed.url": "http://alerts.bitsighttech.com:8080/stream?", - "source.geolocation.country": "Dominican Republic", - "time.observation": "2016-04-19T23:16:08+00:00", - "source.port": 65118, - "__type": "Event", - "feed.name": "BitSight", - "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166", - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" - "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" - "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" - "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" - "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" - "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", - "__type": "Event", - } +EXAMPLE_EVENT = { + "classification.type": "infected-system", + "destination.port": 9796, + "feed.accuracy": 100.0, + "destination.ip": "52.18.196.169", + "malware.name": "salityp2p", + "event_description.text": "Sinkhole attempted connection", + "time.source": "2016-04-19T23:16:08+00:00", + "source.ip": "152.166.119.2", + "feed.url": "http://alerts.bitsighttech.com:8080/stream?", + "source.geolocation.country": "Dominican Republic", + "time.observation": "2016-04-19T23:16:08+00:00", + "source.port": 65118, + "__type": "Event", + "feed.name": "BitSight", + "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 
abcd \165\166", + "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" + "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" + "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" + "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" + "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" + "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", + "__type": "Event", +} @test.skip_exotic() @@ -43,11 +44,16 @@ def set_bot(cls): cls.use_cache = True cls.bot_reference = MISPFeedOutputBot cls.default_input_message = EXAMPLE_EVENT - cls.directory = TemporaryDirectory() - cls.sysconfig = {"misp_org_name": 'IntelMQTestOrg', - "misp_org_uuid": "b89da4c2-0f74-11ea-96a1-6fa873a0eb4d", - "output_dir": cls.directory.name, - "interval_event": '1 hour'} + cls.sysconfig = { + "misp_org_name": "IntelMQTestOrg", + "misp_org_uuid": "b89da4c2-0f74-11ea-96a1-6fa873a0eb4d", + "interval_event": "1 hour", + } + + def setUp(self) -> None: + super().setUp() + self.directory = TemporaryDirectory() + self.sysconfig["output_dir"] = self.directory.name def test_event(self): self.run_bot() @@ -59,7 +65,7 @@ def test_event(self): def test_accumulating_events(self): self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] - self.run_bot(iterations=2, parameters={"delay_save_event_count": 3}) + self.run_bot(iterations=2, parameters={"bulk_save_count": 3}) current_event = open(f"{self.directory.name}/.current").read() @@ -69,7 +75,7 @@ def test_accumulating_events(self): assert len(objects) == 0 self.input_message = [EXAMPLE_EVENT] - self.run_bot(parameters={"delay_save_event_count": 3}) + self.run_bot(parameters={"bulk_save_count": 3}) # When enough events were collected, save them with open(current_event) as f: @@ -77,7 +83,7 @@ def test_accumulating_events(self): assert len(objects) == 3 self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT, EXAMPLE_EVENT] - self.run_bot(iterations=3, parameters={"delay_save_event_count": 3}) + self.run_bot(iterations=3, parameters={"bulk_save_count": 3}) # We continue 
saving to the same file until interval timeout with open(current_event) as f: @@ -87,22 +93,18 @@ def test_accumulating_events(self): # Simulating leftovers in the queue when it's time to generate new event Path(f"{self.directory.name}/.current").unlink() self.bot.cache_put(EXAMPLE_EVENT) - self.run_bot(parameters={"delay_save_event_count": 3}) + self.run_bot(parameters={"bulk_save_count": 3}) new_event = open(f"{self.directory.name}/.current").read() with open(new_event) as f: objects = json.load(f)["Event"]["Object"] assert len(objects) == 1 - def tearDown(self): self.cache.delete(self.bot_id) + self.directory.cleanup() super().tearDown() - @classmethod - def tearDownClass(cls): - cls.directory.cleanup() - if __name__ == "__main__": # pragma: no cover unittest.main()