From 968a43b0e18ec86af820f84b863ccca16e81174b Mon Sep 17 00:00:00 2001 From: azazelm3dj3d <56496067+azazelm3dj3d@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:50:02 -0500 Subject: [PATCH 1/4] Working version for remote ioc extraction --- iocextract.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) diff --git a/iocextract.py b/iocextract.py index 3b70e4a..98ef2c4 100644 --- a/iocextract.py +++ b/iocextract.py @@ -8,10 +8,12 @@ """ import io +import os import sys import json import base64 import argparse +import requests import binascii import itertools import ipaddress @@ -966,7 +968,6 @@ def main(): If no arguments are specified, the default behavior is to extract all IOCs. """ ) - parser.add_argument( "-i", "--input", @@ -1030,6 +1031,15 @@ def main(): parser.add_argument( "-dn", "--dirname", help="Path of the directory to extract IOCs" ) + parser.add_argument( + "-ri", + "--remote_input", + action="store_true", + help="Extract IOCs from a remote data source", + ) + parser.add_argument( + "-url", "--url", help="URL to extract IOCs from" + ) args = parser.parse_args() @@ -1041,7 +1051,7 @@ def main(): for path in dir_path: dir_db.append(str(path)) - if not args.dir: + if not args.dir and not args.remote_input: # Read user unput # TODO: Improve the method of data input data = args.input.read() @@ -1076,9 +1086,8 @@ def main(): if args.extract_ipv6s or args.extract_ips or extract_all: memo["ipv6s"] = list(extract_ipv6s(data)) if args.extract_urls or extract_all: - memo["urls"] = list( - extract_urls(data, refang=args.refang, strip=args.strip_urls) - ) + memo["urls"] = list(extract_urls(data, refang=args.refang, strip=args.strip_urls)) + if args.open: memo["open_punc"] = list( extract_urls( @@ -1088,6 +1097,7 @@ def main(): open_punc=args.open, ) ) + if args.rm_scheme: memo["no_protocol"] = list( extract_urls( @@ -1098,8 +1108,10 @@ def main(): no_scheme=args.rm_scheme, ) ) + if args.extract_yara_rules or extract_all: memo["yara_rules"] = list(extract_yara_rules(data)) + if args.extract_hashes or extract_all: memo["hashes"] = list(extract_hashes(data)) @@ -1120,6 +1132,77 @@ def main(): args.output.write("{ioc}\n".format(ioc=ioc)) args.output.flush() + elif args.remote_input: + remote_url = requests.get(args.url) + + if remote_url.status_code != 200: + args.output.write(f"Unable to access remote host: {args.url}") + sys.exit(1) + + d = "/tmp/url.txt" + + with open(d, "w") as f: + f.write(remote_url.content) + + with open(d, "r") as f: + data = f.read() + + if args.extract_emails or extract_all: + memo["emails"] = list(extract_emails(data, refang=args.refang)) + if args.extract_ipv4s or args.extract_ips or extract_all: + memo["ipv4s"] = list(extract_ipv4s(data, refang=args.refang)) + if args.extract_ipv6s or args.extract_ips or extract_all: + memo["ipv6s"] = list(extract_ipv6s(data)) + if args.extract_urls or extract_all: + memo["urls"] = list(extract_urls(data, refang=args.refang, strip=args.strip_urls)) + + if args.open: + memo["open_punc"] = list( + extract_urls( + data, + refang=args.refang, + strip=args.strip_urls, + open_punc=args.open, + ) + ) + + if args.rm_scheme: + memo["no_protocol"] = list( + extract_urls( + data, + refang=args.refang, + strip=args.strip_urls, + open_punc=args.open, + no_scheme=args.rm_scheme, + ) + ) + + if args.extract_yara_rules or extract_all: + memo["yara_rules"] = list(extract_yara_rules(data)) + + if args.extract_hashes or extract_all: + memo["hashes"] = list(extract_hashes(data)) + + # Custom regex file, one per line + if args.custom_regex: + regex_list = [l.strip() for l in args.custom_regex.readlines()] + + try: + memo["custom_regex"] = list(extract_custom_iocs(data, regex_list)) + except (IndexError, re.error) as e: + sys.stderr.write("Error in custom regex: {e}\n".format(e=e)) + + if args.json: + ioc = json.dumps(memo, indent=4, sort_keys=True) + else: + ioc = "\n".join(sum(memo.values(), [])) + + args.output.write("{ioc}\n".format(ioc=ioc)) + args.output.flush() + + # Cleanup temp file + os.remove(d) + else: if args.extract_emails or extract_all: memo["emails"] = list(extract_emails(data, refang=args.refang)) From f13f763867ef1bb99066c4523b3666529fbd927b Mon Sep 17 00:00:00 2001 From: azazelm3dj3d <56496067+azazelm3dj3d@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:51:52 -0500 Subject: [PATCH 2/4] Update requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3bb6bdc..9381634 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -regex==2020.6.8 \ No newline at end of file +regex==2020.6.8 +requests \ No newline at end of file From f924d1a4990d2aac1539775dc20716ab998d840e Mon Sep 17 00:00:00 2001 From: azazelm3dj3d <56496067+azazelm3dj3d@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:58:42 -0500 Subject: [PATCH 3/4] Fixing stuff for workflow --- iocextract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iocextract.py b/iocextract.py index 98ef2c4..8b7caf8 100644 --- a/iocextract.py +++ b/iocextract.py @@ -1136,7 +1136,7 @@ def main(): remote_url = requests.get(args.url) if remote_url.status_code != 200: - args.output.write(f"Unable to access remote host: {args.url}") + args.output.write("Unable to access remote host: {0}".format(args.url)) sys.exit(1) d = "/tmp/url.txt" From dd4a042fe5fba4e37631955702b7b42b4e87bdbc Mon Sep 17 00:00:00 2001 From: azazelm3dj3d <56496067+azazelm3dj3d@users.noreply.github.com> Date: Mon, 10 Jul 2023 11:11:17 -0500 Subject: [PATCH 4/4] Cleanup remote server extraction --- iocextract.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/iocextract.py b/iocextract.py index 8b7caf8..909ffcd 100644 --- a/iocextract.py +++ b/iocextract.py @@ -12,6 +12,8 @@ import sys import json import base64 +import random +import string import argparse import requests import binascii @@ -1139,12 +1141,12 @@ def main(): args.output.write("Unable to access remote host: {0}".format(args.url)) sys.exit(1) - d = "/tmp/url.txt" + file_contents = "/tmp/{0}.txt".format("".join(random.choice(string.ascii_lowercase) for _ in range(10))) - with open(d, "w") as f: - f.write(remote_url.content) + with open(file_contents, "w") as f: + f.write(str(remote_url.content)) - with open(d, "r") as f: + with open(file_contents, "r") as f: data = f.read() if args.extract_emails or extract_all: @@ -1201,7 +1203,7 @@ def main(): args.output.flush() # Cleanup temp file - os.remove(d) + os.remove(file_contents) else: if args.extract_emails or extract_all: