diff --git a/api_app/analyzers_manager/file_analyzers/capa_info.py b/api_app/analyzers_manager/file_analyzers/capa_info.py
index bf5a394bf8..ed1d4fd85c 100644
--- a/api_app/analyzers_manager/file_analyzers/capa_info.py
+++ b/api_app/analyzers_manager/file_analyzers/capa_info.py
@@ -1,40 +1,251 @@
 # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
 # See the file 'LICENSE' for copying permission.
-from typing import Dict
-
-from api_app.analyzers_manager.classes import DockerBasedAnalyzer, FileAnalyzer
+import json
+import logging
+import os
+import shutil
+import subprocess
+from shlex import quote
+from zipfile import ZipFile
+
+import requests
+from django.conf import settings
+from django.utils import timezone
 
-class CapaInfo(FileAnalyzer, DockerBasedAnalyzer):
-    name: str = "Capa"
-    url: str = "http://malware_tools_analyzers:4002/capa"
-    # interval between http request polling
-    poll_distance: int = 10
-    # http request polling max number of tries
-    max_tries: int = 60
-    # here, max_tries * poll_distance = 10 minutes
-    timeout: int = 60 * 9
-    # whereas subprocess timeout is kept as 60 * 9 = 9 minutes
+from api_app.analyzers_manager.classes import FileAnalyzer
+from api_app.analyzers_manager.exceptions import AnalyzerRunException
+from api_app.analyzers_manager.models import AnalyzerRulesFileVersion, PythonModule
+from tests.mock_utils import if_mock_connections, patch
+
+logger = logging.getLogger(__name__)
+
+BASE_LOCATION = f"{settings.MEDIA_ROOT}/capa"
+RULES_LOCATION = f"{BASE_LOCATION}/capa-rules"
+SIGNATURE_LOCATION = f"{BASE_LOCATION}/sigs"
+RULES_FILE = f"{RULES_LOCATION}/capa_rules.zip"
+RULES_URL = "https://github.com/mandiant/capa-rules/archive/refs/tags/"
+
+
+class CapaInfo(FileAnalyzer):
     shellcode: bool
     arch: str
+    timeout: float = 15
+    force_pull_signatures: bool = False
+
+    def _check_if_latest_version(self, latest_version: str) -> bool:
+        analyzer_rules_file_version = AnalyzerRulesFileVersion.objects.filter(
+            python_module=self.python_module
+        ).first()
+
+        if analyzer_rules_file_version is None:
+            return False
+
+        return latest_version == analyzer_rules_file_version.last_downloaded_version
+
+    @classmethod
+    def _update_rules_file_version(cls, latest_version: str, file_url: str):
+        capa_module = PythonModule.objects.get(
+            module="capa_info.CapaInfo",
+            base_path="api_app.analyzers_manager.file_analyzers",
+        )
+
+        _, created = AnalyzerRulesFileVersion.objects.update_or_create(
+            python_module=capa_module,
+            defaults={
+                "last_downloaded_version": latest_version,
+                "download_url": file_url,
+                "downloaded_at": timezone.now(),
+            },
+        )
+
+        if created:
+            logger.info(f"Created new entry for {capa_module} rules file version")
+        else:
+            logger.info(f"Updated existing entry for {capa_module} rules file version")
+
+    @classmethod
+    def _unzip_rules(cls):
+        logger.info(f"Extracting rules at {RULES_LOCATION}")
+        with ZipFile(RULES_FILE, mode="r") as archive:
+            # this will overwrite any existing directory
+            archive.extractall(RULES_LOCATION)
+        logger.info("Rules have been successfully extracted")
+
+    @classmethod
+    def _download_rules(cls, latest_version: str):
+        if os.path.exists(RULES_LOCATION):
+            logger.info(f"Removing existing rules at {RULES_LOCATION}")
+            shutil.rmtree(RULES_LOCATION)
+
+        os.makedirs(RULES_LOCATION)
+        logger.info(f"Created fresh rules directory at {RULES_LOCATION}")
+
+        file_to_download = latest_version + ".zip"
+        file_url = RULES_URL + file_to_download
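+        # stream the tagged release archive to disk in 10 KiB chunks so that
+        # large rule bundles are never held fully in memory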
f"Started downloading rules with version: {latest_version} from {file_url}" + ) + with open(RULES_FILE, mode="wb+") as file: + for chunk in response.iter_content(chunk_size=10 * 1024): + file.write(chunk) + + cls._update_rules_file_version(latest_version, file_url) + logger.info(f"Bumped up version number in db to {latest_version}") + + except Exception as e: + logger.error(f"Failed to download rules with error: {e}") + raise AnalyzerRunException("Failed to download rules") + + logger.info( + f"Rules with version: {latest_version} have been successfully downloaded at {RULES_LOCATION}" + ) + + @classmethod + def _download_signatures(cls) -> None: + logger.info(f"Downloading signatures at {SIGNATURE_LOCATION} now") + + if os.path.exists(SIGNATURE_LOCATION): + logger.info(f"Removing existing signatures at {SIGNATURE_LOCATION}") + shutil.rmtree(SIGNATURE_LOCATION) + + os.makedirs(SIGNATURE_LOCATION) + logger.info(f"Created fresh signatures directory at {SIGNATURE_LOCATION}") + + signatures_url = "https://api.github.com/repos/mandiant/capa/contents/sigs" + try: + response = requests.get(signatures_url) + signatures_list = response.json() + + for signature in signatures_list: - def config(self, runtime_configuration: Dict): - super().config(runtime_configuration) - self.args = [] - if self.arch != "64": - self.arch = "32" - if self.shellcode: - self.args.append("-f") - self.args.append("sc" + self.arch) + filename = signature["name"] + download_url = signature["download_url"] + + signature_file_path = os.path.join(SIGNATURE_LOCATION, filename) + + sig_content = requests.get(download_url, stream=True) + with open(signature_file_path, mode="wb") as file: + for chunk in sig_content.iter_content(chunk_size=10 * 1024): + file.write(chunk) + + except Exception as e: + logger.error(f"Failed to download signature: {e}") + raise AnalyzerRunException("Failed to update signatures") + logger.info("Successfully updated signatures") + + @classmethod + def update(cls) -> bool: + try: + logger.info("Updating capa rules") + response = requests.get( + "https://api.github.com/repos/mandiant/capa-rules/releases/latest" + ) + latest_version = response.json()["tag_name"] + cls._download_rules(latest_version) + cls._unzip_rules() + logger.info("Successfully updated capa rules") + + return True + + except Exception as e: + logger.error(f"Failed to update capa rules with error: {e}") + + return False def run(self): - # get binary - binary = self.read_file_bytes() - # make request data - fname = str(self.filename).replace("/", "_").replace(" ", "_") - args = [f"@{fname}", *self.args] - req_data = {"args": args, "timeout": self.timeout} - req_files = {fname: binary} - - return self._docker_run(req_data, req_files) + try: + + response = requests.get( + "https://api.github.com/repos/mandiant/capa-rules/releases/latest" + ) + latest_version = response.json()["tag_name"] + + update_status = ( + True if self._check_if_latest_version(latest_version) else self.update() + ) + + if self.force_pull_signatures or not os.path.isdir(SIGNATURE_LOCATION): + self._download_signatures() + + if not (os.path.isdir(RULES_LOCATION)) and not update_status: + + raise AnalyzerRunException("Couldn't update capa rules") + + command: list[str] = ["/usr/local/bin/capa", "--quiet", "--json"] + shell_code_arch = "sc64" if self.arch == "64" else "sc32" + if self.shellcode: + command.append("-f") + command.append(shell_code_arch) + + # Setting default capa-rules path + command.append("-r") + command.append(RULES_LOCATION) + + # Setting default 
+            process: subprocess.CompletedProcess = subprocess.run(
+                command,
+                capture_output=True,
+                text=True,
+                timeout=self.timeout,
+                check=True,
+            )
+
+            result = json.loads(process.stdout)
+            result["command_executed"] = command
+            result["rules_version"] = latest_version
+
+            logger.info(
+                f"CAPA analysis successfully completed for file: {self.filename} with hash {self.md5}"
+            )
+
+        except subprocess.CalledProcessError as e:
+            stderr = e.stderr
+            logger.error(
+                f"Capa failed to run for {self.filename} with hash {self.md5}: {e}"
+            )
+            raise AnalyzerRunException(
+                f"Analyzer for {self.filename} with hash {self.md5} failed with error: {stderr}"
+            )
+
+        return result
+
+    @classmethod
+    def _monkeypatch(cls):
+        response_from_command = subprocess.CompletedProcess(
+            args=[
+                "capa",
+                "--quiet",
+                "--json",
+                "-r",
+                "/opt/deploy/files_required/capa/capa-rules",
+                "-s",
+                "/opt/deploy/files_required/capa/sigs",
+                "/opt/deploy/files_required/06ebf06587b38784e2af42dd5fbe56e5",
+            ],
+            returncode=0,
+            stdout='{"meta": {}, "rules": {"contain obfuscated stackstrings": {}, "enumerate PE sections": {}}}',
+            stderr="",
+        )
+        patches = [
+            if_mock_connections(
+                patch.object(CapaInfo, "update", return_value=True),
+                patch("subprocess.run", return_value=response_from_command),
+            )
+        ]
+        return super()._monkeypatch(patches)
diff --git a/api_app/analyzers_manager/file_analyzers/floss.py b/api_app/analyzers_manager/file_analyzers/floss.py
index cc6d5a9d6d..8dcb219793 100644
--- a/api_app/analyzers_manager/file_analyzers/floss.py
+++ b/api_app/analyzers_manager/file_analyzers/floss.py
@@ -1,16 +1,20 @@
 # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
 # See the file 'LICENSE' for copying permission.
-from json import dumps as json_dumps
+import logging
+import subprocess
+from json import dumps, loads
+from shlex import quote
 
 from api_app.analyzers_manager.classes import DockerBasedAnalyzer, FileAnalyzer
 from api_app.analyzers_manager.exceptions import AnalyzerRunException
 
+logger = logging.getLogger(__name__)
+
 
 class Floss(FileAnalyzer, DockerBasedAnalyzer):
     name: str = "Floss"
-    url: str = "http://malware_tools_analyzers:4002/floss"
-    ranking_url: str = "http://malware_tools_analyzers:4002/stringsifter"
+    url: str = "http://malware_tools_analyzers:4002/stringsifter"
     # interval between http request polling
     poll_distance: int = 10
    # http request polling max number of tries
@@ -29,30 +33,39 @@ def update(cls) -> bool:
         pass
 
     def run(self):
-        # get binary
-        binary = self.read_file_bytes()
-        # make request data
-        fname = str(self.filename).replace("/", "_").replace(" ", "_")
         # From floss v3 there is a prompt that can be overcome
         # by using the flag --no static.
         # We can afford to lose static strings, considering that we can
         # easily retrieve them with simpler tools
-        args = [f"@{fname}", "--json", "--no", "static"]
-        req_data = {"args": args, "timeout": self.timeout}
-        req_files = {fname: binary}
-        result = self._docker_run(req_data, req_files)
-        if not isinstance(result, dict):
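+        # invoke floss directly and parse the JSON report it prints to stdout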
+        try:
+            process: subprocess.CompletedProcess = subprocess.run(
+                [
+                    "/usr/local/bin/floss",
+                    "--json",
+                    "--no",
+                    "static",
+                    "--",
+                    quote(self.filepath),
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+
+            result = loads(process.stdout)
+
+        except subprocess.CalledProcessError as e:
+            stderr = e.stderr
+            logger.error(f"Floss failed to run for {self.filename}: {e}")
             raise AnalyzerRunException(
-                f"result from floss tool is not a dict but is {type(result)}."
-                f" Full dump: {result}"
+                f"Analyzer for {self.filename} failed with error: {stderr}"
             )
+        result["exceeded_max_number_of_strings"] = {}
-        # we are changing the endpoint of _docker_run to stringsifter
-        self.url = self.ranking_url
 
         for key in self.max_no_of_strings:
             if self.rank_strings[key]:
-                strings = json_dumps(result["strings"][key])
+                strings = dumps(result["strings"][key])
                 # 4 is the number of arguments that we are already passing
                 analyzable_strings = strings[: self.OS_MAX_ARGS - 5]
                 args = [
diff --git a/api_app/analyzers_manager/migrations/0166_update_capa.py b/api_app/analyzers_manager/migrations/0166_update_capa.py
new file mode 100644
index 0000000000..b23a0a87be
--- /dev/null
+++ b/api_app/analyzers_manager/migrations/0166_update_capa.py
@@ -0,0 +1,64 @@
+# Generated by Django 4.2.17 on 2025-07-24 14:57
+
+from django.db import migrations
+
+
+def migrate(apps, schema_editor):
+    PythonModule = apps.get_model("api_app", "PythonModule")
+    Parameter = apps.get_model("api_app", "Parameter")
+    CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule")
+    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")
+
+    pm = PythonModule.objects.get(
+        module="capa_info.CapaInfo",
+        base_path="api_app.analyzers_manager.file_analyzers",
+    )
+
+    # schedule the daily rules update at midnight UTC; assign the crontab
+    # whether or not an identical schedule row already existed
+    new_crontab, _ = CrontabSchedule.objects.get_or_create(
+        minute="0",
+        hour="0",
+        day_of_week="*",
+        day_of_month="*",
+        month_of_year="*",
+        timezone="UTC",
+    )
+    pm.update_schedule = new_crontab
+    pm.full_clean()
+    pm.save()
+
+    AnalyzerConfig.objects.filter(python_module=pm).update(
+        soft_time_limit=1800, docker_based=False
+    )
+
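+    # expose the new analyzer knobs ("timeout" and "force_pull_signatures")
+    # as plugin parameters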
+    p1 = Parameter(
+        name="timeout",
+        type="float",
+        description="Duration in seconds for which IntelOwl waits for capa to return results. Defaults to 15 seconds.",
+        is_secret=False,
+        required=False,
+        python_module=pm,
+    )
+
+    p2 = Parameter(
+        name="force_pull_signatures",
+        type="bool",
+        description="Force re-downloading the signatures from the mandiant/capa GitHub repository",
+        is_secret=False,
+        required=False,
+        python_module=pm,
+    )
+
+    p1.full_clean()
+    p1.save()
+
+    p2.full_clean()
+    p2.save()
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("analyzers_manager", "0165_analyzer_config_joesandboxurl"),
+    ]
+
+    operations = [migrations.RunPython(migrate, migrations.RunPython.noop)]
diff --git a/api_app/analyzers_manager/migrations/0167_analyzerrulesfileversion.py b/api_app/analyzers_manager/migrations/0167_analyzerrulesfileversion.py
new file mode 100644
index 0000000000..ae509ddacd
--- /dev/null
+++ b/api_app/analyzers_manager/migrations/0167_analyzerrulesfileversion.py
@@ -0,0 +1,43 @@
+# Generated by Django 4.2.17 on 2025-09-05 19:42
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("api_app", "0071_delete_last_elastic_report"),
+        ("analyzers_manager", "0166_update_capa"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="AnalyzerRulesFileVersion",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "last_downloaded_version",
+                    models.CharField(max_length=50, null=True, blank=True),
+                ),
+                ("download_url", models.URLField(null=True, blank=True)),
+                ("downloaded_at", models.DateTimeField(auto_now_add=True)),
+                (
+                    "python_module",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.PROTECT,
+                        related_name="rules_version",
+                        to="api_app.pythonmodule",
+                    ),
+                ),
+            ],
+        ),
+    ]
diff --git a/api_app/analyzers_manager/models.py b/api_app/analyzers_manager/models.py
index 704ace31fb..5eddeb8503 100644
--- a/api_app/analyzers_manager/models.py
+++ b/api_app/analyzers_manager/models.py
@@ -350,3 +350,13 @@ def plugin_type(cls) -> str:
     @property
     def config_exception(cls):
         return AnalyzerConfigurationException
+
+
+# tracks the rules bundle version an analyzer last downloaded, so updates
+# can be skipped when the local copy is already current
+class AnalyzerRulesFileVersion(models.Model):
+    last_downloaded_version = models.CharField(max_length=50, blank=True, null=True)
+    download_url = models.URLField(max_length=200, blank=True, null=True)
+    downloaded_at = models.DateTimeField(auto_now_add=True)
+
+    python_module = models.ForeignKey(
+        PythonModule, on_delete=models.PROTECT, related_name="rules_version"
+    )
diff --git a/integrations/malware_tools_analyzers/Dockerfile b/integrations/malware_tools_analyzers/Dockerfile
index 34e2c297ea..104b308610 100644
--- a/integrations/malware_tools_analyzers/Dockerfile
+++ b/integrations/malware_tools_analyzers/Dockerfile
@@ -27,24 +27,6 @@ RUN npm install box-js@1.9.27 --global --production \
     && mkdir -p /tmp/boxjs \
     && chown -R ${USER}:${USER} /tmp/boxjs
 
-# Install Mandiant's CAPA
-WORKDIR ${PROJECT_PATH}/capa
-RUN if [[ $TARGETARCH == "amd64" ]]; \
-    then export CAPA_ARCH="linux"; \
-    else export CAPA_ARCH="macos"; fi \
-    && wget -q "https://github.com/mandiant/capa/releases/download/v9.0.0/capa-v9.0.0-$CAPA_ARCH.zip" \
-    && unzip "capa-v9.0.0-$CAPA_ARCH.zip" \
-    && ln -s ${PROJECT_PATH}/capa/capa /usr/local/bin/capa
-
-# Install Mandiant's Floss
-WORKDIR ${PROJECT_PATH}/floss
-RUN if [[ $TARGETARCH == "amd64" ]]; \
-    then export FLOSS_ARCH="linux"; \
-    else export FLOSS_ARCH="macos"; fi \
-    && wget -q "https://github.com/mandiant/flare-floss/releases/download/v3.1.1/floss-v3.1.1-$FLOSS_ARCH.zip" \
unzip "floss-v3.1.1-$FLOSS_ARCH.zip" \ - && ln -s ${PROJECT_PATH}/floss/floss /usr/local/bin/floss - # Install Mandiant's GoReSym WORKDIR ${PROJECT_PATH}/goresym RUN if [[ $TARGETARCH == "amd64" ]]; \ diff --git a/integrations/malware_tools_analyzers/app.py b/integrations/malware_tools_analyzers/app.py index c85abbce5f..6b89ec9356 100644 --- a/integrations/malware_tools_analyzers/app.py +++ b/integrations/malware_tools_analyzers/app.py @@ -130,15 +130,6 @@ def intercept_droidlysis_result(context, future: Future) -> None: shutil.rmtree(dir_loc, ignore_errors=True) -# with this, we can make http calls to the endpoint: /capa -shell2http.register_command(endpoint="capa", command_name="/usr/local/bin/capa -q -j") - -# with this, we can make http calls to the endpoint: /floss -shell2http.register_command( - endpoint="floss", - command_name="/usr/local/bin/floss -q -j", -) - # with this, we can make http calls to the endpoint: /peframe shell2http.register_command( endpoint="peframe", command_name="/opt/deploy/peframe/venv/bin/peframe" diff --git a/requirements/hardcoded-requirements.txt b/requirements/hardcoded-requirements.txt index 0e7ae68042..d07a1a1e30 100644 --- a/requirements/hardcoded-requirements.txt +++ b/requirements/hardcoded-requirements.txt @@ -5,9 +5,6 @@ # docker/Dockerfile pycti==6.5.1 -# integrations/malware_tools_analyzers/Dockerfile -flare-capa==9.0.0 -flare-floss==3.1.1 # other unmanaged versions # droydlys - they make no new versions, we pin the commit diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt index 7f63ba3bcc..92455a9ab7 100644 --- a/requirements/project-requirements.txt +++ b/requirements/project-requirements.txt @@ -91,8 +91,10 @@ wad==0.4.6 debloat==1.6.4 phonenumbers==9.0.3 die-python==0.4.0 -guarddog==2.1.0 # version greater than 2.1.0 raises dependency conflicts +# guarddog==2.1.0 # version greater than 2.1.0 raises dependency conflicts. Commenting this out due to dependency conflicts. 
 jbxapi==3.23.0
+flare-floss==3.1.1
+flare-capa==9.2.1
 
 # httpx required for HTTP/2 support (Mullvad DNS rejects HTTP/1.1 with protocol errors)
 httpx[http2]==0.28.1
diff --git a/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_capa_info.py b/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_capa_info.py
index be810b77c6..6775adab65 100644
--- a/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_capa_info.py
+++ b/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_capa_info.py
@@ -1,3 +1,4 @@
+import subprocess
 from unittest.mock import patch
 
 from api_app.analyzers_manager.file_analyzers.capa_info import CapaInfo
@@ -9,18 +10,30 @@ class TestCapaInfoAnalyzer(BaseFileAnalyzerTest):
     analyzer_class = CapaInfo
 
     def get_mocked_response(self):
-        mock_response = {
-            "rules": [
-                {"name": "create process", "namespace": "host-interaction/process"},
-                {"name": "read file", "namespace": "host-interaction/file"},
+        response_from_command = subprocess.CompletedProcess(
+            args=[
+                "capa",
+                "--quiet",
+                "--json",
+                "-r",
+                "/opt/deploy/files_required/capa/capa-rules",
+                "-s",
+                "/opt/deploy/files_required/capa/sigs",
+                "/opt/deploy/files_required/06ebf06587b38784e2af42dd5fbe56e5",
             ],
-            "meta": {"analysis": "mocked capa analysis"},
-        }
-        return patch.object(CapaInfo, "_docker_run", return_value=mock_response)
+            returncode=0,
+            stdout='{"meta": {}, "rules": {"contain obfuscated stackstrings": {}, "enumerate PE sections": {}}}',
+            stderr="",
+        )
+        return [
+            patch.object(CapaInfo, "update", return_value=True),
+            patch("subprocess.run", return_value=response_from_command),
+        ]
 
     def get_extra_config(self):
         return {
             "shellcode": False,
             "arch": "64",
-            "args": [],
+            "timeout": 15,
+            "force_pull_signatures": False,
         }
diff --git a/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_floss.py b/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_floss.py
index 89b5bc239a..f4d0f71b48 100644
--- a/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_floss.py
+++ b/tests/api_app/analyzers_manager/unit_tests/file_analyzers/test_floss.py
@@ -1,3 +1,4 @@
+import subprocess
 from unittest.mock import patch
 
 from api_app.analyzers_manager.file_analyzers.floss import Floss
@@ -10,23 +11,30 @@ class TestFloss(BaseFileAnalyzerTest):
 
     def get_extra_config(self):
         return {
-            "max_no_of_strings": {"decoded": 10, "stack": 5},
-            "rank_strings": {"decoded": True, "stack": False},
+            "max_no_of_strings": {"decoded_strings": 10, "stack_strings": 5},
+            "rank_strings": {"decoded_strings": True, "stack_strings": False},
         }
 
     def get_mocked_response(self):
+        response_from_command = subprocess.CompletedProcess(
+            args=[
+                "floss",
+                "--json",
+                "--no",
+                "static",
+                "--",
+                "/opt/deploy/files_required/06ebf06587b38784e2af42dd5fbe56e5",
+            ],
+            returncode=0,
+            stdout='{"metadata": {}, "analysis": {}, "strings": {"decoded_strings": ["de_string2", "de_string1"], "stack_strings": []}}',
+            stderr="",
+        )
         return [
+            patch("subprocess.run", return_value=response_from_command),
             patch(
                 "api_app.analyzers_manager.file_analyzers.floss.Floss._docker_run",
-                side_effect=[
-                    {
-                        "strings": {
-                            "decoded": ["de_string1", "de_string2"],
-                            "stack": ["st_string1", "st_string2"],
-                        }
-                    },
-                    # second call for ranking decoded strings only
-                    ["de_string1", "de_string2"],  # simulate ranked strings
-                ],
+                return_value=["de_string1", "de_string2"],  # simulating ranked strings
             ),
         ]