From 8a8cc6925af40ff45d339d55701c230a62af6a2b Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 23 Sep 2025 10:31:06 +1000 Subject: [PATCH 1/2] chore: fix a bug for uncaught SourceCodeError exception Signed-off-by: behnazh-w --- .../pypi_heuristics/base_analyzer.py | 7 ++++++- .../metadata/type_stub_file.py | 8 ++++---- .../checks/detect_malicious_metadata_check.py | 2 +- .../pypi_ibm-agent-analytics-common/policy.dl | 10 ++++++++++ .../pypi_ibm-agent-analytics-common/test.yaml | 20 +++++++++++++++++++ .../pypi/test_type_stub_file.py | 11 ++++------ 6 files changed, 45 insertions(+), 13 deletions(-) create mode 100644 tests/integration/cases/pypi_ibm-agent-analytics-common/policy.dl create mode 100644 tests/integration/cases/pypi_ibm-agent-analytics-common/test.yaml diff --git a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py index 0c55b03fd..f97b4c96d 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Define and initialize the base analyzer.""" @@ -40,4 +40,9 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes ------- tuple[HeuristicResult, dict[str, JsonType]]: The result and related information collected during the analysis. + + Raises + ------ + HeuristicAnalyzerValueError + If a heuristic analysis fails due to malformed package information. """ diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py index a1447bac1..b7caa7fd2 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py @@ -6,7 +6,6 @@ import logging import os -from macaron.errors import SourceCodeError from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics @@ -40,11 +39,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes tuple[HeuristicResult, dict[str, JsonType]]: The result and related information collected during the analysis. """ + # TODO: .pyi stub files may be present in both source distributions (sdist) and wheels. + # Currently, we only check the sdist, which can lead to false positives in this heuristic. + # To improve accuracy, we should also check for stub files in the wheel distribution. result = pypi_package_json.download_sourcecode() if not result: - error_msg = "No source code files have been downloaded" - logger.debug(error_msg) - raise SourceCodeError(error_msg) + return HeuristicResult.FAIL, {"message": "No source code files have been downloaded.", "pyi_files": 0} file_count = sum( sum(1 for f in files if f.endswith(".pyi")) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index da49be6bb..da0e7ae1b 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -154,7 +154,7 @@ def analyze_source( return {analyzer.heuristic: result}, detail_info except SourceCodeError as error: - error_msg = f"Unable to perform analysis, source code not available: {error}" + error_msg = f"Unable to perform source code analysis: {error}" logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) from error diff --git a/tests/integration/cases/pypi_ibm-agent-analytics-common/policy.dl b/tests/integration/cases/pypi_ibm-agent-analytics-common/policy.dl new file mode 100644 index 000000000..8b8c74d8b --- /dev/null +++ b/tests/integration/cases/pypi_ibm-agent-analytics-common/policy.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-malicious-package", component_id, "Check the malicious package.") :- + check_passed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-malicious-package", component_id) :- + is_component(component_id, "pkg:pypi/ibm-agent-analytics-common@0.1.3"). diff --git a/tests/integration/cases/pypi_ibm-agent-analytics-common/test.yaml b/tests/integration/cases/pypi_ibm-agent-analytics-common/test.yaml new file mode 100644 index 000000000..94e97aa11 --- /dev/null +++ b/tests/integration/cases/pypi_ibm-agent-analytics-common/test.yaml @@ -0,0 +1,20 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing a Python package that is distributed as a wheel, with no source (sdist) available. + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/ibm-agent-analytics-common@0.1.3 +- name: Run macaron verify-policy to verify that the malicious metadata check passes. + kind: verify + options: + policy: policy.dl diff --git a/tests/malware_analyzer/pypi/test_type_stub_file.py b/tests/malware_analyzer/pypi/test_type_stub_file.py index fed53963b..4f8542aec 100644 --- a/tests/malware_analyzer/pypi/test_type_stub_file.py +++ b/tests/malware_analyzer/pypi/test_type_stub_file.py @@ -7,7 +7,6 @@ import pytest -from macaron.errors import SourceCodeError from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult from macaron.malware_analyzer.pypi_heuristics.metadata.type_stub_file import TypeStubFileAnalyzer @@ -66,12 +65,10 @@ def test_analyze_no_files_fail(analyzer: TypeStubFileAnalyzer, pypi_package_json def test_analyze_download_failed_raises_error(analyzer: TypeStubFileAnalyzer, pypi_package_json: MagicMock) -> None: """Test the analyzer raises SourceCodeError when source code download fails.""" pypi_package_json.download_sourcecode.return_value = False - - with pytest.raises(SourceCodeError) as exc_info: - analyzer.analyze(pypi_package_json) - - assert "No source code files have been downloaded" in str(exc_info.value) - pypi_package_json.download_sourcecode.assert_called_once() + assert ( + HeuristicResult.FAIL, + {"message": "No source code files have been downloaded.", "pyi_files": 0}, + ) == analyzer.analyze(pypi_package_json) @pytest.mark.parametrize( From fa655eb83f178ff0f901c9b884d6cdabf6f9a272 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Thu, 25 Sep 2025 10:30:05 +1000 Subject: [PATCH 2/2] fix: return skip result instead of fail when source not downloaded Signed-off-by: behnazh-w --- .../pypi_heuristics/metadata/type_stub_file.py | 2 +- tests/malware_analyzer/pypi/test_type_stub_file.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py index b7caa7fd2..b400f60cb 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/type_stub_file.py @@ -44,7 +44,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes # To improve accuracy, we should also check for stub files in the wheel distribution. result = pypi_package_json.download_sourcecode() if not result: - return HeuristicResult.FAIL, {"message": "No source code files have been downloaded.", "pyi_files": 0} + return HeuristicResult.SKIP, {"message": "No source code files have been downloaded.", "pyi_files": 0} file_count = sum( sum(1 for f in files if f.endswith(".pyi")) diff --git a/tests/malware_analyzer/pypi/test_type_stub_file.py b/tests/malware_analyzer/pypi/test_type_stub_file.py index 4f8542aec..f22f65c8b 100644 --- a/tests/malware_analyzer/pypi/test_type_stub_file.py +++ b/tests/malware_analyzer/pypi/test_type_stub_file.py @@ -63,10 +63,10 @@ def test_analyze_no_files_fail(analyzer: TypeStubFileAnalyzer, pypi_package_json def test_analyze_download_failed_raises_error(analyzer: TypeStubFileAnalyzer, pypi_package_json: MagicMock) -> None: - """Test the analyzer raises SourceCodeError when source code download fails.""" + """Test the analyzer when source code download fails.""" pypi_package_json.download_sourcecode.return_value = False assert ( - HeuristicResult.FAIL, + HeuristicResult.SKIP, {"message": "No source code files have been downloaded.", "pyi_files": 0}, ) == analyzer.analyze(pypi_package_json)