diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index 3c1507a11..02c738c6f 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -55,6 +55,9 @@ class Heuristics(str, Enum): #: Indicates that the package's description is unsecure, such as not having a descriptive keywords. UNSECURE_DESCRIPTION = "unsecure_description" + #: Indicates whether the package name contains the substring "stub". + STUB_NAME = "stub_name" + class HeuristicResult(str, Enum): """Result type indicating the outcome of a heuristic.""" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/minimal_content.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/minimal_content.py index 47f6dc5cd..3e544c9f8 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/minimal_content.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/minimal_content.py @@ -18,7 +18,7 @@ class MinimalContentAnalyzer(BaseHeuristicAnalyzer): """Check whether the package has minimal content.""" - FILES_THRESHOLD = 50 + FILES_THRESHOLD = 10 def __init__(self) -> None: super().__init__( @@ -46,9 +46,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes logger.debug(error_msg) raise SourceCodeError(error_msg) - file_count = sum(len(files) for _, _, files in os.walk(pypi_package_json.package_sourcecode_path)) + file_count = sum( + sum(1 for f in files if f.endswith(".pyi")) + for _, _, files in os.walk(pypi_package_json.package_sourcecode_path) + ) if file_count >= self.FILES_THRESHOLD: - return HeuristicResult.PASS, {"message": "Package has sufficient content"} + return HeuristicResult.PASS, {"message": "Package has sufficient pyi files", "pyi_files": file_count} - return HeuristicResult.FAIL, {"message": "Not enough files found"} + return HeuristicResult.FAIL, {"message": "Not enough pyi files found", 
"pyi_files": file_count} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/stub_name.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/stub_name.py new file mode 100644 index 000000000..c55a0e8f0 --- /dev/null +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/stub_name.py @@ -0,0 +1,42 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This analyzer checks if a PyPI package has 'stub' in its name.""" + +import logging + +from macaron.json_tools import JsonType +from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics +from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset + +logger: logging.Logger = logging.getLogger(__name__) + + +class StubNameAnalyzer(BaseHeuristicAnalyzer): + """Check whether the package name contains 'stub' (case-insensitive substring match).""" + + def __init__(self) -> None: + super().__init__( + name="stub_name_analyzer", + heuristic=Heuristics.STUB_NAME, + depends_on=None, + ) + + def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: + """Report whether the package's component name contains 'stub'. + + Parameters + ---------- + pypi_package_json: PyPIPackageJsonAsset + The PyPI package JSON asset object; only its component_name is read. + + Returns + ------- + tuple[HeuristicResult, dict[str, JsonType]]: + PASS if the lowercased package name contains 'stub', FAIL otherwise, with an empty detail dict. 
+ """ + package_name = pypi_package_json.component_name + if "stub" in package_name.lower(): + return HeuristicResult.PASS, {} + return HeuristicResult.FAIL, {} diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 03f4aa093..ee012c290 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -421,12 +421,16 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % Package released with a name similar to a popular package. {Confidence.HIGH.value}::trigger(malware_high_confidence_4) :- - quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}). + quickUndetailed, + forceSetup, + failed({Heuristics.TYPOSQUATTING_PRESENCE.value}), + failed({Heuristics.STUB_NAME.value}). % Package released with dependency confusion . {Confidence.HIGH.value}::trigger(malware_high_confidence_5) :- forceSetup, - passed({Heuristics.MINIMAL_CONTENT.value}), + failed({Heuristics.MINIMAL_CONTENT.value}), + failed({Heuristics.STUB_NAME.value}), failed({Heuristics.ANOMALOUS_VERSION.value}), failed({Heuristics.UNSECURE_DESCRIPTION.value}). 
diff --git a/tests/malware_analyzer/pypi/test_minimal_content.py b/tests/malware_analyzer/pypi/test_minimal_content.py index 1ebe3714c..fe6f24646 100644 --- a/tests/malware_analyzer/pypi/test_minimal_content.py +++ b/tests/malware_analyzer/pypi/test_minimal_content.py @@ -23,11 +23,10 @@ def test_analyze_sufficient_files_pass(analyzer: MinimalContentAnalyzer, pypi_pa pypi_package_json.download_sourcecode.return_value = True pypi_package_json.package_sourcecode_path = "/fake/path" with patch("os.walk") as mock_walk: - mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(60)])] - result, info = analyzer.analyze(pypi_package_json) + mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(60)])] + result, _ = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS - assert info == {"message": "Package has sufficient content"} pypi_package_json.download_sourcecode.assert_called_once() @@ -36,11 +35,10 @@ def test_analyze_exactly_threshold_files_pass(analyzer: MinimalContentAnalyzer, pypi_package_json.download_sourcecode.return_value = True pypi_package_json.package_sourcecode_path = "/fake/path" with patch("os.walk") as mock_walk: - mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(50)])] - result, info = analyzer.analyze(pypi_package_json) + mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(10)])] + result, _ = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS - assert info == {"message": "Package has sufficient content"} def test_analyze_insufficient_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None: @@ -48,11 +46,10 @@ def test_analyze_insufficient_files_fail(analyzer: MinimalContentAnalyzer, pypi_ pypi_package_json.download_sourcecode.return_value = True pypi_package_json.package_sourcecode_path = "/fake/path" with patch("os.walk") as mock_walk: - mock_walk.return_value = [("root", [], ["file1.py"])] - result, info = 
analyzer.analyze(pypi_package_json) + mock_walk.return_value = [("root", [], ["file1.pyi"])] + result, _ = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.FAIL - assert info == {"message": "Not enough files found"} def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None: @@ -61,10 +58,9 @@ def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_js pypi_package_json.package_sourcecode_path = "/fake/path" with patch("os.walk") as mock_walk: mock_walk.return_value = [("root", [], [])] - result, info = analyzer.analyze(pypi_package_json) + result, _ = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.FAIL - assert info == {"message": "Not enough files found"} def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None: @@ -84,8 +80,8 @@ def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer, (0, HeuristicResult.FAIL), (1, HeuristicResult.FAIL), (2, HeuristicResult.FAIL), - (55, HeuristicResult.PASS), - (70, HeuristicResult.PASS), + (12, HeuristicResult.PASS), + (15, HeuristicResult.PASS), ], ) def test_analyze_various_file_counts( @@ -98,7 +94,7 @@ def test_analyze_various_file_counts( """Test the analyzer with various file counts.""" pypi_package_json.download_sourcecode.return_value = True pypi_package_json.package_sourcecode_path = "/fake/path" - files = [f"file{i}.py" for i in range(file_count)] + files = [f"file{i}.pyi" for i in range(file_count)] mock_walk = MagicMock(return_value=[("root", [], files)]) monkeypatch.setattr("os.walk", mock_walk) diff --git a/tests/malware_analyzer/pypi/test_stub_name.py b/tests/malware_analyzer/pypi/test_stub_name.py new file mode 100644 index 000000000..d25a7ed17 --- /dev/null +++ b/tests/malware_analyzer/pypi/test_stub_name.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for the StubNameAnalyzer heuristic.""" + +from unittest.mock import MagicMock + +import pytest + +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult +from macaron.malware_analyzer.pypi_heuristics.metadata.stub_name import StubNameAnalyzer + + +@pytest.fixture(name="analyzer") +def analyzer_() -> StubNameAnalyzer: + """Return a new StubNameAnalyzer instance.""" + return StubNameAnalyzer() + + +@pytest.mark.parametrize( + ("package_name", "expected_result"), + [ + ("numpy", HeuristicResult.FAIL), + ("pandas", HeuristicResult.FAIL), + ("scikit-learn", HeuristicResult.FAIL), + ("tensorflow-stub", HeuristicResult.PASS), + ("torch-stubs", HeuristicResult.PASS), + ("requests", HeuristicResult.FAIL), + ("flask-stub", HeuristicResult.PASS), + ("my_package", HeuristicResult.FAIL), + ("requests-stub-client", HeuristicResult.PASS), + ("testpackage", HeuristicResult.FAIL), + ], +) +def test_analyze_various_package_names( + analyzer: StubNameAnalyzer, + pypi_package_json: MagicMock, + package_name: str, + expected_result: HeuristicResult, +) -> None: + """Check that names containing 'stub' PASS and all other names FAIL.""" + pypi_package_json.component_name = package_name + result, _ = analyzer.analyze(pypi_package_json) + + assert result == expected_result