Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ class Heuristics(str, Enum):
#: Indicates that the package's description is unsecure, such as not having any descriptive keywords.
UNSECURE_DESCRIPTION = "unsecure_description"

#: Indicates that the package name contains 'stub'.
STUB_NAME = "stub_name"


class HeuristicResult(str, Enum):
"""Result type indicating the outcome of a heuristic."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
class MinimalContentAnalyzer(BaseHeuristicAnalyzer):
"""Check whether the package has minimal content."""

FILES_THRESHOLD = 50
FILES_THRESHOLD = 10

def __init__(self) -> None:
super().__init__(
Expand Down Expand Up @@ -46,9 +46,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
logger.debug(error_msg)
raise SourceCodeError(error_msg)

file_count = sum(len(files) for _, _, files in os.walk(pypi_package_json.package_sourcecode_path))
file_count = sum(
sum(1 for f in files if f.endswith(".pyi"))
for _, _, files in os.walk(pypi_package_json.package_sourcecode_path)
)

if file_count >= self.FILES_THRESHOLD:
return HeuristicResult.PASS, {"message": "Package has sufficient content"}
return HeuristicResult.PASS, {"message": "Package has sufficient pyi files", "pyi_files": file_count}

return HeuristicResult.FAIL, {"message": "Not enough files found"}
return HeuristicResult.FAIL, {"message": "Not enough pyi files found", "pyi_files": file_count}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This analyzer checks if a PyPI package has 'stub' in its name."""

import logging

from macaron.json_tools import JsonType
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset

logger: logging.Logger = logging.getLogger(__name__)


class StubNameAnalyzer(BaseHeuristicAnalyzer):
    """Heuristic that inspects the package name for the substring 'stub'."""

    def __init__(self) -> None:
        super().__init__(name="stub_name_analyzer", heuristic=Heuristics.STUB_NAME, depends_on=None)

    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
        """Report whether the component name of the package mentions 'stub'.

        Parameters
        ----------
        pypi_package_json: PyPIPackageJsonAsset
            The PyPI package JSON asset object; only its ``component_name`` is read.

        Returns
        -------
        tuple[HeuristicResult, dict[str, JsonType]]:
            ``HeuristicResult.PASS`` when the lower-cased name contains 'stub',
            ``HeuristicResult.FAIL`` otherwise. The detail dictionary is always empty.
        """
        # Lower-case first so that the substring check ignores letter case.
        normalized_name = pypi_package_json.component_name.lower()
        outcome = HeuristicResult.PASS if "stub" in normalized_name else HeuristicResult.FAIL
        return outcome, {}
Original file line number Diff line number Diff line change
Expand Up @@ -421,12 +421,16 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:

% Package released with a name similar to a popular package.
{Confidence.HIGH.value}::trigger(malware_high_confidence_4) :-
quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
quickUndetailed,
forceSetup,
failed({Heuristics.TYPOSQUATTING_PRESENCE.value}),
failed({Heuristics.STUB_NAME.value}).

% Package released with dependency confusion .
{Confidence.HIGH.value}::trigger(malware_high_confidence_5) :-
forceSetup,
passed({Heuristics.MINIMAL_CONTENT.value}),
failed({Heuristics.MINIMAL_CONTENT.value}),
failed({Heuristics.STUB_NAME.value}),
failed({Heuristics.ANOMALOUS_VERSION.value}),
failed({Heuristics.UNSECURE_DESCRIPTION.value}).

Expand Down
24 changes: 10 additions & 14 deletions tests/malware_analyzer/pypi/test_minimal_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ def test_analyze_sufficient_files_pass(analyzer: MinimalContentAnalyzer, pypi_pa
pypi_package_json.download_sourcecode.return_value = True
pypi_package_json.package_sourcecode_path = "/fake/path"
with patch("os.walk") as mock_walk:
mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(60)])]
result, info = analyzer.analyze(pypi_package_json)
mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(60)])]
result, _ = analyzer.analyze(pypi_package_json)

assert result == HeuristicResult.PASS
assert info == {"message": "Package has sufficient content"}
pypi_package_json.download_sourcecode.assert_called_once()


Expand All @@ -36,23 +35,21 @@ def test_analyze_exactly_threshold_files_pass(analyzer: MinimalContentAnalyzer,
pypi_package_json.download_sourcecode.return_value = True
pypi_package_json.package_sourcecode_path = "/fake/path"
with patch("os.walk") as mock_walk:
mock_walk.return_value = [("root", [], [f"file{i}.py" for i in range(50)])]
result, info = analyzer.analyze(pypi_package_json)
mock_walk.return_value = [("root", [], [f"file{i}.pyi" for i in range(10)])]
result, _ = analyzer.analyze(pypi_package_json)

assert result == HeuristicResult.PASS
assert info == {"message": "Package has sufficient content"}


def test_analyze_insufficient_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
"""Test the analyzer fails when the package has insufficient files."""
pypi_package_json.download_sourcecode.return_value = True
pypi_package_json.package_sourcecode_path = "/fake/path"
with patch("os.walk") as mock_walk:
mock_walk.return_value = [("root", [], ["file1.py"])]
result, info = analyzer.analyze(pypi_package_json)
mock_walk.return_value = [("root", [], ["file1.pyi"])]
result, _ = analyzer.analyze(pypi_package_json)

assert result == HeuristicResult.FAIL
assert info == {"message": "Not enough files found"}


def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
Expand All @@ -61,10 +58,9 @@ def test_analyze_no_files_fail(analyzer: MinimalContentAnalyzer, pypi_package_js
pypi_package_json.package_sourcecode_path = "/fake/path"
with patch("os.walk") as mock_walk:
mock_walk.return_value = [("root", [], [])]
result, info = analyzer.analyze(pypi_package_json)
result, _ = analyzer.analyze(pypi_package_json)

assert result == HeuristicResult.FAIL
assert info == {"message": "Not enough files found"}


def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer, pypi_package_json: MagicMock) -> None:
Expand All @@ -84,8 +80,8 @@ def test_analyze_download_failed_raises_error(analyzer: MinimalContentAnalyzer,
(0, HeuristicResult.FAIL),
(1, HeuristicResult.FAIL),
(2, HeuristicResult.FAIL),
(55, HeuristicResult.PASS),
(70, HeuristicResult.PASS),
(12, HeuristicResult.PASS),
(15, HeuristicResult.PASS),
],
)
def test_analyze_various_file_counts(
Expand All @@ -98,7 +94,7 @@ def test_analyze_various_file_counts(
"""Test the analyzer with various file counts."""
pypi_package_json.download_sourcecode.return_value = True
pypi_package_json.package_sourcecode_path = "/fake/path"
files = [f"file{i}.py" for i in range(file_count)]
files = [f"file{i}.pyi" for i in range(file_count)]
mock_walk = MagicMock(return_value=[("root", [], files)])
monkeypatch.setattr("os.walk", mock_walk)

Expand Down
45 changes: 45 additions & 0 deletions tests/malware_analyzer/pypi/test_stub_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Tests for the StubNameAnalyzer heuristic."""

from unittest.mock import MagicMock

import pytest

from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
from macaron.malware_analyzer.pypi_heuristics.metadata.stub_name import StubNameAnalyzer


@pytest.fixture(name="analyzer")
def analyzer_() -> StubNameAnalyzer:
    """Provide a StubNameAnalyzer instance to tests requesting ``analyzer``."""
    stub_name_analyzer = StubNameAnalyzer()
    return stub_name_analyzer


@pytest.mark.parametrize(
    ("package_name", "expected_result"),
    [
        # Each pair is (package name, expected verdict); names containing 'stub' expect PASS.
        ("numpy", HeuristicResult.FAIL),
        ("pandas", HeuristicResult.FAIL),
        ("scikit-learn", HeuristicResult.FAIL),
        ("tensorflow-stub", HeuristicResult.PASS),
        ("torch-stubs", HeuristicResult.PASS),
        ("requests", HeuristicResult.FAIL),
        ("flask-stub", HeuristicResult.PASS),
        ("my_package", HeuristicResult.FAIL),
        ("requests-stub-client", HeuristicResult.PASS),
        ("testpackage", HeuristicResult.FAIL),
    ],
)
def test_analyze_various_package_names(
    analyzer: StubNameAnalyzer,
    pypi_package_json: MagicMock,
    package_name: str,
    expected_result: HeuristicResult,
) -> None:
    """Verify the analyzer's verdict for names with and without 'stub'."""
    # The analyzer reads the name from ``component_name`` on the asset object.
    pypi_package_json.component_name = package_name

    verdict, _details = analyzer.analyze(pypi_package_json)

    assert verdict == expected_result
Loading