Skip to content

Commit 3732c61

Browse files
committed
chore: check file size limit before downloading source and skip to prevent loss of result
Signed-off-by: Carl Flottmann <[email protected]>
1 parent e6e3266 commit 3732c61

File tree

3 files changed

+69
-1
lines changed

3 files changed

+69
-1
lines changed

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ def analyze_source(
148148
if not force and analyzer.depends_on and self._should_skip(results, analyzer.depends_on):
149149
return {analyzer.heuristic: HeuristicResult.SKIP}, {}
150150

151+
if not pypi_package_json.can_download_sourcecode():
152+
logger.debug("Source code will exceed download limits. Please increase the download size limit to analyze.")
153+
return {analyzer.heuristic: HeuristicResult.SKIP}, {}
154+
151155
try:
152156
with pypi_package_json.sourcecode():
153157
result, detail_info = analyzer.analyze(pypi_package_json)

src/macaron/slsa_analyzer/package_registry/pypi_registry.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@
2626
from macaron.json_tools import json_extract
2727
from macaron.malware_analyzer.datetime_parser import parse_datetime
2828
from macaron.slsa_analyzer.package_registry.package_registry import PackageRegistry
29-
from macaron.util import download_file_with_size_limit, send_get_http_raw, stream_file_with_size_limit
29+
from macaron.util import (
30+
can_download_file,
31+
download_file_with_size_limit,
32+
send_get_http_raw,
33+
stream_file_with_size_limit,
34+
)
3035

3136
if TYPE_CHECKING:
3237
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
@@ -209,6 +214,23 @@ def cleanup_sourcecode_directory(
209214
raise InvalidHTTPResponseError(error_message) from error
210215
raise InvalidHTTPResponseError(error_message)
211216

217+
def can_download_package_sourcecode(self, url: str) -> bool:
218+
"""Check if the package source code can be downloaded within the default file limits.
219+
220+
Parameters
221+
----------
222+
url: str
223+
The package source code url.
224+
225+
Returns
226+
-------
227+
bool
228+
True if it can be downloaded within the size limits, otherwise False.
229+
"""
230+
size_limit = defaults.getint("slsa.verifier", "max_download_size", fallback=10000000)
231+
timeout = defaults.getint("downloads", "timeout", fallback=120)
232+
return can_download_file(url, size_limit, timeout=timeout)
233+
212234
def download_package_sourcecode(self, url: str) -> str:
213235
"""Download the package source code from pypi registry.
214236
@@ -624,6 +646,19 @@ def download_sourcecode(self) -> bool:
624646
logger.debug(error)
625647
return False
626648

649+
def can_download_sourcecode(self) -> bool:
650+
"""Return whether the package source code can be downloaded within the download file size limits.
651+
652+
Returns
653+
-------
654+
bool
655+
``True`` if the source code can be downloaded; ``False`` if not.
656+
"""
657+
url = self.get_sourcecode_url()
658+
if url:
659+
return self.pypi_registry.can_download_package_sourcecode(url)
660+
return False
661+
627662
def get_sourcecode_file_contents(self, path: str) -> bytes:
628663
"""
629664
Get the contents of a single source code file specified by the path.

src/macaron/util.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,35 @@ def chunk_function(self, chunk: bytes) -> None:
286286
self.file.write(chunk)
287287

288288

289+
def can_download_file(url: str, size_limit: int, timeout: int | None = None) -> bool:
290+
"""Send a head request to check if the file provided at url can be downloaded within the size limit.
291+
292+
It expects a URL to a file, and checks the "Content-Length" field of the response.
293+
294+
Parameters
295+
----------
296+
url: str
297+
The target of the request.
298+
size_limit: int
299+
The size limit in bytes of the file.
300+
timeout: int | None
301+
The request timeout (optional).
302+
303+
Returns
304+
-------
305+
bool
306+
True if the file can be downloaded within the size limit, False otherwise.
307+
"""
308+
response = send_head_http_raw(url, timeout=timeout, allow_redirects=True)
309+
if not response:
310+
return False
311+
312+
size = response.headers.get("Content-Length")
313+
if size and int(size) <= size_limit:
314+
return True
315+
return False
316+
317+
289318
def download_file_with_size_limit(
290319
url: str, headers: dict, file_path: str, timeout: int = 40, size_limit: int = 0
291320
) -> bool:

0 commit comments

Comments
 (0)