From 8aea9b0bbde892f80e6645f0f21f4b11d1e42b41 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Wed, 16 Aug 2023 17:00:37 +0100 Subject: [PATCH] Add a new marker to check for memory leaks Users have indicated that it will be very useful if the plugin exposes a way to detect memory leaks in tests. This is possible, but is a bit tricky as the interpreter can allocate memory for internal caches, as well as user functions. To make this more reliable, the new marker will take two parameters: * The limit of memory per location to consider an allocation. If the memory leaked by any allocation location in the test is higher than this value, the test will fail. * An optional callable function that can be used to filter out locations. This will allow users to remove false positives. Signed-off-by: Pablo Galindo --- docs/usage.rst | 63 +++++++++++++++ src/pytest_memray/marks.py | 98 ++++++++++++++++++++---- src/pytest_memray/plugin.py | 37 +++++++-- tests/test_pytest_memray.py | 148 ++++++++++++++++++++++++++++++++++++ 4 files changed, 324 insertions(+), 22 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index a409335..9bdadda 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -40,6 +40,9 @@ validations on tests when this plugin is enabled. ``limit_memory`` ---------------- +.. py:function:: limit_memory(memory_limit: str) + Fail the execution of the test if the test allocates more memory than allowed + When this marker is applied to a test, it will cause the test to fail if the execution of the test allocates more memory than allowed. It takes a single argument with a string indicating the maximum memory that the test can allocate. @@ -61,3 +64,63 @@ Example of usage: @pytest.mark.limit_memory("24 MB") def test_foobar(): pass # do some stuff that allocates memory + + +``limit_leaks`` +--------------- + + .. py:function:: limit_leaks(location_limit: str, filtering_fn: Callable[Iterable[Tuple[str, str, int]], bool]) + Fail the execution of the test if any location in the test leaks more memory than allowed. + + .. important:: + To detect leaks, Memray needs to intercept calls to the Python allocators and use native + traces. This is adds significant overhead, and will slow your test down. + +When this marker is applied to a test, it will cause the test to fail if any allocation location in +the execution of the test leaks more memory than allowed. It takes a single positional argument with a +string indicating the maximum memory **per allocation location** that the test is allowed to leak. + +Leaks are defined as memory that is allocated **in the marked test** that is not freed before leaving the test body. + +.. important:: + It's recommended to run your API or code in a loop when utilizing this plugin. This practice helps in distinguishing + genuine leaks from the "noise" generated by internal caches and other incidental allocations. + +The format for the string is `` ([KMGTP]B|B)``. The marker will raise +``ValueError`` if the string format cannot be parsed correctly. + +The marker also takes an optional keyword-only argument ``filtering_fn``. This argument represents a filtering +function that will be called with the traceback for every location that allocates memory that cumulatively is +bigger than the provided limit. The function must return *True* if the allocation must be taken into account +and *False* otherwise. This function can be used to discard some false positives detected by the marker. + +.. tip:: + + You can pass the ``--memray-bin-path`` argument to ``pytest`` to specify + a directory where Memray will store the binary files with the results. You + can then use the ``memray`` CLI to further investigate the allocations and the + leaks using any Memray reporters you'd like. Check `the memray docs + `_ for more + information. + +Example of usage: + +.. code-block:: python + + @pytest.mark.limit_leaks("1 MB") + def test_foobar(): + # Run the function to test in a loop to ensure + # we can differentiate leaks from memory allocated + # in internal caches + for _ in range(100): + do_some_stuff() + +.. warning:: + Is **very** challenging to write tests that do not "leak" memory in some way. + interpreter caches but there are some that cannot be correctly detected so + you may need to allow some small amount of leaked memory per location or use the + ``filtering_fn`` argument to filter out false positive leak reports caused by + objects that the interpreter plans to reuse later. These caches are + implementation details of the interpreter, so the amount of memory + allocated, the location of the allocation, and the allocator that was used + can all change from one Python version to another. diff --git a/src/pytest_memray/marks.py b/src/pytest_memray/marks.py index 2faa30d..e64296e 100644 --- a/src/pytest_memray/marks.py +++ b/src/pytest_memray/marks.py @@ -1,10 +1,15 @@ from __future__ import annotations from dataclasses import dataclass +from pathlib import Path from typing import Tuple from typing import cast +from typing import Callable +from typing import Iterable +from typing import Optional from memray import AllocationRecord +from memray import FileReader from pytest import Config from .utils import parse_memory_string @@ -13,10 +18,13 @@ PytestSection = Tuple[str, str] +StackElement = Tuple[str, str, int] +LeaksFilteringFunction = Callable[[Iterable[StackElement]], bool] + @dataclass class _MemoryInfo: - """Type that holds all memray-related info for a failed test.""" + """Type that holds memory-related info for a failed test.""" max_memory: float total_allocated_memory: int @@ -24,14 +32,9 @@ class _MemoryInfo: num_stacks: int native_stacks: bool - @property - def section(self) -> PytestSection: - """Return a tuple in the format expected by section reporters.""" - total_memory_str = sizeof_fmt(self.total_allocated_memory) - max_memory_str = sizeof_fmt(self.max_memory) + def _generate_section_text(self, limit_text: str, header_text: str) -> str: text_lines = [ - f"Test is using {total_memory_str} out of limit of {max_memory_str}", - "List of allocations: ", + f"{header_text} {sizeof_fmt(self.total_allocated_memory)} out of limit of {sizeof_fmt(self.max_memory)}" ] for record in self.allocations: size = record.size @@ -51,28 +54,93 @@ def section(self) -> PytestSection: text_lines.append(f"{padding*2}{function}:{file}:{line}") stacks_left -= 1 - return "memray-max-memory", "\n".join(text_lines) + return "\n".join(text_lines) + + @property + def section(self) -> PytestSection: + """Return a tuple in the format expected by section reporters.""" + return ( + "memray-max-memory", + self._generate_section_text("Test is using", "List of allocations:"), + ) @property def long_repr(self) -> str: """Generate a longrepr user-facing error message.""" - total_memory_str = sizeof_fmt(self.total_allocated_memory) - max_memory_str = sizeof_fmt(self.max_memory) - return f"Test was limited to {max_memory_str} but allocated {total_memory_str}" + return f"Test was limited to {sizeof_fmt(self.max_memory)} but allocated {sizeof_fmt(self.total_allocated_memory)}" + + +@dataclass +class _LeakedInfo(_MemoryInfo): + """Type that holds leaked memory-related info for a failed test.""" + + @property + def section(self) -> PytestSection: + """Return a tuple in the format expected by section reporters.""" + return ( + "memray-leaked-memory", + self._generate_section_text("Test leaked", "List of leaked allocations:"), + ) + + @property + def long_repr(self) -> str: + """Generate a longrepr user-facing error message.""" + return ( + f"Test was allowed to leak {sizeof_fmt(self.max_memory)} " + "per location but at least one location leaked more" + ) def limit_memory( - limit: str, *, _allocations: list[AllocationRecord], _config: Config + limit: str, *, _result_file: Path, _config: Config ) -> _MemoryInfo | None: """Limit memory used by the test.""" + reader = FileReader(_result_file) + func = reader.get_high_watermark_allocation_records + allocations: list[AllocationRecord] = list((func(merge_threads=True))) max_memory = parse_memory_string(limit) - total_allocated_memory = sum(record.size for record in _allocations) + total_allocated_memory = sum(record.size for record in allocations) if total_allocated_memory < max_memory: return None num_stacks: int = cast(int, value_or_ini(_config, "stacks")) native_stacks: bool = cast(bool, value_or_ini(_config, "native")) return _MemoryInfo( - max_memory, total_allocated_memory, _allocations, num_stacks, native_stacks + max_memory, total_allocated_memory, allocations, num_stacks, native_stacks + ) + + +def limit_leaks( + location_limit: str, + *, + filter_fn: Optional[LeaksFilteringFunction] = None, + _result_file: Path, + _config: Config, +) -> _MemoryInfo | None: + reader = FileReader(_result_file) + func = reader.get_leaked_allocation_records + allocations: list[AllocationRecord] = list((func(merge_threads=True))) + + memory_limit = parse_memory_string(location_limit) + + leaked_allocations = list( + allocation + for allocation in allocations + if ( + allocation.size >= memory_limit + and (filter_fn is None or filter_fn(allocation.hybrid_stack_trace())) + ) + ) + if not leaked_allocations: + return None + total_leaked_memory = sum(allocation.size for allocation in leaked_allocations) + + num_stacks: int = cast(int, value_or_ini(_config, "stacks")) + return _LeakedInfo( + memory_limit, + total_leaked_memory, + leaked_allocations, + num_stacks, + native_stacks=True, ) diff --git a/src/pytest_memray/plugin.py b/src/pytest_memray/plugin.py index 1933388..a5ff10c 100644 --- a/src/pytest_memray/plugin.py +++ b/src/pytest_memray/plugin.py @@ -17,6 +17,7 @@ from typing import List from typing import Tuple from typing import cast +from typing import Protocol from _pytest.terminal import TerminalReporter from memray import AllocationRecord @@ -34,12 +35,28 @@ from pytest import hookimpl from .marks import limit_memory +from .marks import _MemoryInfo +from .marks import limit_leaks from .utils import WriteEnabledDirectoryAction from .utils import positive_int from .utils import sizeof_fmt from .utils import value_or_ini -MARKERS = {"limit_memory": limit_memory} + +class PluginFn(Protocol): + def __call__( + *args: Any, + _result_file: Path, + _config: Config, + **kwargs: Any, + ) -> _MemoryInfo | None: + ... + + +MARKERS = { + "limit_memory": limit_memory, + "limit_leaks": limit_leaks, +} N_TOP_ALLOCS = 5 N_HISTOGRAM_BINS = 5 @@ -134,6 +151,11 @@ def pytest_pyfunc_call(self, pyfuncitem: Function) -> object | None: yield return + if len(markers) > 1: + raise ValueError( + "Only one memray marker can be applied at the same time to the same test" + ) + def _build_bin_path() -> Path: if self._tmp_dir is None and not os.getenv("MEMRAY_RESULT_PATH"): of_id = pyfuncitem.nodeid.replace("::", "-") @@ -151,6 +173,9 @@ def _build_bin_path() -> Path: value_or_ini(self.config, "trace_python_allocators") ) + if markers and "limit_leaks" in markers: + native = trace_python_allocators = True + @functools.wraps(func) def wrapper(*args: Any, **kwargs: Any) -> object | None: test_result: object | Any = None @@ -198,19 +223,17 @@ def pytest_runtest_makereport( return None for marker in item.iter_markers(): - marker_fn = MARKERS.get(marker.name) - if not marker_fn: + maybe_marker_fn = MARKERS.get(marker.name) + if not maybe_marker_fn: continue + marker_fn: PluginFn = cast(PluginFn, maybe_marker_fn) result = self.results.get(item.nodeid) if not result: continue - reader = FileReader(result.result_file) - func = reader.get_high_watermark_allocation_records - allocations = list((func(merge_threads=True))) res = marker_fn( *marker.args, **marker.kwargs, - _allocations=allocations, + _result_file=result.result_file, _config=self.config, ) if res: diff --git a/tests/test_pytest_memray.py b/tests/test_pytest_memray.py index dd19afb..7690309 100644 --- a/tests/test_pytest_memray.py +++ b/tests/test_pytest_memray.py @@ -594,3 +594,151 @@ def test_memory_alloc_fails(): ) result = pytester.runpytest("-Werror", "--memray") assert result.ret == ExitCode.OK + + +@pytest.mark.parametrize( + "size, outcome", + [ + (1, ExitCode.OK), + (1024 * 1 / 10, ExitCode.OK), + (1024 * 1, ExitCode.TESTS_FAILED), + (1024 * 10, ExitCode.TESTS_FAILED), + ], +) +def test_leak_marker(pytester: Pytester, size: int, outcome: ExitCode) -> None: + pytester.makepyfile( + f""" + import pytest + from memray._test import MemoryAllocator + allocator = MemoryAllocator() + @pytest.mark.limit_leaks("5KB") + def test_memory_alloc_fails(): + for _ in range(10): + allocator.valloc({size}) + # No free call here + """ + ) + + result = pytester.runpytest("--memray") + + assert result.ret == outcome + + +@pytest.mark.parametrize( + "size, outcome", + [ + (1, ExitCode.OK), + (1024 * 1 / 10, ExitCode.OK), + (1024 * 1, ExitCode.TESTS_FAILED), + (1024 * 10, ExitCode.TESTS_FAILED), + ], +) +def test_leak_marker_in_a_thread( + pytester: Pytester, size: int, outcome: ExitCode +) -> None: + pytester.makepyfile( + f""" + import pytest + from memray._test import MemoryAllocator + allocator = MemoryAllocator() + import threading + def allocating_func(): + for _ in range(10): + allocator.valloc({size}) + # No free call here + @pytest.mark.limit_leaks("5KB") + def test_memory_alloc_fails(): + t = threading.Thread(target=allocating_func) + t.start() + t.join() + """ + ) + + result = pytester.runpytest("--memray") + assert result.ret == outcome + + +def test_leak_marker_filtering_function(pytester: Pytester) -> None: + pytester.makepyfile( + """ + import pytest + from memray._test import MemoryAllocator + LEAK_SIZE = 1024 + allocator = MemoryAllocator() + + def this_should_not_be_there(): + allocator.valloc(LEAK_SIZE) + # No free call here + + def filtering_function(locations): + for fn, _, _ in locations: + if fn == "this_should_not_be_there": + return False + return True + + @pytest.mark.limit_leaks("5KB", filter_fn=filtering_function) + def test_memory_alloc_fails(): + for _ in range(10): + this_should_not_be_there() + """ + ) + + result = pytester.runpytest("--memray") + + assert result.ret == ExitCode.OK + + +def test_leak_marker_does_work_if_memray_not_passed(pytester: Pytester) -> None: + pytester.makepyfile( + """ + import pytest + from memray._test import MemoryAllocator + allocator = MemoryAllocator() + @pytest.mark.limit_leaks("0B") + def test_memory_alloc_fails(): + allocator.valloc(512) + # No free call here + """ + ) + + result = pytester.runpytest() + + assert result.ret == ExitCode.TESTS_FAILED + + +def test_multiple_markers_are_not_supported(pytester: Pytester) -> None: + pytester.makepyfile( + """ + import pytest + @pytest.mark.limit_leaks("0MB") + @pytest.mark.limit_memory("0MB") + def test_bar(): + pass + """ + ) + + result = pytester.runpytest("--memray") + assert result.ret == ExitCode.TESTS_FAILED + + output = result.stdout.str() + assert "Only one memray marker can be applied" in output + + +def test_multiple_markers_are_not_supported_with_global_marker( + pytester: Pytester, +) -> None: + pytester.makepyfile( + """ + import pytest + pytestmark = pytest.mark.limit_memory("1 MB") + @pytest.mark.limit_leaks("0MB") + def test_bar(): + pass + """ + ) + + result = pytester.runpytest("--memray") + assert result.ret == ExitCode.TESTS_FAILED + + output = result.stdout.str() + assert "Only one memray marker can be applied" in output