From 8aea9b0bbde892f80e6645f0f21f4b11d1e42b41 Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Wed, 16 Aug 2023 17:00:37 +0100
Subject: [PATCH] Add a new marker to check for memory leaks

Users have indicated that it will be very useful if the plugin exposes a
way to detect memory leaks in tests. This is possible, but is a bit
tricky as the interpreter can allocate memory for internal caches, as
well as user functions.

To make this more reliable, the new marker will take two parameters:

* The limit of memory per location to consider an allocation. If the
  memory leaked by any allocation location in the test is higher than
  this value, the test will fail.

* An optional callable function that can be used to filter out
  locations. This will allow users to remove false positives.

Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
---
 docs/usage.rst              |  63 +++++++++++++++
 src/pytest_memray/marks.py  |  98 ++++++++++++++++++++----
 src/pytest_memray/plugin.py |  37 +++++++--
 tests/test_pytest_memray.py | 148 ++++++++++++++++++++++++++++++++++++
 4 files changed, 324 insertions(+), 22 deletions(-)
diff --git a/docs/usage.rst b/docs/usage.rst
index a409335..9bdadda 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -40,6 +40,9 @@ validations on tests when this plugin is enabled.
 ``limit_memory``
 ----------------
 
+.. py:function:: limit_memory(memory_limit: str)
+    Fail the execution of the test if the test allocates more memory than allowed
+
 When this marker is applied to a test, it will cause the test to fail if the execution
 of the test allocates more memory than allowed. It takes a single argument with a
 string indicating the maximum memory that the test can allocate.
@@ -61,3 +64,63 @@ Example of usage:
     @pytest.mark.limit_memory("24 MB")
     def test_foobar():
         pass  # do some stuff that allocates memory
+
+
+``limit_leaks``
+---------------
+
+ .. py:function:: limit_leaks(location_limit: str, filtering_fn: Callable[Iterable[Tuple[str, str, int]], bool])
+       Fail the execution of the test if any location in the test leaks more memory than allowed.
+
+ .. important::
+       To detect leaks, Memray needs to intercept calls to the Python allocators and use native
+       traces. This is adds significant overhead, and will slow your test down.
+
+When this marker is applied to a test, it will cause the test to fail if any allocation location in
+the execution of the test leaks more memory than allowed. It takes a single positional argument with a
+string indicating the maximum memory **per allocation location** that the test is allowed to leak.
+
+Leaks are defined as memory that is allocated **in the marked test** that is not freed before leaving the test body.
+
+.. important::
+    It's recommended to run your API or code in a loop when utilizing this plugin. This practice helps in distinguishing
+    genuine leaks from the "noise" generated by internal caches and other incidental allocations.
+
+The format for the string is ``<NUMBER> ([KMGTP]B|B)``. The marker will raise
+``ValueError`` if the string format cannot be parsed correctly.
+
+The marker also takes an optional keyword-only argument ``filtering_fn``. This argument represents a filtering
+function that will be called with the traceback for every location that allocates memory that cumulatively is
+bigger than the provided limit. The function must return *True* if the allocation must be taken into account
+and *False* otherwise. This function can be used to discard some false positives detected by the marker.
+
+.. tip::
+
+   You can pass the ``--memray-bin-path`` argument to ``pytest`` to specify
+   a directory where Memray will store the binary files with the results. You
+   can then use the ``memray`` CLI to further investigate the allocations and the
+   leaks using any Memray reporters you'd like. Check `the memray docs
+   <https://bloomberg.github.io/memray/getting_started.html>`_ for more
+   information.
+
+Example of usage:
+
+.. code-block:: python
+
+    @pytest.mark.limit_leaks("1 MB")
+    def test_foobar():
+        # Run the function to test in a loop to ensure
+        # we can differentiate leaks from memory allocated
+        # in internal caches
+        for _ in range(100):
+            do_some_stuff()
+
+.. warning::
+   Is **very** challenging to write tests that do not "leak" memory in some way.
+   interpreter caches but there are some that cannot be correctly detected so
+   you may need to allow some small amount of leaked memory per location or use the
+   ``filtering_fn`` argument to filter out false positive leak reports caused by
+   objects that the interpreter plans to reuse later. These caches are
+   implementation details of the interpreter, so the amount of memory
+   allocated, the location of the allocation, and the allocator that was used
+   can all change from one Python version to another.
diff --git a/src/pytest_memray/marks.py b/src/pytest_memray/marks.py
index 2faa30d..e64296e 100644
--- a/src/pytest_memray/marks.py
+++ b/src/pytest_memray/marks.py
@@ -1,10 +1,15 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Tuple
 from typing import cast
+from typing import Callable
+from typing import Iterable
+from typing import Optional
 
 from memray import AllocationRecord
+from memray import FileReader
 from pytest import Config
 
 from .utils import parse_memory_string
@@ -13,10 +18,13 @@
 
 PytestSection = Tuple[str, str]
 
+StackElement = Tuple[str, str, int]
+LeaksFilteringFunction = Callable[[Iterable[StackElement]], bool]
+
 
 @dataclass
 class _MemoryInfo:
-    """Type that holds all memray-related info for a failed test."""
+    """Type that holds memory-related info for a failed test."""
 
     max_memory: float
     total_allocated_memory: int
@@ -24,14 +32,9 @@ class _MemoryInfo:
     num_stacks: int
     native_stacks: bool
 
-    @property
-    def section(self) -> PytestSection:
-        """Return a tuple in the format expected by section reporters."""
-        total_memory_str = sizeof_fmt(self.total_allocated_memory)
-        max_memory_str = sizeof_fmt(self.max_memory)
+    def _generate_section_text(self, limit_text: str, header_text: str) -> str:
         text_lines = [
-            f"Test is using {total_memory_str} out of limit of {max_memory_str}",
-            "List of allocations: ",
+            f"{header_text} {sizeof_fmt(self.total_allocated_memory)} out of limit of {sizeof_fmt(self.max_memory)}"
         ]
         for record in self.allocations:
             size = record.size
@@ -51,28 +54,93 @@ def section(self) -> PytestSection:
                 text_lines.append(f"{padding*2}{function}:{file}:{line}")
                 stacks_left -= 1
 
-        return "memray-max-memory", "\n".join(text_lines)
+        return "\n".join(text_lines)
+
+    @property
+    def section(self) -> PytestSection:
+        """Return a tuple in the format expected by section reporters."""
+        return (
+            "memray-max-memory",
+            self._generate_section_text("Test is using", "List of allocations:"),
+        )
 
     @property
     def long_repr(self) -> str:
         """Generate a longrepr user-facing error message."""
-        total_memory_str = sizeof_fmt(self.total_allocated_memory)
-        max_memory_str = sizeof_fmt(self.max_memory)
-        return f"Test was limited to {max_memory_str} but allocated {total_memory_str}"
+        return f"Test was limited to {sizeof_fmt(self.max_memory)} but allocated {sizeof_fmt(self.total_allocated_memory)}"
+
+
+@dataclass
+class _LeakedInfo(_MemoryInfo):
+    """Type that holds leaked memory-related info for a failed test."""
+
+    @property
+    def section(self) -> PytestSection:
+        """Return a tuple in the format expected by section reporters."""
+        return (
+            "memray-leaked-memory",
+            self._generate_section_text("Test leaked", "List of leaked allocations:"),
+        )
+
+    @property
+    def long_repr(self) -> str:
+        """Generate a longrepr user-facing error message."""
+        return (
+            f"Test was allowed to leak {sizeof_fmt(self.max_memory)} "
+            "per location but at least one location leaked more"
+        )
 
 
 def limit_memory(
-    limit: str, *, _allocations: list[AllocationRecord], _config: Config
+    limit: str, *, _result_file: Path, _config: Config
 ) -> _MemoryInfo | None:
     """Limit memory used by the test."""
+    reader = FileReader(_result_file)
+    func = reader.get_high_watermark_allocation_records
+    allocations: list[AllocationRecord] = list((func(merge_threads=True)))
     max_memory = parse_memory_string(limit)
-    total_allocated_memory = sum(record.size for record in _allocations)
+    total_allocated_memory = sum(record.size for record in allocations)
     if total_allocated_memory < max_memory:
         return None
     num_stacks: int = cast(int, value_or_ini(_config, "stacks"))
     native_stacks: bool = cast(bool, value_or_ini(_config, "native"))
     return _MemoryInfo(
-        max_memory, total_allocated_memory, _allocations, num_stacks, native_stacks
+        max_memory, total_allocated_memory, allocations, num_stacks, native_stacks
+    )
+
+
+def limit_leaks(
+    location_limit: str,
+    *,
+    filter_fn: Optional[LeaksFilteringFunction] = None,
+    _result_file: Path,
+    _config: Config,
+) -> _MemoryInfo | None:
+    reader = FileReader(_result_file)
+    func = reader.get_leaked_allocation_records
+    allocations: list[AllocationRecord] = list((func(merge_threads=True)))
+
+    memory_limit = parse_memory_string(location_limit)
+
+    leaked_allocations = list(
+        allocation
+        for allocation in allocations
+        if (
+            allocation.size >= memory_limit
+            and (filter_fn is None or filter_fn(allocation.hybrid_stack_trace()))
+        )
+    )
+    if not leaked_allocations:
+        return None
+    total_leaked_memory = sum(allocation.size for allocation in leaked_allocations)
+
+    num_stacks: int = cast(int, value_or_ini(_config, "stacks"))
+    return _LeakedInfo(
+        memory_limit,
+        total_leaked_memory,
+        leaked_allocations,
+        num_stacks,
+        native_stacks=True,
     )
 
 
diff --git a/src/pytest_memray/plugin.py b/src/pytest_memray/plugin.py
index 1933388..a5ff10c 100644
--- a/src/pytest_memray/plugin.py
+++ b/src/pytest_memray/plugin.py
@@ -17,6 +17,7 @@
 from typing import List
 from typing import Tuple
 from typing import cast
+from typing import Protocol
 
 from _pytest.terminal import TerminalReporter
 from memray import AllocationRecord
@@ -34,12 +35,28 @@
 from pytest import hookimpl
 
 from .marks import limit_memory
+from .marks import _MemoryInfo
+from .marks import limit_leaks
 from .utils import WriteEnabledDirectoryAction
 from .utils import positive_int
 from .utils import sizeof_fmt
 from .utils import value_or_ini
 
-MARKERS = {"limit_memory": limit_memory}
+
+class PluginFn(Protocol):
+    def __call__(
+        *args: Any,
+        _result_file: Path,
+        _config: Config,
+        **kwargs: Any,
+    ) -> _MemoryInfo | None:
+        ...
+
+
+MARKERS = {
+    "limit_memory": limit_memory,
+    "limit_leaks": limit_leaks,
+}
 
 N_TOP_ALLOCS = 5
 N_HISTOGRAM_BINS = 5
@@ -134,6 +151,11 @@ def pytest_pyfunc_call(self, pyfuncitem: Function) -> object | None:
             yield
             return
 
+        if len(markers) > 1:
+            raise ValueError(
+                "Only one memray marker can be applied at the same time to the same test"
+            )
+
         def _build_bin_path() -> Path:
             if self._tmp_dir is None and not os.getenv("MEMRAY_RESULT_PATH"):
                 of_id = pyfuncitem.nodeid.replace("::", "-")
@@ -151,6 +173,9 @@ def _build_bin_path() -> Path:
             value_or_ini(self.config, "trace_python_allocators")
         )
 
+        if markers and "limit_leaks" in markers:
+            native = trace_python_allocators = True
+
         @functools.wraps(func)
         def wrapper(*args: Any, **kwargs: Any) -> object | None:
             test_result: object | Any = None
@@ -198,19 +223,17 @@ def pytest_runtest_makereport(
             return None
 
         for marker in item.iter_markers():
-            marker_fn = MARKERS.get(marker.name)
-            if not marker_fn:
+            maybe_marker_fn = MARKERS.get(marker.name)
+            if not maybe_marker_fn:
                 continue
+            marker_fn: PluginFn = cast(PluginFn, maybe_marker_fn)
             result = self.results.get(item.nodeid)
             if not result:
                 continue
-            reader = FileReader(result.result_file)
-            func = reader.get_high_watermark_allocation_records
-            allocations = list((func(merge_threads=True)))
             res = marker_fn(
                 *marker.args,
                 **marker.kwargs,
-                _allocations=allocations,
+                _result_file=result.result_file,
                 _config=self.config,
             )
             if res:
diff --git a/tests/test_pytest_memray.py b/tests/test_pytest_memray.py
index dd19afb..7690309 100644
--- a/tests/test_pytest_memray.py
+++ b/tests/test_pytest_memray.py
@@ -594,3 +594,151 @@ def test_memory_alloc_fails():
     )
     result = pytester.runpytest("-Werror", "--memray")
     assert result.ret == ExitCode.OK
+
+
+@pytest.mark.parametrize(
+    "size, outcome",
+    [
+        (1, ExitCode.OK),
+        (1024 * 1 / 10, ExitCode.OK),
+        (1024 * 1, ExitCode.TESTS_FAILED),
+        (1024 * 10, ExitCode.TESTS_FAILED),
+    ],
+)
+def test_leak_marker(pytester: Pytester, size: int, outcome: ExitCode) -> None:
+    pytester.makepyfile(
+        f"""
+         import pytest
+         from memray._test import MemoryAllocator
+         allocator = MemoryAllocator()
+         @pytest.mark.limit_leaks("5KB")
+         def test_memory_alloc_fails():
+             for _ in range(10):
+                 allocator.valloc({size})
+                 # No free call here
+         """
+    )
+
+    result = pytester.runpytest("--memray")
+
+    assert result.ret == outcome
+
+
+@pytest.mark.parametrize(
+    "size, outcome",
+    [
+        (1, ExitCode.OK),
+        (1024 * 1 / 10, ExitCode.OK),
+        (1024 * 1, ExitCode.TESTS_FAILED),
+        (1024 * 10, ExitCode.TESTS_FAILED),
+    ],
+)
+def test_leak_marker_in_a_thread(
+    pytester: Pytester, size: int, outcome: ExitCode
+) -> None:
+    pytester.makepyfile(
+        f"""
+         import pytest
+         from memray._test import MemoryAllocator
+         allocator = MemoryAllocator()
+         import threading
+         def allocating_func():
+            for _ in range(10):
+                allocator.valloc({size})
+                # No free call here
+         @pytest.mark.limit_leaks("5KB")
+         def test_memory_alloc_fails():
+             t = threading.Thread(target=allocating_func)
+             t.start()
+             t.join()
+         """
+    )
+
+    result = pytester.runpytest("--memray")
+    assert result.ret == outcome
+
+
+def test_leak_marker_filtering_function(pytester: Pytester) -> None:
+    pytester.makepyfile(
+        """
+         import pytest
+         from memray._test import MemoryAllocator
+         LEAK_SIZE = 1024
+         allocator = MemoryAllocator()
+
+         def this_should_not_be_there():
+             allocator.valloc(LEAK_SIZE)
+             # No free call here
+
+         def filtering_function(locations):
+             for fn, _, _ in locations:
+                 if fn == "this_should_not_be_there":
+                     return False
+             return True
+
+         @pytest.mark.limit_leaks("5KB", filter_fn=filtering_function)
+         def test_memory_alloc_fails():
+             for _ in range(10):
+                 this_should_not_be_there()
+         """
+    )
+
+    result = pytester.runpytest("--memray")
+
+    assert result.ret == ExitCode.OK
+
+
+def test_leak_marker_does_work_if_memray_not_passed(pytester: Pytester) -> None:
+    pytester.makepyfile(
+        """
+         import pytest
+         from memray._test import MemoryAllocator
+         allocator = MemoryAllocator()
+         @pytest.mark.limit_leaks("0B")
+         def test_memory_alloc_fails():
+             allocator.valloc(512)
+             # No free call here
+         """
+    )
+
+    result = pytester.runpytest()
+
+    assert result.ret == ExitCode.TESTS_FAILED
+
+
+def test_multiple_markers_are_not_supported(pytester: Pytester) -> None:
+    pytester.makepyfile(
+        """
+         import pytest
+         @pytest.mark.limit_leaks("0MB")
+         @pytest.mark.limit_memory("0MB")
+         def test_bar():
+             pass
+         """
+    )
+
+    result = pytester.runpytest("--memray")
+    assert result.ret == ExitCode.TESTS_FAILED
+
+    output = result.stdout.str()
+    assert "Only one memray marker can be applied" in output
+
+
+def test_multiple_markers_are_not_supported_with_global_marker(
+    pytester: Pytester,
+) -> None:
+    pytester.makepyfile(
+        """
+         import pytest
+         pytestmark = pytest.mark.limit_memory("1 MB")
+         @pytest.mark.limit_leaks("0MB")
+         def test_bar():
+             pass
+         """
+    )
+
+    result = pytester.runpytest("--memray")
+    assert result.ret == ExitCode.TESTS_FAILED
+
+    output = result.stdout.str()
+    assert "Only one memray marker can be applied" in output