Improve stepwise to not forget failed tests #13122

Merged (7 commits) on Jan 17, 2025.
changelog/13122.improvement.rst (15 additions, 0 deletions)
@@ -0,0 +1,15 @@
The ``--stepwise`` mode received a number of improvements:

* It no longer forgets the last failed test if pytest is later executed without the flag.

This enables the following workflow:

1. Execute pytest with ``--stepwise``; pytest then stops at the first failing test;
2. Iteratively update the code and run the test in isolation, without the ``--stepwise`` flag (for example in an IDE), until it is fixed;
3. Execute pytest with ``--stepwise`` again; pytest will continue from the previously failed test and, if it now passes, continue with the next tests.

Previously, at step 3, pytest would start from the beginning, forgetting the previously failed test (the sketch after this changelog entry walks through the new workflow).

This change, however, might cause issues if the ``--stepwise`` runs happen far apart in time, as the stored state might become stale. For that reason, the internal state is reset automatically whenever the test suite changes (for now only the number of tests is considered for this; the heuristic might be changed or improved in the future).

* New ``--stepwise-reset``/``--sw-reset`` flag, allowing the user to explicitly reset the stepwise state and restart the workflow from the beginning.
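
As a rough illustration of the workflow above (hypothetical, not part of this PR's diff), the three steps can be scripted with pytest's ``pytester`` fixture; the module name ``test_mod`` and the test names are invented, and the fixture must be enabled (for example via ``pytest_plugins = "pytester"``)::

    def test_stepwise_state_survives_plain_runs(pytester):
        pytester.makepyfile(
            test_mod="""
            def test_a():
                assert True

            def test_b():
                assert False  # the failure --stepwise should remember

            def test_c():
                assert True
            """
        )
        # Step 1: --stepwise stops at the first failing test (test_b).
        result = pytester.runpytest("--stepwise")
        result.assert_outcomes(passed=1, failed=1)

        # Step 2: run the failing test in isolation, without --stepwise.
        # With this change, the stored stepwise state survives this run.
        pytester.runpytest("test_mod.py::test_b")

        # Step 3: --stepwise resumes from the previously failed test,
        # deselecting the test that already passed.
        result = pytester.runpytest("--stepwise")
        result.stdout.fnmatch_lines(["*skipping 1 already passed item*"])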
src/_pytest/stepwise.py (104 additions, 20 deletions)
@@ -1,5 +1,11 @@
 from __future__ import annotations

+import dataclasses
+from datetime import datetime
+from datetime import timedelta
+from typing import Any
+from typing import TYPE_CHECKING
+
 from _pytest import nodes
 from _pytest.cacheprovider import Cache
 from _pytest.config import Config
@@ -8,6 +14,9 @@
 from _pytest.reports import TestReport


+if TYPE_CHECKING:
+    from typing_extensions import Self
+
 STEPWISE_CACHE_DIR = "cache/stepwise"

@@ -30,11 +39,20 @@ def pytest_addoption(parser: Parser) -> None:
         help="Ignore the first failing test but stop on the next failing test. "
         "Implicitly enables --stepwise.",
     )
+    group.addoption(
+        "--sw-reset",
+        "--stepwise-reset",
+        action="store_true",
+        default=False,
+        dest="stepwise_reset",
+        help="Resets stepwise state, restarting the stepwise workflow. "
+        "Implicitly enables --stepwise.",
+    )


 def pytest_configure(config: Config) -> None:
-    if config.option.stepwise_skip:
-        # allow --stepwise-skip to work on its own merits.
+    # --stepwise-skip/--stepwise-reset implies stepwise.
+    if config.option.stepwise_skip or config.option.stepwise_reset:
         config.option.stepwise = True
     if config.getoption("stepwise"):
         config.pluginmanager.register(StepwisePlugin(config), "stepwiseplugin")
@@ -47,43 +65,108 @@ def pytest_sessionfinish(session: Session) -> None:
         # Do not update cache if this process is a xdist worker to prevent
         # race conditions (#10641).
         return
-        # Clear the list of failing tests if the plugin is not active.
-        session.config.cache.set(STEPWISE_CACHE_DIR, [])


+@dataclasses.dataclass
+class StepwiseCacheInfo:
+    # The nodeid of the last failed test.
+    last_failed: str | None
+
+    # The number of tests in the last time --stepwise was run.
+    # We use this information as a simple way to invalidate the cache information, avoiding
+    # confusing behavior in case the cache is stale.
+    last_test_count: int | None
+
+    # The date when the cache was last updated, for information purposes only.
+    last_cache_date_str: str
+
+    @property
+    def last_cache_date(self) -> datetime:
+        return datetime.fromisoformat(self.last_cache_date_str)
+
+    @classmethod
+    def empty(cls) -> Self:
+        return cls(
+            last_failed=None,
+            last_test_count=None,
+            last_cache_date_str=datetime.now().isoformat(),
+        )
+
+    def update_date_to_now(self) -> None:
+        self.last_cache_date_str = datetime.now().isoformat()
+
+
 class StepwisePlugin:
     def __init__(self, config: Config) -> None:
         self.config = config
         self.session: Session | None = None
-        self.report_status = ""
+        self.report_status: list[str] = []
         assert config.cache is not None
         self.cache: Cache = config.cache
-        self.lastfailed: str | None = self.cache.get(STEPWISE_CACHE_DIR, None)
         self.skip: bool = config.getoption("stepwise_skip")
+        self.reset: bool = config.getoption("stepwise_reset")
+        self.cached_info = self._load_cached_info()
+
+    def _load_cached_info(self) -> StepwiseCacheInfo:
+        cached_dict: dict[str, Any] | None = self.cache.get(STEPWISE_CACHE_DIR, None)
+        if cached_dict:
+            try:
+                return StepwiseCacheInfo(
+                    cached_dict["last_failed"],
+                    cached_dict["last_test_count"],
+                    cached_dict["last_cache_date_str"],
+                )
+            except (KeyError, TypeError) as e:
+                error = f"{type(e).__name__}: {e}"
+                self.report_status.append(f"error reading cache, discarding ({error})")
+
+        # Cache not found or error during load, return a new cache.
+        return StepwiseCacheInfo.empty()

     def pytest_sessionstart(self, session: Session) -> None:
         self.session = session

     def pytest_collection_modifyitems(
         self, config: Config, items: list[nodes.Item]
     ) -> None:
-        if not self.lastfailed:
-            self.report_status = "no previously failed tests, not skipping."
+        last_test_count = self.cached_info.last_test_count
+        self.cached_info.last_test_count = len(items)
+
+        if self.reset:
+            self.report_status.append("resetting state, not skipping.")
+            self.cached_info.last_failed = None
+            return
+
+        if not self.cached_info.last_failed:
+            self.report_status.append("no previously failed tests, not skipping.")
+            return
+
+        if last_test_count is not None and last_test_count != len(items):
+            self.report_status.append(
+                f"test count changed, not skipping (now {len(items)} tests, previously {last_test_count})."
+            )
+            self.cached_info.last_failed = None
             return

-        # check all item nodes until we find a match on last failed
+        # Check all item nodes until we find a match on last failed.
         failed_index = None
         for index, item in enumerate(items):
-            if item.nodeid == self.lastfailed:
+            if item.nodeid == self.cached_info.last_failed:
                 failed_index = index
                 break

         # If the previously failed test was not found among the test items,
         # do not skip any tests.
         if failed_index is None:
-            self.report_status = "previously failed test not found, not skipping."
+            self.report_status.append("previously failed test not found, not skipping.")
         else:
-            self.report_status = f"skipping {failed_index} already passed items."
+            cache_age = datetime.now() - self.cached_info.last_cache_date
+            # Round up to avoid showing microseconds.
+            cache_age = timedelta(seconds=int(cache_age.total_seconds()))
+            self.report_status.append(
+                f"skipping {failed_index} already passed items (cache from {cache_age} ago,"
+                f" use --sw-reset to discard)."
+            )
             deselected = items[:failed_index]
             del items[:failed_index]
             config.hook.pytest_deselected(items=deselected)
@@ -93,13 +176,13 @@ def pytest_runtest_logreport(self, report: TestReport) -> None:
         if self.skip:
             # Remove test from the failed ones (if it exists) and unset the skip option
             # to make sure the following tests will not be skipped.
-            if report.nodeid == self.lastfailed:
-                self.lastfailed = None
+            if report.nodeid == self.cached_info.last_failed:
+                self.cached_info.last_failed = None

             self.skip = False
         else:
             # Mark test as the last failing and interrupt the test session.
-            self.lastfailed = report.nodeid
+            self.cached_info.last_failed = report.nodeid
             assert self.session is not None
             self.session.shouldstop = (
                 "Test failed, continuing from this test next run."
@@ -109,17 +192,18 @@ def pytest_runtest_logreport(self, report: TestReport) -> None:
         # If the test was actually run and did pass.
         if report.when == "call":
             # Remove test from the failed ones, if exists.
-            if report.nodeid == self.lastfailed:
-                self.lastfailed = None
+            if report.nodeid == self.cached_info.last_failed:
+                self.cached_info.last_failed = None

-    def pytest_report_collectionfinish(self) -> str | None:
+    def pytest_report_collectionfinish(self) -> list[str] | None:
         if self.config.get_verbosity() >= 0 and self.report_status:
-            return f"stepwise: {self.report_status}"
+            return [f"stepwise: {x}" for x in self.report_status]
         return None

     def pytest_sessionfinish(self) -> None:
         if hasattr(self.config, "workerinput"):
             # Do not update cache if this process is a xdist worker to prevent
             # race conditions (#10641).
             return
-        self.cache.set(STEPWISE_CACHE_DIR, self.lastfailed)
+        self.cached_info.update_date_to_now()
+        self.cache.set(STEPWISE_CACHE_DIR, dataclasses.asdict(self.cached_info))
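
For reference, the persistence scheme above can be exercised on its own; below is a minimal standalone sketch (not from this PR) of the ``dataclasses.asdict`` round-trip and the whole-second age display, using a trimmed copy of the dataclass::

    import dataclasses
    from datetime import datetime, timedelta

    @dataclasses.dataclass
    class StepwiseCacheInfo:  # trimmed copy of the dataclass in this diff
        last_failed: str | None
        last_test_count: int | None
        last_cache_date_str: str

    info = StepwiseCacheInfo("test_mod.py::test_b", 3, datetime.now().isoformat())
    stored = dataclasses.asdict(info)  # the JSON-friendly dict written to the cache
    assert StepwiseCacheInfo(**stored) == info  # lossless round-trip

    # The age display rebuilds the timedelta from whole seconds, hiding microseconds.
    age = datetime.now() - datetime.fromisoformat(stored["last_cache_date_str"])
    print(f"cache from {timedelta(seconds=int(age.total_seconds()))} ago")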
testing/test_cacheprovider.py (2 additions, 2 deletions)
@@ -104,7 +104,7 @@ def test_cache_failure_warns(
     pytester.makepyfile("def test_error(): raise Exception")
     result = pytester.runpytest()
     assert result.ret == 1
-    # warnings from nodeids, lastfailed, and stepwise
+    # warnings from nodeids and lastfailed
     result.stdout.fnmatch_lines(
         [
             # Validate location/stacklevel of warning from cacheprovider.
@@ -113,7 +113,7 @@ def test_cache_failure_warns(
             " */cacheprovider.py:*: PytestCacheWarning: could not create cache path "
             f"{unwritable_cache_dir}/v/cache/nodeids: *",
             ' config.cache.set("cache/nodeids", sorted(self.cached_nodeids))',
-            "*1 failed, 3 warnings in*",
+            "*1 failed, 2 warnings in*",
         ]
     )