diff --git a/changelog/13122.improvement.rst b/changelog/13122.improvement.rst new file mode 100644 index 00000000000..c302713b320 --- /dev/null +++ b/changelog/13122.improvement.rst @@ -0,0 +1,15 @@ +The ``--stepwise`` mode received a number of improvements: + +* It no longer forgets the last failed test in case pytest is executed later without the flag. + + This enables the following workflow: + + 1. Execute pytest with ``--stepwise``, pytest then stops at the first failing test; + 2. Iteratively update the code and run the test in isolation, without the ``--stepwise`` flag (for example in an IDE), until it is fixed. + 3. Execute pytest with ``--stepwise`` again and pytest will continue from the previously failed test, and if it passes, continue on to the next tests. + + Previously, at step 3, pytest would start from the beginning, forgetting the previously failed test. + + This change however might cause issues if the ``--stepwise`` mode is used far apart in time, as the state might get stale, so the internal state will be reset automatically in case the test suite changes (for now only the number of tests is considered for this, we might change/improve this in the future). + +* New ``--stepwise-reset``/``--sw-reset`` flag, allowing the user to explicitly reset the stepwise state and restart the workflow from the beginning. 
diff --git a/src/_pytest/stepwise.py b/src/_pytest/stepwise.py index c7860808c35..8901540eb59 100644 --- a/src/_pytest/stepwise.py +++ b/src/_pytest/stepwise.py @@ -1,5 +1,11 @@ from __future__ import annotations +import dataclasses +from datetime import datetime +from datetime import timedelta +from typing import Any +from typing import TYPE_CHECKING + from _pytest import nodes from _pytest.cacheprovider import Cache from _pytest.config import Config @@ -8,6 +14,9 @@ from _pytest.reports import TestReport +if TYPE_CHECKING: + from typing_extensions import Self + STEPWISE_CACHE_DIR = "cache/stepwise" @@ -30,11 +39,20 @@ def pytest_addoption(parser: Parser) -> None: help="Ignore the first failing test but stop on the next failing test. " "Implicitly enables --stepwise.", ) + group.addoption( + "--sw-reset", + "--stepwise-reset", + action="store_true", + default=False, + dest="stepwise_reset", + help="Resets stepwise state, restarting the stepwise workflow. " + "Implicitly enables --stepwise.", + ) def pytest_configure(config: Config) -> None: - if config.option.stepwise_skip: - # allow --stepwise-skip to work on its own merits. + # --stepwise-skip/--stepwise-reset implies stepwise. + if config.option.stepwise_skip or config.option.stepwise_reset: config.option.stepwise = True if config.getoption("stepwise"): config.pluginmanager.register(StepwisePlugin(config), "stepwiseplugin") @@ -47,19 +65,63 @@ def pytest_sessionfinish(session: Session) -> None: # Do not update cache if this process is a xdist worker to prevent # race conditions (#10641). return - # Clear the list of failing tests if the plugin is not active. - session.config.cache.set(STEPWISE_CACHE_DIR, []) + + +@dataclasses.dataclass +class StepwiseCacheInfo: + # The nodeid of the last failed test. + last_failed: str | None + + # The number of tests in the last time --stepwise was run. 
+ # We use this information as a simple way to invalidate the cache information, avoiding + # confusing behavior in case the cache is stale. + last_test_count: int | None + + # The date when the cache was last updated, for information purposes only. + last_cache_date_str: str + + @property + def last_cache_date(self) -> datetime: + return datetime.fromisoformat(self.last_cache_date_str) + + @classmethod + def empty(cls) -> Self: + return cls( + last_failed=None, + last_test_count=None, + last_cache_date_str=datetime.now().isoformat(), + ) + + def update_date_to_now(self) -> None: + self.last_cache_date_str = datetime.now().isoformat() class StepwisePlugin: def __init__(self, config: Config) -> None: self.config = config self.session: Session | None = None - self.report_status = "" + self.report_status: list[str] = [] assert config.cache is not None self.cache: Cache = config.cache - self.lastfailed: str | None = self.cache.get(STEPWISE_CACHE_DIR, None) self.skip: bool = config.getoption("stepwise_skip") + self.reset: bool = config.getoption("stepwise_reset") + self.cached_info = self._load_cached_info() + + def _load_cached_info(self) -> StepwiseCacheInfo: + cached_dict: dict[str, Any] | None = self.cache.get(STEPWISE_CACHE_DIR, None) + if cached_dict: + try: + return StepwiseCacheInfo( + cached_dict["last_failed"], + cached_dict["last_test_count"], + cached_dict["last_cache_date_str"], + ) + except (KeyError, TypeError) as e: + error = f"{type(e).__name__}: {e}" + self.report_status.append(f"error reading cache, discarding ({error})") + + # Cache not found or error during load, return a new cache. 
+ return StepwiseCacheInfo.empty() def pytest_sessionstart(self, session: Session) -> None: self.session = session @@ -67,23 +129,44 @@ def pytest_sessionstart(self, session: Session) -> None: def pytest_collection_modifyitems( self, config: Config, items: list[nodes.Item] ) -> None: - if not self.lastfailed: - self.report_status = "no previously failed tests, not skipping." + last_test_count = self.cached_info.last_test_count + self.cached_info.last_test_count = len(items) + + if self.reset: + self.report_status.append("resetting state, not skipping.") + self.cached_info.last_failed = None + return + + if not self.cached_info.last_failed: + self.report_status.append("no previously failed tests, not skipping.") + return + + if last_test_count is not None and last_test_count != len(items): + self.report_status.append( + f"test count changed, not skipping (now {len(items)} tests, previously {last_test_count})." + ) + self.cached_info.last_failed = None return - # check all item nodes until we find a match on last failed + # Check all item nodes until we find a match on last failed. failed_index = None for index, item in enumerate(items): - if item.nodeid == self.lastfailed: + if item.nodeid == self.cached_info.last_failed: failed_index = index break # If the previously failed test was not found among the test items, # do not skip any tests. if failed_index is None: - self.report_status = "previously failed test not found, not skipping." + self.report_status.append("previously failed test not found, not skipping.") else: - self.report_status = f"skipping {failed_index} already passed items." + cache_age = datetime.now() - self.cached_info.last_cache_date + # Round up to avoid showing microseconds. + cache_age = timedelta(seconds=int(cache_age.total_seconds())) + self.report_status.append( + f"skipping {failed_index} already passed items (cache from {cache_age} ago," + f" use --sw-reset to discard)." 
+ ) deselected = items[:failed_index] del items[:failed_index] config.hook.pytest_deselected(items=deselected) @@ -93,13 +176,13 @@ def pytest_runtest_logreport(self, report: TestReport) -> None: if self.skip: # Remove test from the failed ones (if it exists) and unset the skip option # to make sure the following tests will not be skipped. - if report.nodeid == self.lastfailed: - self.lastfailed = None + if report.nodeid == self.cached_info.last_failed: + self.cached_info.last_failed = None self.skip = False else: # Mark test as the last failing and interrupt the test session. - self.lastfailed = report.nodeid + self.cached_info.last_failed = report.nodeid assert self.session is not None self.session.shouldstop = ( "Test failed, continuing from this test next run." @@ -109,12 +192,12 @@ def pytest_runtest_logreport(self, report: TestReport) -> None: # If the test was actually run and did pass. if report.when == "call": # Remove test from the failed ones, if exists. - if report.nodeid == self.lastfailed: - self.lastfailed = None + if report.nodeid == self.cached_info.last_failed: + self.cached_info.last_failed = None - def pytest_report_collectionfinish(self) -> str | None: + def pytest_report_collectionfinish(self) -> list[str] | None: if self.config.get_verbosity() >= 0 and self.report_status: - return f"stepwise: {self.report_status}" + return [f"stepwise: {x}" for x in self.report_status] return None def pytest_sessionfinish(self) -> None: @@ -122,4 +205,5 @@ def pytest_sessionfinish(self) -> None: # Do not update cache if this process is a xdist worker to prevent # race conditions (#10641). 
return - self.cache.set(STEPWISE_CACHE_DIR, self.lastfailed) + self.cached_info.update_date_to_now() + self.cache.set(STEPWISE_CACHE_DIR, dataclasses.asdict(self.cached_info)) diff --git a/testing/test_cacheprovider.py b/testing/test_cacheprovider.py index 1ba4659d84e..ca417e86ee5 100644 --- a/testing/test_cacheprovider.py +++ b/testing/test_cacheprovider.py @@ -104,7 +104,7 @@ def test_cache_failure_warns( pytester.makepyfile("def test_error(): raise Exception") result = pytester.runpytest() assert result.ret == 1 - # warnings from nodeids, lastfailed, and stepwise + # warnings from nodeids and lastfailed result.stdout.fnmatch_lines( [ # Validate location/stacklevel of warning from cacheprovider. @@ -113,7 +113,7 @@ def test_cache_failure_warns( " */cacheprovider.py:*: PytestCacheWarning: could not create cache path " f"{unwritable_cache_dir}/v/cache/nodeids: *", ' config.cache.set("cache/nodeids", sorted(self.cached_nodeids))', - "*1 failed, 3 warnings in*", + "*1 failed, 2 warnings in*", ] ) diff --git a/testing/test_stepwise.py b/testing/test_stepwise.py index affdb73375e..d2ad3bae500 100644 --- a/testing/test_stepwise.py +++ b/testing/test_stepwise.py @@ -1,6 +1,8 @@ -# mypy: allow-untyped-defs +# mypy: disallow-untyped-defs from __future__ import annotations +from collections.abc import Sequence +import json from pathlib import Path from _pytest.cacheprovider import Cache @@ -84,7 +86,7 @@ def broken_pytester(pytester: Pytester) -> Pytester: return pytester -def _strip_resource_warnings(lines): +def _strip_resource_warnings(lines: Sequence[str]) -> Sequence[str]: # Strip unreliable ResourceWarnings, so no-output assertions on stderr can work. 
# (https://github.com/pytest-dev/pytest/issues/5088) return [ @@ -114,7 +116,10 @@ def test_data(expected): result.stdout.fnmatch_lines(["stepwise: no previously failed tests, not skipping."]) result = pytester.runpytest("-v", "--stepwise") result.stdout.fnmatch_lines( - ["stepwise: skipping 4 already passed items.", "*1 failed, 4 deselected*"] + [ + "stepwise: skipping 4 already passed items (cache from * ago, use --sw-reset to discard).", + "*1 failed, 4 deselected*", + ] ) @@ -358,3 +363,183 @@ def test_one(): with stepwise_cache_file.open(encoding="utf-8") as file_handle: observed_value = file_handle.readlines() assert [expected_value] == observed_value + + +def test_do_not_reset_cache_if_disabled(pytester: Pytester) -> None: + """ + If pytest is run without --stepwise, do not clear the stepwise cache. + + Keeping the cache around is important for this workflow: + + 1. Run tests with --stepwise + 2. Stop at the failing test, and iterate over it changing the code and running it in isolation + (in the IDE for example). + 3. Run tests with --stepwise again - at this point we expect to start from the failing test, which should now pass, + and continue with the next tests. + """ + pytester.makepyfile( + """ + def test_1(): pass + def test_2(): assert False + def test_3(): pass + """ + ) + result = pytester.runpytest("--stepwise") + result.stdout.fnmatch_lines( + [ + "*::test_2 - assert False*", + "*failed, continuing from this test next run*", + "=* 1 failed, 1 passed in *", + ] + ) + + # Run a specific test without passing `--stepwise`. + result = pytester.runpytest("-k", "test_1") + result.stdout.fnmatch_lines(["*1 passed*"]) + + # Running with `--stepwise` should continue from the last failing test. 
+ result = pytester.runpytest("--stepwise") + result.stdout.fnmatch_lines( + [ + "stepwise: skipping 1 already passed items (cache from *, use --sw-reset to discard).", + "*::test_2 - assert False*", + "*failed, continuing from this test next run*", + "=* 1 failed, 1 deselected in *", + ] + ) + + +def test_reset(pytester: Pytester) -> None: + pytester.makepyfile( + """ + def test_1(): pass + def test_2(): assert False + def test_3(): pass + """ + ) + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: no previously failed tests, not skipping.", + "*::test_1 *PASSED*", + "*::test_2 *FAILED*", + "*failed, continuing from this test next run*", + "* 1 failed, 1 passed in *", + ] + ) + + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: skipping 1 already passed items (cache from *, use --sw-reset to discard).", + "*::test_2 *FAILED*", + "*failed, continuing from this test next run*", + "* 1 failed, 1 deselected in *", + ] + ) + + # Running with --stepwise-reset restarts the stepwise workflow. + result = pytester.runpytest("-v", "--stepwise-reset") + result.stdout.fnmatch_lines( + [ + "stepwise: resetting state, not skipping.", + "*::test_1 *PASSED*", + "*::test_2 *FAILED*", + "*failed, continuing from this test next run*", + "* 1 failed, 1 passed in *", + ] + ) + + +def test_change_test_count(pytester: Pytester) -> None: + # Run initially with 3 tests. + pytester.makepyfile( + """ + def test_1(): pass + def test_2(): assert False + def test_3(): pass + """ + ) + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: no previously failed tests, not skipping.", + "*::test_1 *PASSED*", + "*::test_2 *FAILED*", + "*failed, continuing from this test next run*", + "* 1 failed, 1 passed in *", + ] + ) + + # Change the number of tests, which invalidates the test cache. 
+ pytester.makepyfile( + """ + def test_1(): pass + def test_2(): assert False + def test_3(): pass + def test_4(): pass + """ + ) + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: test count changed, not skipping (now 4 tests, previously 3).", + "*::test_1 *PASSED*", + "*::test_2 *FAILED*", + "*failed, continuing from this test next run*", + "* 1 failed, 1 passed in *", + ] + ) + + # Fix the failing test and run again. + pytester.makepyfile( + """ + def test_1(): pass + def test_2(): pass + def test_3(): pass + def test_4(): pass + """ + ) + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: skipping 1 already passed items (cache from *, use --sw-reset to discard).", + "*::test_2 *PASSED*", + "*::test_3 *PASSED*", + "*::test_4 *PASSED*", + "* 3 passed, 1 deselected in *", + ] + ) + + +def test_cache_error(pytester: Pytester) -> None: + pytester.makepyfile( + """ + def test_1(): pass + """ + ) + # Run stepwise normally to generate the cache information. + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: no previously failed tests, not skipping.", + "*::test_1 *PASSED*", + "* 1 passed in *", + ] + ) + + # Corrupt the cache. + cache_file = pytester.path / f".pytest_cache/v/{STEPWISE_CACHE_DIR}" + assert cache_file.is_file() + cache_file.write_text(json.dumps({"invalid": True}), encoding="UTF-8") + + # Check we run as if the cache did not exist, but also show an error message. + result = pytester.runpytest("--stepwise", "-v") + result.stdout.fnmatch_lines( + [ + "stepwise: error reading cache, discarding (KeyError: *", + "stepwise: no previously failed tests, not skipping.", + "*::test_1 *PASSED*", + "* 1 passed in *", + ] + )