Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/1327.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix hang with ``--dist=loadgroup`` if a crashed worker is replaced.
6 changes: 6 additions & 0 deletions src/xdist/scheduler/loadscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ def schedule(self) -> None:

# Initial distribution already happened, reschedule on all nodes
if self.collection is not None:
for node in self.nodes:
self._reschedule(node)
# Ensure nodes have at least two work units if possible,
# since workers need a "next item" before running the current one.
# (A restarted worker has no item before calling _reschedule()
# for the first time.)
for node in self.nodes:
self._reschedule(node)
return
Expand Down
21 changes: 21 additions & 0 deletions testing/acceptance_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,27 @@ def test_b(): pass
]
)

def test_loadgroup_does_not_hang_after_restart2(
    self, pytester: pytest.Pytester
) -> None:
    """Regression test for #1327: the run must finish after a worker restart.

    The generated module holds two tests and is run with a single worker
    under ``--dist=loadgroup``.  ``test_a`` kills its worker through
    ``os._exit(1)``; the session must then replace the worker, still run
    ``test_b`` and terminate instead of hanging with work left over.
    """
    f = pytester.makepyfile(
        """
        import os
        def test_a(): os._exit(1)
        def test_b(): pass
    """
    )
    res = pytester.runpytest(f, "-n1", "--dist=loadgroup")
    res.stdout.fnmatch_lines(
        [
            "replacing crashed worker gw*",
            "worker*crashed while running*",
            # Only two tests exist: the crash of test_a is reported as the
            # single failure, and test_b must have passed afterwards.
            "*1 failed*1 passed*",
        ]
    )

def test_max_worker_restart(self, pytester: pytest.Pytester) -> None:
f = pytester.makepyfile(
"""
Expand Down
Loading