Skip to content

Commit 07878eb

Browse files
Fix lock ordering issue for rb_ractor_sched_wait() and rb_ractor_sched_wakeup()
In rb_ractor_sched_wait() (ex: Ractor.receive), we acquire RACTOR_LOCK(cr) and then thread_sched_lock(cur_th). However, on wakeup if we're a dnt, in thread_sched_wait_running_turn() we acquire thread_sched_lock(cur_th) after condvar wakeup and then RACTOR_LOCK(cr). This lock inversion can cause a deadlock with rb_ractor_wakeup_all() (ex: port.send(obj)), where we acquire RACTOR_LOCK(other_r) and then thread_sched_lock(other_th). So, the error happens: nt 1: Ractor.receive rb_ractor_sched_wait() after condvar wakeup in thread_sched_wait_running_turn(): - thread_sched_lock(cur_th) (condvar) # acquires lock - rb_ractor_lock_self(cr) # deadlock here: tries to acquire, HANGS nt 2: port.send ractor_wakeup_all() - RACTOR_LOCK(port_r) # acquires lock - thread_sched_lock # tries to acquire, HANGS To fix it, we now unlock the thread_sched_lock before acquiring the ractor_lock in rb_ractor_sched_wait(). Script that reproduces issue: ```ruby require "async" class RactorWrapper def initialize @Ractor = Ractor.new do Ractor.recv # Ractor doesn't start until explicitly told to # Do some calculations fib = ->(x) { x < 2 ? 1 : fib.call(x - 1) + fib.call(x - 2) } fib.call(20) end end def take_async @ractor.send(nil) Thread.new { @ractor.value }.value end end Async do |task| 10_000.times do |i| task.async do RactorWrapper.new.take_async puts i end end end exit 0 ``` Fixes [Bug #21398] Co-authored-by: John Hawthorn <[email protected]>
1 parent e639e5f commit 07878eb

File tree

2 files changed

+44
-8
lines changed

2 files changed

+44
-8
lines changed

test/ruby/test_ractor.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,45 @@ def test_require_non_string
162162
RUBY
163163
end
164164

165+
# [Bug #21398]
166+
def test_port_receive_dnt_with_port_send
167+
assert_ractor(<<~'RUBY', timeout: 30)
168+
THREADS = 10
169+
JOBS_PER_THREAD = 50
170+
ARRAY_SIZE = 20_000
171+
def ractor_job(job_count, array_size)
172+
port = Ractor::Port.new
173+
workers = (1..4).map do |i|
174+
Ractor.new(port) do |job_port|
175+
while job = Ractor.receive
176+
result = job.map { |x| x * 2 }.sum
177+
job_port.send result
178+
end
179+
end
180+
end
181+
jobs = Array.new(job_count) { Array.new(array_size) { rand(1000) } }
182+
jobs.each_with_index do |job, i|
183+
w_idx = i % 4
184+
workers[w_idx].send(job)
185+
end
186+
results = []
187+
jobs.size.times do
188+
result = port.receive # dnt receive
189+
results << result
190+
end
191+
results
192+
end
193+
threads = []
194+
# creates 40 ractors (THREADSx4)
195+
THREADS.times do
196+
threads << Thread.new do
197+
ractor_job(JOBS_PER_THREAD, ARRAY_SIZE)
198+
end
199+
end
200+
threads.each(&:join)
201+
RUBY
202+
end
203+
165204
def assert_make_shareable(obj)
166205
refute Ractor.shareable?(obj), "object was already shareable"
167206
Ractor.make_shareable(obj)

thread_pthread.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,23 +1351,20 @@ rb_ractor_sched_wait(rb_execution_context_t *ec, rb_ractor_t *cr, rb_unblock_fun
13511351
}
13521352

13531353
thread_sched_lock(sched, th);
1354+
rb_ractor_unlock_self(cr);
13541355
{
13551356
// setup sleep
13561357
bool can_direct_transfer = !th_has_dedicated_nt(th);
13571358
RB_VM_SAVE_MACHINE_CONTEXT(th);
13581359
th->status = THREAD_STOPPED_FOREVER;
13591360
RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED, th);
13601361
thread_sched_wakeup_next_thread(sched, th, can_direct_transfer);
1361-
1362-
rb_ractor_unlock_self(cr);
1363-
{
1364-
// sleep
1365-
thread_sched_wait_running_turn(sched, th, can_direct_transfer);
1366-
th->status = THREAD_RUNNABLE;
1367-
}
1368-
rb_ractor_lock_self(cr);
1362+
// sleep
1363+
thread_sched_wait_running_turn(sched, th, can_direct_transfer);
1364+
th->status = THREAD_RUNNABLE;
13691365
}
13701366
thread_sched_unlock(sched, th);
1367+
rb_ractor_lock_self(cr);
13711368

13721369
ubf_clear(th);
13731370

0 commit comments

Comments
 (0)