Skip to content

Commit

Permalink
Fix potential goroutine leak.
Browse files Browse the repository at this point in the history
Race conditions in cleanup goroutines could cause them to fail. As they
are created every time a queue becomes non-empty, a single queue being
worked can cause multiple goroutines to spawn for cleanup, and they
could all get quite unlucky and never be able to perform their cleanup
(particularly if a task gets left in that queue for a while after
triggering multiple cleanup routines).

Taken across a large number of queues, this can be an issue and leak
memory unnecessarily. The implementation now recognizes that the
presence of the "dependents" semaphore makes synchronous cleanup a safe
procedure, simplifying logic and interactions between goroutines.
  • Loading branch information
shiblon committed Feb 10, 2022
1 parent 0f27f5b commit 884d60d
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 47 deletions.
78 changes: 31 additions & 47 deletions backend/eqmem/eqmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -830,59 +830,34 @@ func (m *EQMem) locksForQueues(qs []string) []*qLock {
return locks
}

// Get a single lock for a queue, creating it if it doesn't exist. If newly
// created, a cleanup goroutine is also launched in the background to handle
// empty queues. Dependents are incremented here.
func (m *EQMem) lockForQueueUnsafe(q string) *qLock {
ql := m.locksSuperUnsafe[q]
ts := m.queues[q]

if (ts == nil) != (ql == nil) {
// Get a single lock for a queue, creating it if it doesn't exist. Dependents
// are incremented here.
func (m *EQMem) lockForQueueUnsafe(q string) (ql *qLock) {
// Always increment dependents, whether we exit early from finding a lock,
// or late from creating a new queue.
defer func() {
ql.dependents++
}()

ql = m.locksSuperUnsafe[q]

if ts := m.queues[q]; (ts == nil) != (ql == nil) {
log.Fatalf("Queue tasks and lock structures out of step for queue %q: ts=%v, ql=%v", q, ts, ql)
}

if ql != nil {
ql.dependents++
return ql
}

if ts == nil || ql == nil {
ts = newTaskQueue(q)
m.queues[q] = ts

ql = &qLock{
queue: q,
heap: newClaimHeap(),
tasks: ts,
dependents: 1, // someone asked for it, mark it up.
}
m.locksSuperUnsafe[q] = ql

// Since we're creating a new queue lock, we create its goroutine to
// clean it up when its dependents have gone to zero. We use only the global lock
// here to avoid nested locks (global and local). This means that the
// decrement must also be done while holding *only* the global lock.
go func() {
for {
time.Sleep(5 * time.Second)
done := func() bool {
defer un(lock(m))
// Empty queue and nobody leaning on the lock, delete everything.
// Then indicate that we're finished so the goroutine can
// exit for this queue.
if m.queues[q].Len() == 0 && m.locksSuperUnsafe[q].dependents == 0 {
delete(m.queues, q)
delete(m.locksSuperUnsafe, q)
return true
}
return false
}()
if done {
return
}
}
}()
ts := newTaskQueue(q)
m.queues[q] = ts

ql = &qLock{
queue: q,
heap: newClaimHeap(),
tasks: ts,
}
m.locksSuperUnsafe[q] = ql

return ql
}
Expand All @@ -908,10 +883,19 @@ func (m *EQMem) lockQueues(qs []string) ([]*qLock, func()) {
qls[i].Unlock()
}
// Now that we're unlocked, take the global lock again and reduce
// dependents by 1, in reverse order.
// dependents by 1, in reverse order, then try to clean up if
// dependents go to zero anywhere with empty queues. If it fails, it
// simply exits; something else needed the lock to stay alive between
// lock acquisitions, so cleanup will occur later.
defer un(lock(m))
for i := len(qls) - 1; i >= 0; i-- {
qls[i].dependents--
ql := qls[i]
ql.dependents--

if ts := m.queues[ql.queue]; ql.dependents == 0 && ts.Len() == 0 {
delete(m.queues, ql.queue)
delete(m.locksSuperUnsafe, ql.queue)
}
}
}
}
3 changes: 3 additions & 0 deletions backend/eqmem/taskqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ func (s *taskQueue) Update(id uuid.UUID, f func(*entroq.Task) *entroq.Task) erro

// Len reports how many tasks this task store currently holds.
//
// It is safe to call on a nil receiver, in which case it reports 0 without
// touching any lock. Otherwise the size field is read while the store is
// locked through the lock/un helper pair.
func (s *taskQueue) Len() int {
	if s != nil {
		// Hold the store's lock for the duration of the read; released on return.
		defer un(lock(s))
		return s.size
	}
	// A missing queue is treated as an empty one.
	return 0
}
Expand Down
13 changes: 13 additions & 0 deletions qsvc/qtest/qtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,19 @@ func SimpleChange(ctx context.Context, t *testing.T, client *entroq.EntroQ, qPre
// SimpleWorker runs the simple worker scenario several times in a row, each
// time against its own queue named "<qPrefix>/simple_worker_<i>".
//
// It performs 30 rounds normally and 5 rounds when the test binary is run in
// short mode (testing.Short). Each round is delegated to simpleWorkerOnce.
func SimpleWorker(ctx context.Context, t *testing.T, client *entroq.EntroQ, qPrefix string) {
	baseQueue := path.Join(qPrefix, "simple_worker")

	// Fewer rounds in short mode to keep quick test runs quick.
	rounds := 30
	if testing.Short() {
		rounds = 5
	}

	for round := 0; round < rounds; round++ {
		// A distinct queue per round, so every round starts from a fresh queue name.
		simpleWorkerOnce(ctx, t, client, fmt.Sprintf("%s_%d", baseQueue, round))
	}
}

func simpleWorkerOnce(ctx context.Context, t *testing.T, client *entroq.EntroQ, queue string) {
t.Helper()

const numTasks = 10

showQueue := func() {
Expand Down

0 comments on commit 884d60d

Please sign in to comment.