Skip to content

Commit baa9dbe

Browse files
htejungregkh
authored andcommitted
blkcg: fix use-after-free in __blkg_release_rcu() by making blkcg_gq refcnt an atomic_t
commit a5049a8 upstream. Hello, So, this patch should do. Joe, Vivek, can one of you guys please verify that the oops goes away with this patch? Jens, the original thread can be read at http://thread.gmane.org/gmane.linux.kernel/1720729 The fix converts blkg->refcnt from int to atomic_t. It does some overhead but it should be minute compared to everything else which is going on and the involved cacheline bouncing, so I think it's highly unlikely to cause any noticeable difference. Also, the refcnt in question should be converted to a perpcu_ref for blk-mq anyway, so the atomic_t is likely to go away pretty soon anyway. Thanks. ------- 8< ------- __blkg_release_rcu() may be invoked after the associated request_queue is released with a RCU grace period inbetween. As such, the function and callbacks invoked from it must not dereference the associated request_queue. This is clearly indicated in the comment above the function. Unfortunately, while trying to fix a different issue, 2a4fd07 ("blkcg: move bulk of blkcg_gq release operations to the RCU callback") ignored this and added [un]locking of @blkg->q->queue_lock to __blkg_release_rcu(). This of course can cause oops as the request_queue may be long gone by the time this code gets executed. general protection fault: 0000 [#1] SMP CPU: 21 PID: 30 Comm: rcuos/21 Not tainted 3.15.0 #1 Hardware name: Stratus ftServer 6400/G7LAZ, BIOS BIOS Version 6.3:57 12/25/2013 task: ffff880854021de0 ti: ffff88085403c000 task.ti: ffff88085403c000 RIP: 0010:[<ffffffff8162e9e5>] [<ffffffff8162e9e5>] _raw_spin_lock_irq+0x15/0x60 RSP: 0018:ffff88085403fdf0 EFLAGS: 00010086 RAX: 0000000000020000 RBX: 0000000000000010 RCX: 0000000000000000 RDX: 000060ef80008248 RSI: 0000000000000286 RDI: 6b6b6b6b6b6b6b6b RBP: ffff88085403fdf0 R08: 0000000000000286 R09: 0000000000009f39 R10: 0000000000020001 R11: 0000000000020001 R12: ffff88103c17a130 R13: ffff88103c17a080 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88107fca0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000006e5ab8 CR3: 000000000193d000 CR4: 00000000000407e0 Stack: ffff88085403fe18 ffffffff812cbfc2 ffff88103c17a130 0000000000000000 ffff88103c17a130 ffff88085403fec0 ffffffff810d1d28 ffff880854021de0 ffff880854021de0 ffff88107fcaec58 ffff88085403fe80 ffff88107fcaec30 Call Trace: [<ffffffff812cbfc2>] __blkg_release_rcu+0x72/0x150 [<ffffffff810d1d28>] rcu_nocb_kthread+0x1e8/0x300 [<ffffffff81091d81>] kthread+0xe1/0x100 [<ffffffff8163813c>] ret_from_fork+0x7c/0xb0 Code: ff 47 04 48 8b 7d 08 be 00 02 00 00 e8 55 48 a4 ff 5d c3 0f 1f 00 66 66 66 66 90 55 48 89 e5 +fa 66 66 90 66 66 90 b8 00 00 02 00 <f0> 0f c1 07 89 c2 c1 ea 10 66 39 c2 75 02 5d c3 83 e2 fe 0f +b7 RIP [<ffffffff8162e9e5>] _raw_spin_lock_irq+0x15/0x60 RSP <ffff88085403fdf0> The request_queue locking was added because blkcg_gq->refcnt is an int protected with the queue lock and __blkg_release_rcu() needs to put the parent. Let's fix it by making blkcg_gq->refcnt an atomic_t and dropping queue locking in the function. Given the general heavy weight of the current request_queue and blkcg operations, this is unlikely to cause any noticeable overhead. Moreover, blkcg_gq->refcnt is likely to be converted to percpu_ref in the near future, so whatever (most likely negligible) overhead it may add is temporary. Signed-off-by: Tejun Heo <[email protected]> Reported-by: Joe Lawrence <[email protected]> Acked-by: Vivek Goyal <[email protected]> Link: http://lkml.kernel.org/g/[email protected] Signed-off-by: Jens Axboe <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent c943406 commit baa9dbe

File tree

2 files changed

+9
-15
lines changed

2 files changed

+9
-15
lines changed

block/blk-cgroup.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
8080
blkg->q = q;
8181
INIT_LIST_HEAD(&blkg->q_node);
8282
blkg->blkcg = blkcg;
83-
blkg->refcnt = 1;
83+
atomic_set(&blkg->refcnt, 1);
8484

8585
/* root blkg uses @q->root_rl, init rl only for !root blkgs */
8686
if (blkcg != &blkcg_root) {
@@ -399,11 +399,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
399399

400400
/* release the blkcg and parent blkg refs this blkg has been holding */
401401
css_put(&blkg->blkcg->css);
402-
if (blkg->parent) {
403-
spin_lock_irq(blkg->q->queue_lock);
402+
if (blkg->parent)
404403
blkg_put(blkg->parent);
405-
spin_unlock_irq(blkg->q->queue_lock);
406-
}
407404

408405
blkg_free(blkg);
409406
}

block/blk-cgroup.h

+7-10
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/seq_file.h>
1919
#include <linux/radix-tree.h>
2020
#include <linux/blkdev.h>
21+
#include <linux/atomic.h>
2122

2223
/* Max limits for throttle policy */
2324
#define THROTL_IOPS_MAX UINT_MAX
@@ -104,7 +105,7 @@ struct blkcg_gq {
104105
struct request_list rl;
105106

106107
/* reference count */
107-
int refcnt;
108+
atomic_t refcnt;
108109

109110
/* is this blkg online? protected by both blkcg and q locks */
110111
bool online;
@@ -253,28 +254,24 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
253254
* blkg_get - get a blkg reference
254255
* @blkg: blkg to get
255256
*
256-
* The caller should be holding queue_lock and an existing reference.
257+
* The caller should be holding an existing reference.
257258
*/
258259
static inline void blkg_get(struct blkcg_gq *blkg)
259260
{
260-
lockdep_assert_held(blkg->q->queue_lock);
261-
WARN_ON_ONCE(!blkg->refcnt);
262-
blkg->refcnt++;
261+
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
262+
atomic_inc(&blkg->refcnt);
263263
}
264264

265265
void __blkg_release_rcu(struct rcu_head *rcu);
266266

267267
/**
268268
* blkg_put - put a blkg reference
269269
* @blkg: blkg to put
270-
*
271-
* The caller should be holding queue_lock.
272270
*/
273271
static inline void blkg_put(struct blkcg_gq *blkg)
274272
{
275-
lockdep_assert_held(blkg->q->queue_lock);
276-
WARN_ON_ONCE(blkg->refcnt <= 0);
277-
if (!--blkg->refcnt)
273+
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
274+
if (atomic_dec_and_test(&blkg->refcnt))
278275
call_rcu(&blkg->rcu_head, __blkg_release_rcu);
279276
}
280277

0 commit comments

Comments
 (0)