blk-mq: not embed .mq_kobj and ctx->kobj into queue instance

Even though .mq_kobj, ctx->kobj and q->kobj share the same lifetime from the block layer's point of view, they actually don't, because userspace may grab a reference to any one of these kobjects at any time via sysfs.

This patch fixes the issue with the following approach:

1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs

2) free all allocated ctxs and the 'blk_mq_ctxs' instance in the release handler of .mq_kobj

3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed.

This patch fixes a kernel panic during boot when DEBUG_KOBJECT_RELEASE is enabled.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Ming Lei <ming.lei@redhat.com> 2018-11-20 09:44:35 +0800
committer: Jens Axboe <axboe@kernel.dk> 2018-11-21 05:57:56 -0700
commit: 1db4909e76f64a85f4aaa187f0f683f5c85a471d (patch)
tree: ec08842eff41be0c4db21599a2a8954019335a30 /block/blk-mq.c
parent: 0c62bff1fd633774756be6d88d71002cd37615e0 (diff)
1 files changed, 32 insertions, 7 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 174384eaace7..b16204df65d1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2515,6 +2515,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	mutex_unlock(&set->tag_list_lock);
 }
 
+/* Allocate the blk_mq_ctxs container and percpu ctxs for q; returns 0 or -ENOMEM. All allocations will be freed in release handler of q->mq_kobj */
+static int blk_mq_alloc_ctxs(struct request_queue *q)
+{
+	struct blk_mq_ctxs *ctxs;
+	int cpu;
+
+	ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
+	if (!ctxs)
+		return -ENOMEM;
+
+	ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+	if (!ctxs->queue_ctx)
+		goto fail;
+
+	for_each_possible_cpu(cpu) {
+		struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
+		ctx->ctxs = ctxs;	/* back-pointer from each percpu ctx to its container */
+	}
+
+	q->mq_kobj = &ctxs->kobj;	/* queue refers to the kobject embedded in ctxs, not one of its own */
+	q->queue_ctx = ctxs->queue_ctx;	/* queue shares the percpu pointer stored in ctxs */
+
+	return 0;
+ fail:
+	kfree(ctxs);	/* only reached when alloc_percpu() failed, so just the container needs freeing */
+	return -ENOMEM;
+}
+
 /*
  * It is the actual release handler for mq, but we do it from
  * request queue's release handler for avoiding use-after-free
@@ -2540,8 +2568,6 @@ void blk_mq_release(struct request_queue *q)
 	 * both share lifetime with request queue.
 	 */
 	blk_mq_sysfs_deinit(q);
-
-	free_percpu(q->queue_ctx);
 }
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
@@ -2731,8 +2757,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!q->poll_cb)
 		goto err_exit;
 
-	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
-	if (!q->queue_ctx)
+	if (blk_mq_alloc_ctxs(q))
 		goto err_exit;
 
 	/* init q->mq_kobj and sw queues' kobjects */
@@ -2742,7 +2767,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
 						GFP_KERNEL, set->numa_node);
 	if (!q->queue_hw_ctx)
-		goto err_percpu;
+		goto err_sys_init;
 
 	blk_mq_realloc_hw_ctxs(set, q);
 	if (!q->nr_hw_queues)
@@ -2794,8 +2819,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 err_hctxs:
 	kfree(q->queue_hw_ctx);
-err_percpu:
-	free_percpu(q->queue_ctx);
+err_sys_init:
+	blk_mq_sysfs_deinit(q);
 err_exit:
 	q->mq_ops = NULL;
 	return ERR_PTR(-ENOMEM);
author	Ming Lei <ming.lei@redhat.com>	2018-11-20 09:44:35 +0800
committer	Jens Axboe <axboe@kernel.dk>	2018-11-21 05:57:56 -0700
commit	1db4909e76f64a85f4aaa187f0f683f5c85a471d (patch)
tree	ec08842eff41be0c4db21599a2a8954019335a30 /block/blk-mq.c
parent	0c62bff1fd633774756be6d88d71002cd37615e0 (diff)