nvme-tcp: serialize controller teardown sequences

In the timeout handler we may need to complete a request because the request that timed out may be an I/O that is a part of a serial sequence of controller teardown or initialization. In order to complete the request, we need to fence any other context that may compete with us and complete the request that is timing out. In this case, we could have a potential double completion in case a hard-irq or a different competing context triggered error recovery and is running inflight request cancellation concurrently with the timeout handler. Protect using a ctrl teardown_lock to serialize contexts that may complete a cancelled request due to error recovery or a reset. Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
author: Sagi Grimberg <sagi@grimberg.me> 2020-08-05 18:13:48 -0700
committer: Sagi Grimberg <sagi@grimberg.me> 2020-08-28 16:43:56 -0700
commit: d4d61470ae48838f49e668503e840e1520b97162 (patch)
tree: cc74032c552be563d8b81922466f411af59d0eed /drivers/nvme/host
parent: 7cf0d7c0f3c3b0203aaf81c1bc884924d8fdb9bd (diff)
1 files changed, 9 insertions, 2 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3be4749dbf11..d58a6e2a4ab1 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -124,6 +124,7 @@ struct nvme_tcp_ctrl {
 	struct sockaddr_storage src_addr;
 	struct nvme_ctrl	ctrl;
 
+	struct mutex		teardown_lock;
 	struct work_struct	err_work;
 	struct delayed_work	connect_work;
 	struct nvme_tcp_request async_req;
@@ -1527,7 +1528,6 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
 
 	if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		return;
-
 	__nvme_tcp_stop_queue(queue);
 }
 
@@ -1875,6 +1875,7 @@ out_free_queue:
 static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 		bool remove)
 {
+	mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
 	if (ctrl->admin_tagset) {
@@ -1885,13 +1886,16 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 	if (remove)
 		blk_mq_unquiesce_queue(ctrl->admin_q);
 	nvme_tcp_destroy_admin_queue(ctrl, remove);
+	mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
 }
 
 static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 		bool remove)
 {
+	mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
 	if (ctrl->queue_count <= 1)
-		return;
+		goto out;
+	blk_mq_quiesce_queue(ctrl->admin_q);
 	nvme_start_freeze(ctrl);
 	nvme_stop_queues(ctrl);
 	nvme_tcp_stop_io_queues(ctrl);
@@ -1903,6 +1907,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 	if (remove)
 		nvme_start_queues(ctrl);
 	nvme_tcp_destroy_io_queues(ctrl, remove);
+out:
+	mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
 }
 
 static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
@@ -2423,6 +2429,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 			nvme_tcp_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
+	mutex_init(&ctrl->teardown_lock);
 
 	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
 		opts->trsvcid =
author	Sagi Grimberg <sagi@grimberg.me>	2020-08-05 18:13:48 -0700
committer	Sagi Grimberg <sagi@grimberg.me>	2020-08-28 16:43:56 -0700
commit	d4d61470ae48838f49e668503e840e1520b97162 (patch)
tree	cc74032c552be563d8b81922466f411af59d0eed /drivers/nvme/host
parent	7cf0d7c0f3c3b0203aaf81c1bc884924d8fdb9bd (diff)