io_uring: use signal based task_work running

Since 5.7, we've been using task_work to trigger async running of requests in the context of the original task. This generally works great, but there's a case where if the task is currently blocked in the kernel waiting on a condition to become true, it won't process task_work. Even though the task is woken, it just checks whatever condition it's waiting on, and goes back to sleep if it's still false. This is a problem if that very condition only becomes true when that task_work is run. An example of that is the task registering an eventfd with io_uring, and it's now blocked waiting on an eventfd read. That read could depend on a completion event, and that completion event won't get trigged until task_work has been run. Use the TWA_SIGNAL notification for task_work, so that we ensure that the task always runs the work when queued. Cc: stable@vger.kernel.org # v5.7 Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Jens Axboe <axboe@kernel.dk> 2020-06-30 12:39:05 -0600
committer: Jens Axboe <axboe@kernel.dk> 2020-06-30 12:39:05 -0600
commit: ce593a6c480a22acba08795be313c0c6d49dd35d (patch)
tree: a31b7d4e964ada0b9b768d405507c699dfef91b4 /fs
parent: e91b48162332480f5840902268108bb7fb7a44c7 (diff)
1 files changed, 24 insertions, 8 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e507737f044e..700644a016a7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4072,6 +4072,21 @@ struct io_poll_table {
 	int error;
 };
 
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+				int notify)
+{
+	struct task_struct *tsk = req->task;
+	int ret;
+
+	if (req->ctx->flags & IORING_SETUP_SQPOLL)
+		notify = 0;
+
+	ret = task_work_add(tsk, cb, notify);
+	if (!ret)
+		wake_up_process(tsk);
+	return ret;
+}
+
 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 			   __poll_t mask, task_work_func_t func)
 {
@@ -4095,13 +4110,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 	 * of executing it. We can't safely execute it anyway, as we may not
 	 * have the needed state needed for it anyway.
 	 */
-	ret = task_work_add(tsk, &req->task_work, true);
+	ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
 	if (unlikely(ret)) {
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, true);
+		task_work_add(tsk, &req->task_work, 0);
+		wake_up_process(tsk);
 	}
-	wake_up_process(tsk);
 	return 1;
 }
 
@@ -6182,19 +6197,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	do {
 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
+		/* make sure we run task_work before checking for signals */
 		if (current->task_works)
 			task_work_run();
-		if (io_should_wake(&iowq, false))
-			break;
-		schedule();
 		if (signal_pending(current)) {
-			ret = -EINTR;
+			ret = -ERESTARTSYS;
 			break;
 		}
+		if (io_should_wake(&iowq, false))
+			break;
+		schedule();
 	} while (1);
 	finish_wait(&ctx->wait, &iowq.wq);
 
-	restore_saved_sigmask_unless(ret == -EINTR);
+	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
author	Jens Axboe <axboe@kernel.dk>	2020-06-30 12:39:05 -0600
committer	Jens Axboe <axboe@kernel.dk>	2020-06-30 12:39:05 -0600
commit	ce593a6c480a22acba08795be313c0c6d49dd35d (patch)
tree	a31b7d4e964ada0b9b768d405507c699dfef91b4 /fs
parent	e91b48162332480f5840902268108bb7fb7a44c7 (diff)