summaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtrdma/rpc_rdma.c
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2017-10-16 15:01:30 -0400
committerAnna Schumaker <Anna.Schumaker@Netapp.com>2017-11-17 13:47:54 -0500
commitd8f532d20ee43a0117284798d486bc4f98e3b196 (patch)
tree539a169db11e0e8fe33c5de882410622c8032991 /net/sunrpc/xprtrdma/rpc_rdma.c
parente1352c9610e3235f5e1b159038762d0c01c6ef36 (diff)
xprtrdma: Invoke rpcrdma_reply_handler directly from RECV completion
I noticed that the soft IRQ thread looked pretty busy under heavy I/O workloads. perf suggested one area that was expensive was the queue_work() call in rpcrdma_wc_receive. That gave me some ideas. Instead of scheduling a separate worker to process RPC Replies, promote the Receive completion handler to IB_POLL_WORKQUEUE, and invoke rpcrdma_reply_handler directly. Note that the poll workqueue is single-threaded. In order to keep memory invalidation from serializing all RPC Replies, handle any necessary invalidation tasks in a separate multi-threaded workqueue. This provides a two-tier scheme, similar to OS I/O interrupt handlers: A fast interrupt handler that schedules the slow handler and re-enables the interrupt, and a slower handler that is invoked for any needed heavy lifting. Benefits include: - One less context switch for RPCs that don't register memory - Receive completion handling is moved out of soft IRQ context to make room for other users of soft IRQ - The same CPU core now DMA syncs and XDR decodes the Receive buffer Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c46
1 files changed, 28 insertions, 18 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 418bcc6b3e1d..430f8b5a8c43 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1265,16 +1265,36 @@ out_badheader:
goto out;
}
+/* Reply handling runs in the poll worker thread. Anything that
+ * might wait is deferred to a separate workqueue.
+ */
+void rpcrdma_deferred_completion(struct work_struct *work)
+{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+
+ /* Invalidate and unmap the data payloads before waking
+ * the waiting application. This guarantees the memory
+ * regions are properly fenced from the server before the
+ * application accesses the data. It also ensures proper
+ * send flow control: waking the next RPC waits until this
+ * RPC has relinquished all its Send Queue entries.
+ */
+ rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &req->rl_registered);
+
+ rpcrdma_complete_rqst(rep);
+}
+
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
-void
-rpcrdma_reply_handler(struct work_struct *work)
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_req *req;
@@ -1320,20 +1340,10 @@ rpcrdma_reply_handler(struct work_struct *work)
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(rep->rr_xid));
- /* Invalidate and unmap the data payloads before waking the
- * waiting application. This guarantees the memory regions
- * are properly fenced from the server before the application
- * accesses the data. It also ensures proper send flow control:
- * waking the next RPC waits until this RPC has relinquished
- * all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered)) {
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
- }
-
- rpcrdma_complete_rqst(rep);
+ if (list_empty(&req->rl_registered))
+ rpcrdma_complete_rqst(rep);
+ else
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus: