diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 95 | ||||
-rw-r--r-- | include/trace/events/rpcrdma.h | 584 |
2 files changed, 632 insertions, 47 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7337e1221590..fd78f78df5c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * @@ -70,37 +71,16 @@ extern atomic_t rdma_stat_rq_prod; extern atomic_t rdma_stat_sq_poll; extern atomic_t rdma_stat_sq_prod; -/* - * Contexts are built when an RDMA request is created and are a - * record of the resources that can be recovered when the request - * completes. - */ -struct svc_rdma_op_ctxt { - struct list_head list; - struct xdr_buf arg; - struct ib_cqe cqe; - u32 byte_len; - struct svcxprt_rdma *xprt; - enum dma_data_direction direction; - int count; - unsigned int mapped_sges; - int hdr_count; - struct ib_send_wr send_wr; - struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; - struct page *pages[RPCSVC_MAXPAGES]; -}; - struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ int sc_ord; /* RDMA read limit */ - int sc_max_sge; + int sc_max_send_sges; bool sc_snd_w_inv; /* OK to use Send With Invalidate */ atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ - unsigned int sc_rq_depth; /* Depth of RQ */ __be32 sc_fc_credits; /* Forward credits */ u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ @@ -109,9 +89,8 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; - spinlock_t sc_ctxt_lock; - struct list_head sc_ctxts; - int sc_ctxt_used; + spinlock_t sc_send_lock; + struct list_head sc_send_ctxts; spinlock_t sc_rw_ctxt_lock; struct list_head sc_rw_ctxts; @@ -127,6 +106,9 @@ struct svcxprt_rdma { unsigned long sc_flags; struct list_head sc_read_complete_q; struct work_struct sc_work; + + spinlock_t sc_recv_lock; + struct list_head sc_recv_ctxts; }; /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 @@ -141,12 +123,30 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD -/* Track DMA maps for this transport and context */ -static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, - struct svc_rdma_op_ctxt *ctxt) -{ - ctxt->mapped_sges++; -} +struct svc_rdma_recv_ctxt { + struct list_head rc_list; + struct ib_recv_wr rc_recv_wr; + struct ib_cqe rc_cqe; + struct ib_sge rc_recv_sge; + void *rc_recv_buf; + struct xdr_buf rc_arg; + bool rc_temp; + u32 rc_byte_len; + unsigned int rc_page_count; + unsigned int rc_hdr_count; + struct page *rc_pages[RPCSVC_MAXPAGES]; +}; + +struct svc_rdma_send_ctxt { + struct list_head sc_list; + struct ib_send_wr sc_send_wr; + struct ib_cqe sc_cqe; + void *sc_xprt_buf; + int sc_page_count; + int sc_cur_sge_no; + struct page *sc_pages[RPCSVC_MAXPAGES]; + struct ib_sge sc_sges[]; +}; /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, @@ -154,13 +154,18 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct xdr_buf *rcvbuf); /* svc_rdma_recvfrom.c */ +extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); +extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_recv_ctxt *ctxt); +extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, __be32 *p); + struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, @@ -168,24 +173,22 @@ extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, struct xdr_buf *xdr); /* svc_rdma_sendto.c */ -extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, - struct svc_rdma_op_ctxt *ctxt, - __be32 *rdma_resp, unsigned int len); -extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, - struct svc_rdma_op_ctxt *ctxt, - int num_sge, u32 inv_rkey); +extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); +extern struct svc_rdma_send_ctxt * + svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); +extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); +extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + unsigned int len); +extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst); extern int svc_rdma_sendto(struct svc_rqst *); /* svc_rdma_transport.c */ -extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); -extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *); -extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *); -extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *); -extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); -extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); -extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); -extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 50ed3f8bf534..094a676d92a7 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2017 Oracle. All rights reserved. + * Copyright (c) 2017, 2018 Oracle. All rights reserved. + * + * Trace point definitions for the "rpcrdma" subsystem. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpcrdma @@ -885,6 +887,586 @@ TRACE_EVENT(xprtrdma_cb_setup, DEFINE_CB_EVENT(xprtrdma_cb_call); DEFINE_CB_EVENT(xprtrdma_cb_reply); +/** + ** Server-side RPC/RDMA events + **/ + +DECLARE_EVENT_CLASS(svcrdma_xprt_event, + TP_PROTO( + const struct svc_xprt *xprt + ), + + TP_ARGS(xprt), + + TP_STRUCT__entry( + __field(const void *, xprt) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xprt = xprt; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s", + __entry->xprt, __get_str(addr) + ) +); + +#define DEFINE_XPRT_EVENT(name) \ + DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \ + TP_PROTO( \ + const struct svc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_XPRT_EVENT(accept); +DEFINE_XPRT_EVENT(fail); +DEFINE_XPRT_EVENT(free); + +TRACE_DEFINE_ENUM(RDMA_MSG); +TRACE_DEFINE_ENUM(RDMA_NOMSG); +TRACE_DEFINE_ENUM(RDMA_MSGP); +TRACE_DEFINE_ENUM(RDMA_DONE); +TRACE_DEFINE_ENUM(RDMA_ERROR); + +#define show_rpcrdma_proc(x) \ + __print_symbolic(x, \ + { RDMA_MSG, "RDMA_MSG" }, \ + { RDMA_NOMSG, "RDMA_NOMSG" }, \ + { RDMA_MSGP, "RDMA_MSGP" }, \ + { RDMA_DONE, "RDMA_DONE" }, \ + { RDMA_ERROR, "RDMA_ERROR" }) + +TRACE_EVENT(svcrdma_decode_rqst, + TP_PROTO( + __be32 *p, + unsigned int hdrlen + ), + + TP_ARGS(p, hdrlen), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, vers) + __field(u32, proc) + __field(u32, credits) + __field(unsigned int, hdrlen) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpup(p++); + __entry->vers = be32_to_cpup(p++); + __entry->credits = be32_to_cpup(p++); + __entry->proc = be32_to_cpup(p); + __entry->hdrlen = hdrlen; + ), + + TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", + __entry->xid, __entry->vers, __entry->credits, + show_rpcrdma_proc(__entry->proc), __entry->hdrlen) +); + +TRACE_EVENT(svcrdma_decode_short, + TP_PROTO( + unsigned int hdrlen + ), + + TP_ARGS(hdrlen), + + TP_STRUCT__entry( + __field(unsigned int, hdrlen) + ), + + TP_fast_assign( + __entry->hdrlen = hdrlen; + ), + + TP_printk("hdrlen=%u", __entry->hdrlen) +); + +DECLARE_EVENT_CLASS(svcrdma_badreq_event, + TP_PROTO( + __be32 *p + ), + + TP_ARGS(p), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, vers) + __field(u32, proc) + __field(u32, credits) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpup(p++); + __entry->vers = be32_to_cpup(p++); + __entry->credits = be32_to_cpup(p++); + __entry->proc = be32_to_cpup(p); + ), + + TP_printk("xid=0x%08x vers=%u credits=%u proc=%u", + __entry->xid, __entry->vers, __entry->credits, __entry->proc) +); + +#define DEFINE_BADREQ_EVENT(name) \ + DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\ + TP_PROTO( \ + __be32 *p \ + ), \ + TP_ARGS(p)) + +DEFINE_BADREQ_EVENT(badvers); +DEFINE_BADREQ_EVENT(drop); +DEFINE_BADREQ_EVENT(badproc); +DEFINE_BADREQ_EVENT(parse); + +DECLARE_EVENT_CLASS(svcrdma_segment_event, + TP_PROTO( + u32 handle, + u32 length, + u64 offset + ), + + TP_ARGS(handle, length, offset), + + TP_STRUCT__entry( + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->handle = handle; + __entry->length = length; + __entry->offset = offset; + ), + + TP_printk("%u@0x%016llx:0x%08x", + __entry->length, (unsigned long long)__entry->offset, + __entry->handle + ) +); + +#define DEFINE_SEGMENT_EVENT(name) \ + DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\ + TP_PROTO( \ + u32 handle, \ + u32 length, \ + u64 offset \ + ), \ + TP_ARGS(handle, length, offset)) + +DEFINE_SEGMENT_EVENT(rseg); +DEFINE_SEGMENT_EVENT(wseg); + +DECLARE_EVENT_CLASS(svcrdma_chunk_event, + TP_PROTO( + u32 length + ), + + TP_ARGS(length), + + TP_STRUCT__entry( + __field(u32, length) + ), + + TP_fast_assign( + __entry->length = length; + ), + + TP_printk("length=%u", + __entry->length + ) +); + +#define DEFINE_CHUNK_EVENT(name) \ + DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\ + TP_PROTO( \ + u32 length \ + ), \ + TP_ARGS(length)) + +DEFINE_CHUNK_EVENT(pzr); +DEFINE_CHUNK_EVENT(write); +DEFINE_CHUNK_EVENT(reply); + +TRACE_EVENT(svcrdma_encode_read, + TP_PROTO( + u32 length, + u32 position + ), + + TP_ARGS(length, position), + + TP_STRUCT__entry( + __field(u32, length) + __field(u32, position) + ), + + TP_fast_assign( + __entry->length = length; + __entry->position = position; + ), + + TP_printk("length=%u position=%u", + __entry->length, __entry->position + ) +); + +DECLARE_EVENT_CLASS(svcrdma_error_event, + TP_PROTO( + __be32 xid + ), + + TP_ARGS(xid), + + TP_STRUCT__entry( + __field(u32, xid) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(xid); + ), + + TP_printk("xid=0x%08x", + __entry->xid + ) +); + +#define DEFINE_ERROR_EVENT(name) \ + DEFINE_EVENT(svcrdma_error_event, svcrdma_err_##name, \ + TP_PROTO( \ + __be32 xid \ + ), \ + TP_ARGS(xid)) + +DEFINE_ERROR_EVENT(vers); +DEFINE_ERROR_EVENT(chunk); + +/** + ** Server-side RDMA API events + **/ + +TRACE_EVENT(svcrdma_dma_map_page, + TP_PROTO( + const struct svcxprt_rdma *rdma, + const void *page + ), + + TP_ARGS(rdma, page), + + TP_STRUCT__entry( + __field(const void *, page); + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->page = page; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s page=%p", + __get_str(addr), __get_str(device), __entry->page + ) +); + +TRACE_EVENT(svcrdma_dma_map_rwctx, + TP_PROTO( + const struct svcxprt_rdma *rdma, + int status + ), + + TP_ARGS(rdma, status), + + TP_STRUCT__entry( + __field(int, status) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s status=%d", + __get_str(addr), __get_str(device), __entry->status + ) +); + +TRACE_EVENT(svcrdma_send_failed, + TP_PROTO( + const struct svc_rqst *rqst, + int status + ), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(int, status) + __field(u32, xid) + __field(const void *, xprt) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->status = status; + __entry->xid = __be32_to_cpu(rqst->rq_xid); + __entry->xprt = rqst->rq_xprt; + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s xid=0x%08x status=%d", + __entry->xprt, __get_str(addr), + __entry->xid, __entry->status + ) +); + +DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, + TP_PROTO( + const struct ib_wc *wc + ), + + TP_ARGS(wc), + + TP_STRUCT__entry( + __field(const void *, cqe) + __field(unsigned int, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cqe = wc->wr_cqe; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; + ), + + TP_printk("cqe=%p status=%s (%u/0x%x)", + __entry->cqe, rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_SENDCOMP_EVENT(name) \ + DEFINE_EVENT(svcrdma_sendcomp_event, svcrdma_wc_##name, \ + TP_PROTO( \ + const struct ib_wc *wc \ + ), \ + TP_ARGS(wc)) + +TRACE_EVENT(svcrdma_post_send, + TP_PROTO( + const struct ib_send_wr *wr, + int status + ), + + TP_ARGS(wr, status), + + TP_STRUCT__entry( + __field(const void *, cqe) + __field(unsigned int, num_sge) + __field(u32, inv_rkey) + __field(int, status) + ), + + TP_fast_assign( + __entry->cqe = wr->wr_cqe; + __entry->num_sge = wr->num_sge; + __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? + wr->ex.invalidate_rkey : 0; + __entry->status = status; + ), + + TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d", + __entry->cqe, __entry->num_sge, + __entry->inv_rkey, __entry->status + ) +); + +DEFINE_SENDCOMP_EVENT(send); + +TRACE_EVENT(svcrdma_post_recv, + TP_PROTO( + const struct ib_recv_wr *wr, + int status + ), + + TP_ARGS(wr, status), + + TP_STRUCT__entry( + __field(const void *, cqe) + __field(int, status) + ), + + TP_fast_assign( + __entry->cqe = wr->wr_cqe; + __entry->status = status; + ), + + TP_printk("cqe=%p status=%d", + __entry->cqe, __entry->status + ) +); + +TRACE_EVENT(svcrdma_wc_receive, + TP_PROTO( + const struct ib_wc *wc + ), + + TP_ARGS(wc), + + TP_STRUCT__entry( + __field(const void *, cqe) + __field(u32, byte_len) + __field(unsigned int, status) + __field(u32, vendor_err) + ), + + TP_fast_assign( + __entry->cqe = wc->wr_cqe; + __entry->status = wc->status; + if (wc->status) { + __entry->byte_len = 0; + __entry->vendor_err = wc->vendor_err; + } else { + __entry->byte_len = wc->byte_len; + __entry->vendor_err = 0; + } + ), + + TP_printk("cqe=%p byte_len=%u status=%s (%u/0x%x)", + __entry->cqe, __entry->byte_len, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +TRACE_EVENT(svcrdma_post_rw, + TP_PROTO( + const void *cqe, + int sqecount, + int status + ), + + TP_ARGS(cqe, sqecount, status), + + TP_STRUCT__entry( + __field(const void *, cqe) + __field(int, sqecount) + __field(int, status) + ), + + TP_fast_assign( + __entry->cqe = cqe; + __entry->sqecount = sqecount; + __entry->status = status; + ), + + TP_printk("cqe=%p sqecount=%d status=%d", + __entry->cqe, __entry->sqecount, __entry->status + ) +); + +DEFINE_SENDCOMP_EVENT(read); +DEFINE_SENDCOMP_EVENT(write); + +TRACE_EVENT(svcrdma_cm_event, + TP_PROTO( + const struct rdma_cm_event *event, + const struct sockaddr *sap + ), + + TP_ARGS(event, sap), + + TP_STRUCT__entry( + __field(unsigned int, event) + __field(int, status) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) + ), + + TP_fast_assign( + __entry->event = event->event; + __entry->status = event->status; + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", sap); + ), + + TP_printk("addr=%s event=%s (%u/%d)", + __entry->addr, + rdma_show_cm_event(__entry->event), + __entry->event, __entry->status + ) +); + +TRACE_EVENT(svcrdma_qp_error, + TP_PROTO( + const struct ib_event *event, + const struct sockaddr *sap + ), + + TP_ARGS(event, sap), + + TP_STRUCT__entry( + __field(unsigned int, event) + __string(device, event->device->name) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) + ), + + TP_fast_assign( + __entry->event = event->event; + __assign_str(device, event->device->name); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", sap); + ), + + TP_printk("addr=%s dev=%s event=%s (%u)", + __entry->addr, __get_str(device), + rdma_show_ib_event(__entry->event), __entry->event + ) +); + +DECLARE_EVENT_CLASS(svcrdma_sendqueue_event, + TP_PROTO( + const struct svcxprt_rdma *rdma + ), + + TP_ARGS(rdma), + + TP_STRUCT__entry( + __field(int, avail) + __field(int, depth) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->avail = atomic_read(&rdma->sc_sq_avail); + __entry->depth = rdma->sc_sq_depth; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s sc_sq_avail=%d/%d", + __get_str(addr), __entry->avail, __entry->depth + ) +); + +#define DEFINE_SQ_EVENT(name) \ + DEFINE_EVENT(svcrdma_sendqueue_event, svcrdma_sq_##name,\ + TP_PROTO( \ + const struct svcxprt_rdma *rdma \ + ), \ + TP_ARGS(rdma)) + +DEFINE_SQ_EVENT(full); +DEFINE_SQ_EVENT(retry); + #endif /* _TRACE_RPCRDMA_H */ #include <trace/define_trace.h> |