summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/tid_rdma.h
blob: 9b952351f0729831aa36cd715c22edec21ccafd3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H

#include <linux/circ_buf.h>
#include "common.h"

/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)

#define TID_RDMA_MIN_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)

/*
 * Bit definitions for priv->s_flags.
 * These bit flags overload the bit flags defined for the QP's s_flags.
 * Due to the fact that these bit fields are used only for the QP priv
 * s_flags, there are no collisions.
 *
 * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
 */
#define HFI1_S_TID_WAIT_INTERLCK  BIT(5)

/*
 * Unlike regular IB RDMA VERBS, which do not require an entry
 * in the s_ack_queue, TID RDMA WRITE requests do because they
 * generate responses.
 * Therefore, the s_ack_queue needs to be extended by a certain
 * amount. The key point is that the queue needs to be extended
 * without letting the "user" know so they user doesn't end up
 * using these extra entries.
 */
#define HFI1_TID_RDMA_WRITE_CNT 8

struct tid_rdma_params {
	struct rcu_head rcu_head;
	u32 qp;
	u32 max_len;
	u16 jkey;
	u8 max_read;
	u8 max_write;
	u8 timeout;
	u8 urg;
	u8 version;
};

struct tid_rdma_qp_params {
	struct work_struct trigger_work;
	struct tid_rdma_params local;
	struct tid_rdma_params __rcu *remote;
};

/* Track state for each hardware flow */
struct tid_flow_state {
	u32 generation;
	u32 psn;
	u32 r_next_psn;      /* next PSN to be received (in TID space) */
	u8 index;
	u8 last_index;
	u8 flags;
};

enum tid_rdma_req_state {
	TID_REQUEST_INACTIVE = 0,
	TID_REQUEST_INIT,
	TID_REQUEST_INIT_RESEND,
	TID_REQUEST_ACTIVE,
	TID_REQUEST_RESEND,
	TID_REQUEST_RESEND_ACTIVE,
	TID_REQUEST_QUEUED,
	TID_REQUEST_SYNC,
	TID_REQUEST_RNR_NAK,
	TID_REQUEST_COMPLETE,
};

struct tid_rdma_request {
	struct rvt_qp *qp;
	struct hfi1_ctxtdata *rcd;
	union {
		struct rvt_swqe *swqe;
		struct rvt_ack_entry *ack;
	} e;

	struct tid_rdma_flow *flows;	/* array of tid flows */
	u16 n_flows;		/* size of the flow buffer window */
	u16 setup_head;		/* flow index we are setting up */
	u16 clear_tail;		/* flow index we are clearing */
	u16 flow_idx;		/* flow index most recently set up */

	u32 seg_len;
	u32 total_len;
	u32 r_flow_psn;         /* IB PSN of next segment start */
	u32 s_next_psn;		/* IB PSN of next segment start for read */

	u32 total_segs;		/* segments required to complete a request */
	u32 cur_seg;		/* index of current segment */
	u32 comp_seg;           /* index of last completed segment */
	u32 ack_seg;            /* index of last ack'ed segment */
	u32 isge;		/* index of "current" sge */
	u32 ack_pending;        /* num acks pending for this request */

	enum tid_rdma_req_state state;
};

/*
 * When header suppression is used, PSNs associated with a "flow" are
 * relevant (and not the PSNs maintained by verbs). Track per-flow
 * PSNs here for a TID RDMA segment.
 *
 */
struct flow_state {
	u32 flags;
	u32 resp_ib_psn;     /* The IB PSN of the response for this flow */
	u32 generation;      /* generation of flow */
	u32 spsn;            /* starting PSN in TID space */
	u32 lpsn;            /* last PSN in TID space */
	u32 r_next_psn;      /* next PSN to be received (in TID space) */

	/* For tid rdma read */
	u32 ib_spsn;         /* starting PSN in Verbs space */
	u32 ib_lpsn;         /* last PSn in Verbs space */
};

struct tid_rdma_pageset {
	dma_addr_t addr : 48; /* Only needed for the first page */
	u8 idx: 8;
	u8 count : 7;
	u8 mapped: 1;
};

/**
 * kern_tid_node - used for managing TID's in TID groups
 *
 * @grp_idx: rcd relative index to tid_group
 * @map: grp->map captured prior to programming this TID group in HW
 * @cnt: Only @cnt of available group entries are actually programmed
 */
struct kern_tid_node {
	struct tid_group *grp;
	u8 map;
	u8 cnt;
};

/* Overall info for a TID RDMA segment */
struct tid_rdma_flow {
	/*
	 * While a TID RDMA segment is being transferred, it uses a QP number
	 * from the "KDETH section of QP numbers" (which is different from the
	 * QP number that originated the request). Bits 11-15 of these QP
	 * numbers identify the "TID flow" for the segment.
	 */
	struct flow_state flow_state;
	struct tid_rdma_request *req;
	u32 tid_qpn;
	u32 tid_offset;
	u32 length;
	u32 sent;
	u8 tnode_cnt;
	u8 tidcnt;
	u8 tid_idx;
	u8 idx;
	u8 npagesets;
	u8 npkts;
	u8 pkt;
	struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
	struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
	u32 tid_entry[TID_RDMA_MAX_PAGES];
};

bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);

int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
			    struct rvt_sge_state *ss, bool *last);
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);

/**
 * trdma_clean_swqe - clean flows for swqe if large send queue
 * @qp: the qp
 * @wqe: the send wqe
 */
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	if (!wqe->priv)
		return;
	__trdma_clean_swqe(qp, wqe);
}

void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);

int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		      struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);

void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);

int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);

struct cntr_entry;
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
			    void *context, int vl, int mode, u64 data);

u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
				    struct ib_other_headers *ohdr,
				    u32 *bth1, u32 *bth2, u32 *len);
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				 struct ib_other_headers *ohdr, u32 *bth1,
				 u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				  struct ib_other_headers *ohdr, u32 *bth0,
				  u32 *bth1, u32 *bth2, u32 *len, bool *last);
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
			      struct hfi1_pportdata *ppd,
			      struct hfi1_packet *packet);
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);

void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
					   struct rvt_swqe *wqe)
{
	if (wqe->priv &&
	    wqe->wr.opcode == IB_WR_RDMA_READ &&
	    wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
		setup_tid_rdma_wqe(qp, wqe);
}

u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				  struct ib_other_headers *ohdr,
				  u32 *bth1, u32 *bth2, u32 *len);
#endif /* HFI1_TID_RDMA_H */