summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXi Wang <wangxi11@huawei.com>2021-01-30 16:58:08 +0800
committerJason Gunthorpe <jgg@nvidia.com>2021-02-08 19:37:35 -0400
commit6b981e2bd9251f4f4d0fe32b1eeb29b0d88813a5 (patch)
treef341817924453a2435ae0280e4c762079acb70c8
parent9ae2a37e6ae650d7d81f4afede9d879937cab5db (diff)
RDMA/hns: Clear remaining unused sges when post_recv
The HIP09 requires the driver to clear the unused data segments in wqe buffer to make the hns ROCEE stop reading the remaining invalid sges for RQ. Link: https://lore.kernel.org/r/1611997090-48820-11-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang <wangxi11@huawei.com> Signed-off-by: Weihang Li <liweihang@huawei.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c99
1 files changed, 47 insertions, 52 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1f7042285ed4..7eba9b5bfcb8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -729,28 +729,42 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev,
return 0;
}
-static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
- u32 wqe_idx)
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
{
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_rinl_sge *sge_list;
- void *wqe = NULL;
- int i;
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
- wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
- for (i = 0; i < wr->num_sge; i++) {
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
if (!wr->sg_list[i].length)
continue;
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
}
- if (hr_qp->rq.rsv_sge) {
- dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg->addr = 0;
- dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
}
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ void *wqe = NULL;
+ u32 i;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
/* rq support inline data */
if (hr_qp->rq_inl_buf.wqe_cnt) {
@@ -801,8 +815,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
}
wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
- fill_rq_wqe(hr_qp, wr, wqe_idx);
-
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
@@ -834,18 +847,18 @@ out:
return ret;
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
{
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
}
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
{
return hns_roce_buf_offset(idx_que->mtr.kmem,
n << idx_que->entry_shift);
}
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
{
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
@@ -856,7 +869,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
spin_unlock(&srq->lock);
}
-int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
+int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
unsigned int cur;
@@ -865,19 +878,18 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
return cur + nreq >= srq->wqe_cnt;
}
-static int find_empty_entry(struct hns_roce_idx_que *idx_que,
- unsigned long size)
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
{
- int wqe_idx;
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
- if (unlikely(bitmap_full(idx_que->bitmap, size)))
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
return -ENOSPC;
- wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
-
- bitmap_set(idx_que->bitmap, wqe_idx, 1);
-
- return wqe_idx;
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
}
static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
@@ -886,17 +898,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_wqe_data_seg *dseg;
+ u32 wqe_idx, ind, nreq, max_sge;
struct hns_roce_v2_db srq_db;
unsigned long flags;
- unsigned int ind;
__le32 *srq_idx;
int ret = 0;
- int wqe_idx;
- u32 max_sge;
void *wqe;
- int nreq;
- int i;
spin_lock_irqsave(&srq->lock, flags);
@@ -919,26 +926,14 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
- if (unlikely(wqe_idx < 0)) {
- ret = -ENOMEM;
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
*bad_wr = wr;
break;
}
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
-
- if (srq->rsv_sge) {
- dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
- dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg[i].addr = 0;
- }
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
srq_idx = get_idx_buf(&srq->idx_que, ind);
*srq_idx = cpu_to_le32(wqe_idx);