summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roland Dreier <rolandd@cisco.com> 2009-01-16 12:47:47 -0800
committer: Roland Dreier <rolandd@cisco.com> 2009-01-16 12:47:47 -0800
commit: 0fd7e1d8559f45a6838cee93ea49adc0c5bda8f0 (patch)
tree: 7d31d09b0d60de47a1b668474957ce1926812087
parent: d3b924d960a808105180d229b4667061123cc4ef (diff)
IB/mlx4: Fix memory ordering problem when posting LSO sends
The current work request posting code writes the LSO segment before writing any data segments. This leaves a window where the LSO segment overwrites the stamping in one cacheline that the HCA prefetches before the rest of the cacheline is filled with the correct data segments. When the HCA processes this work request, a local protection error may result.

Fix this by saving the LSO header size field off and writing it only after all data segments are written.

This fix is a cleaned-up version of a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1383>.

Reported-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 28
1 files changed, 19 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 39167a797f99..a91cb4c3fa5c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
}
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
- struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+ struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
+ __be32 *lso_hdr_sz)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
- /* make sure LSO header is written before overwriting stamping */
- wmb();
-
- wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
- wr->wr.ud.hlen);
-
+ *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+ wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int uninitialized_var(stamp);
int uninitialized_var(size);
unsigned uninitialized_var(seglen);
+ __be32 dummy;
+ __be32 *lso_wqe;
+ __be32 uninitialized_var(lso_hdr_sz);
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ind = qp->sq_next_wqe;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ lso_wqe = &dummy;
+
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen);
+ err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
+ lso_wqe = (__be32 *) wqe;
wqe += seglen;
size += seglen / 16;
}
@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);
+ /*
+ * Possibly overwrite stamping in cacheline with LSO
+ * segment only after making sure all data segments
+ * are written.
+ */
+ wmb();
+ *lso_wqe = lso_hdr_sz;
+
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
stamp_send_wqe(qp, stamp, size * 16);
ind = pad_wraparound(qp, ind);
}
-
}
out: