diff options
author | Mintz, Yuval <Yuval.Mintz@cavium.com> | 2017-03-11 18:39:18 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-03-13 15:33:09 -0700 |
commit | be086e7c53f1fac51eed14523b28f2214b548dd2 (patch) | |
tree | 3eb35b0075b6dac25726da6ce4f7d766de15a990 /drivers/net/ethernet/qlogic/qed/qed_roce.c | |
parent | 6a019c5c5059aa0d14788d015aba2f5934aa16ec (diff) |
qed*: Utilize Firmware 8.15.3.0
This patch advances the qed* drivers into using the newer firmware -
This solves several firmware bugs, mostly related [but not limited to]
various init/deinit issues in various offloaded protocols.
It also introduces a major 4-Cached SGE change in firmware, which can be
seen in the storage drivers' changes.
In addition, this firmware is required for supporting the new QL41xxx
series of adapters; While this patch doesn't add the actual support,
the firmware contains the necessary initialization & firmware logic to
operate such adapters [actual support would be added later on].
Changes from Previous versions:
-------------------------------
- V2 - fix kbuild-test robot warnings
Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Chad Dupuis <Chad.Dupuis@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/qlogic/qed/qed_roce.c')
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_roce.c | 220 |
1 files changed, 176 insertions, 44 deletions
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index d9ff6b28591c..4bef5c59627c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -66,13 +66,27 @@ #include "qed_roce.h" #include "qed_ll2.h" -void qed_async_roce_event(struct qed_hwfn *p_hwfn, - struct event_ring_entry *p_eqe) +static void qed_roce_free_real_icid(struct qed_hwfn *p_hwfn, u16 icid); + +void qed_roce_async_event(struct qed_hwfn *p_hwfn, + u8 fw_event_code, union rdma_eqe_data *rdma_data) { - struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + if (fw_event_code == ROCE_ASYNC_EVENT_DESTROY_QP_DONE) { + u16 icid = + (u16)le32_to_cpu(rdma_data->rdma_destroy_qp_data.cid); + + /* icid release in this async event can occur only if the icid + * was offloaded to the FW. In case it wasn't offloaded this is + * handled in qed_roce_sp_destroy_qp. + */ + qed_roce_free_real_icid(p_hwfn, icid); + } else { + struct qed_rdma_events *events = &p_hwfn->p_rdma_info->events; - p_rdma_info->events.affiliated_event(p_rdma_info->events.context, - p_eqe->opcode, &p_eqe->data); + events->affiliated_event(p_hwfn->p_rdma_info->events.context, + fw_event_code, + &rdma_data->async_handle); + } } static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, @@ -113,6 +127,15 @@ static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn, return 0; } +static void qed_bmap_set_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 id_num) +{ + if (id_num >= bmap->max_count) + return; + + __set_bit(id_num, bmap->bitmap); +} + static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap, u32 id_num) { @@ -129,6 +152,15 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, } } +static int qed_bmap_test_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 id_num) +{ + if (id_num >= bmap->max_count) + return -1; + + return test_bit(id_num, bmap->bitmap); +} + static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) { /* First sb id for RoCE is after all the l2 sb */ @@ -170,7 +202,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Queue zone lines are shared between RoCE and L2 in such a way that * they can be used by each without obstructing the other. */ - p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE); + p_rdma_info->queue_zone_base = (u16)RESC_START(p_hwfn, QED_L2_QUEUE); + p_rdma_info->max_queue_zones = (u16)RESC_NUM(p_hwfn, QED_L2_QUEUE); /* Allocate a struct with device params and fill it */ p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL); @@ -248,9 +281,18 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, goto free_tid_map; } + /* Allocate bitmap for cids used for responders/requesters. */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->real_cid_map, num_cons); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate real cid bitmap, rc = %d\n", rc); + goto free_cid_map; + } DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n"); return 0; +free_cid_map: + kfree(p_rdma_info->cid_map.bitmap); free_tid_map: kfree(p_rdma_info->tid_map.bitmap); free_toggle_map: @@ -273,7 +315,22 @@ free_rdma_info: static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) { + struct qed_bmap *rcid_map = &p_hwfn->p_rdma_info->real_cid_map; struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + int wait_count = 0; + + /* when destroying a_RoCE QP the control is returned to the user after + * the synchronous part. The asynchronous part may take a little longer. + * We delay for a short while if an async destroy QP is still expected. + * Beyond the added delay we clear the bitmap anyway. + */ + while (bitmap_weight(rcid_map->bitmap, rcid_map->max_count)) { + msleep(100); + if (wait_count++ > 20) { + DP_NOTICE(p_hwfn, "cid bitmap wait timed out\n"); + break; + } + } kfree(p_rdma_info->cid_map.bitmap); kfree(p_rdma_info->tid_map.bitmap); @@ -724,6 +781,14 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) u32 addr; p_hwfn = (struct qed_hwfn *)rdma_cxt; + + if (qz_offset > p_hwfn->p_rdma_info->max_queue_zones) { + DP_NOTICE(p_hwfn, + "queue zone offset %d is too large (max is %d)\n", + qz_offset, p_hwfn->p_rdma_info->max_queue_zones); + return; + } + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); @@ -1080,6 +1145,14 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) return flavor; } +void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) +{ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid + 1); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + static int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; @@ -1139,6 +1212,13 @@ err: return rc; } +static void qed_roce_set_real_cid(struct qed_hwfn *p_hwfn, u32 cid) +{ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, cid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { @@ -1147,7 +1227,8 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, union qed_qm_pq_params qm_params; enum roce_flavor roce_flavor; struct qed_spq_entry *p_ent; - u16 physical_queue0 = 0; + u16 regular_latency_queue; + enum protocol_type proto; int rc; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); @@ -1229,15 +1310,19 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); - p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); memset(&qm_params, 0, sizeof(qm_params)); qm_params.roce.qpid = qp->icid >> 1; - physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + regular_latency_queue = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, + &qm_params); + + p_ramrod->regular_latency_phy_queue = + cpu_to_le16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = + cpu_to_le16(regular_latency_queue); - p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); p_ramrod->dpi = cpu_to_le16(qp->dpi); qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); @@ -1253,13 +1338,19 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, rc = qed_spq_post(p_hwfn, p_ent, NULL); - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n", - rc, physical_queue0); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "rc = %d regular physical queue = 0x%x\n", rc, + regular_latency_queue); if (rc) goto err; qp->resp_offloaded = true; + qp->cq_prod = 0; + + proto = p_hwfn->p_rdma_info->proto; + qed_roce_set_real_cid(p_hwfn, qp->icid - + qed_cxt_get_proto_cid_start(p_hwfn, proto)); return rc; @@ -1280,7 +1371,8 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, union qed_qm_pq_params qm_params; enum roce_flavor roce_flavor; struct qed_spq_entry *p_ent; - u16 physical_queue0 = 0; + u16 regular_latency_queue; + enum protocol_type proto; int rc; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); @@ -1351,15 +1443,19 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); - p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; - p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | - qp->sq_cq_id); + p_ramrod->cq_cid = + cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id); memset(&qm_params, 0, sizeof(qm_params)); qm_params.roce.qpid = qp->icid >> 1; - physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + regular_latency_queue = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, + &qm_params); + + p_ramrod->regular_latency_phy_queue = + cpu_to_le16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = + cpu_to_le16(regular_latency_queue); - p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); p_ramrod->dpi = cpu_to_le16(qp->dpi); qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); @@ -1378,6 +1474,10 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, goto err; qp->req_offloaded = true; + proto = p_hwfn->p_rdma_info->proto; + qed_roce_set_real_cid(p_hwfn, + qp->icid + 1 - + qed_cxt_get_proto_cid_start(p_hwfn, proto)); return rc; @@ -1577,7 +1677,8 @@ static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn, static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp, - u32 *num_invalidated_mw) + u32 *num_invalidated_mw, + u32 *cq_prod) { struct roce_destroy_qp_resp_output_params *p_ramrod_res; struct roce_destroy_qp_resp_ramrod_data *p_ramrod; @@ -1588,8 +1689,22 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); - if (!qp->resp_offloaded) + *num_invalidated_mw = 0; + *cq_prod = qp->cq_prod; + + if (!qp->resp_offloaded) { + /* If a responder was never offload, we need to free the cids + * allocated in create_qp as a FW async event will never arrive + */ + u32 cid; + + cid = qp->icid - + qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + qed_roce_free_cid_pair(p_hwfn, (u16)cid); + return 0; + } /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -1624,6 +1739,8 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, goto err; *num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw); + *cq_prod = le32_to_cpu(p_ramrod_res->cq_prod); + qp->cq_prod = *cq_prod; /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ dma_free_coherent(&p_hwfn->cdev->pdev->dev, @@ -1827,10 +1944,8 @@ static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, out_params->draining = false; - if (rq_err_state) + if (rq_err_state || sq_err_state) qp->cur_state = QED_ROCE_QP_STATE_ERR; - else if (sq_err_state) - qp->cur_state = QED_ROCE_QP_STATE_SQE; else if (sq_draining) out_params->draining = true; out_params->state = qp->cur_state; @@ -1849,10 +1964,9 @@ err_resp: static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { - struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; u32 num_invalidated_mw = 0; u32 num_bound_mw = 0; - u32 start_cid; + u32 cq_prod; int rc; /* Destroys the specified QP */ @@ -1866,7 +1980,8 @@ static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) if (qp->cur_state != QED_ROCE_QP_STATE_RESET) { rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, - &num_invalidated_mw); + &num_invalidated_mw, + &cq_prod); if (rc) return rc; @@ -1881,21 +1996,6 @@ static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) "number of invalidate memory windows is different from bounded ones\n"); return -EINVAL; } - - spin_lock_bh(&p_rdma_info->lock); - - start_cid = qed_cxt_get_proto_cid_start(p_hwfn, - p_rdma_info->proto); - - /* Release responder's icid */ - qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, - qp->icid - start_cid); - - /* Release requester's icid */ - qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, - qp->icid + 1 - start_cid); - - spin_unlock_bh(&p_rdma_info->lock); } return 0; @@ -2110,12 +2210,19 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, return rc; } else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) { /* Any state -> RESET */ + u32 cq_prod; + + /* Send destroy responder ramrod */ + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, + qp, + &num_invalidated_mw, + &cq_prod); - rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, - &num_invalidated_mw); if (rc) return rc; + qp->cq_prod = cq_prod; + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, &num_bound_mw); @@ -2454,6 +2561,31 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) return rc; } +static void qed_roce_free_real_icid(struct qed_hwfn *p_hwfn, u16 icid) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 start_cid, cid, xcid; + + /* an even icid belongs to a responder while an odd icid belongs to a + * requester. The 'cid' received as an input can be either. We calculate + * the "partner" icid and call it xcid. Only if both are free then the + * "cid" map can be cleared. + */ + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto); + cid = icid - start_cid; + xcid = cid ^ 1; + + spin_lock_bh(&p_rdma_info->lock); + + qed_bmap_release_id(p_hwfn, &p_rdma_info->real_cid_map, cid); + if (qed_bmap_test_id(p_hwfn, &p_rdma_info->real_cid_map, xcid) == 0) { + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, cid); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, xcid); + } + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { return QED_LEADING_HWFN(cdev); @@ -2773,7 +2905,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev, : QED_LL2_RROCE; if (pkt->roce_mode == ROCE_V2_IPV4) - flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT); /* Tx header */ rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle, |