diff options
Diffstat (limited to 'drivers/infiniband/ulp')
26 files changed, 4063 insertions, 523 deletions
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile index f3c7dcf03098..c28af1823a2d 100644 --- a/drivers/infiniband/ulp/Makefile +++ b/drivers/infiniband/ulp/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/ obj-$(CONFIG_INFINIBAND_SRPT) += srpt/ obj-$(CONFIG_INFINIBAND_ISER) += iser/ obj-$(CONFIG_INFINIBAND_ISERT) += isert/ +obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bed233bf45c3..ff50a7bd66d8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -52,7 +52,6 @@ #include <rdma/ib_pack.h> #include <rdma/ib_sa.h> #include <linux/sched.h> - /* constants */ enum ipoib_flush_level { @@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb) skb_pull(skb, IPOIB_HARD_LEN); } +static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev) +{ + struct rdma_netdev *rn = netdev_priv(dev); + + return rn->clnt_priv; +} + /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ struct ipoib_mcast { struct ib_sa_mcmember_rec mcmember; @@ -404,6 +410,7 @@ struct ipoib_dev_priv { struct timer_list poll_timer; unsigned max_send_sge; bool sm_fullmember_sendonly_support; + const struct net_device_ops *rn_ops; }; struct ipoib_ah { @@ -416,7 +423,7 @@ struct ipoib_ah { struct ipoib_path { struct net_device *dev; - struct ib_sa_path_rec pathrec; + struct sa_path_rec pathrec; struct ipoib_ah *ah; struct sk_buff_head queue; @@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, - struct ib_pd *pd, struct ib_ah_attr *attr); + struct ib_pd *pd, struct rdma_ah_attr *attr); void ipoib_free_ah(struct kref *kref); static inline void ipoib_put_ah(struct ipoib_ah *ah) { @@ -482,27 +489,28 @@ int ipoib_open(struct net_device 
*dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); -void ipoib_send(struct net_device *dev, struct sk_buff *skb, - struct ipoib_ah *address, u32 qpn); +int ipoib_send(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn); void ipoib_reap_ah(struct work_struct *work); struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); -int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); -struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); - -int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); +struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, + const char *format); +void ipoib_ib_tx_timer_func(unsigned long ctx); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); +int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); +int ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); -void ipoib_ib_dev_stop(struct net_device *dev); +int ipoib_ib_dev_stop_default(struct net_device *dev); void ipoib_pkey_dev_check_presence(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); @@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path); #endif -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid, int set_qkey); +int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, + union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey); +int 
ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca, + union ib_gid *mgid, u16 mlid); void ipoib_mcast_remove_list(struct list_head *remove_list); void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid, struct list_head *remove_list); @@ -587,7 +597,7 @@ void __exit ipoib_netlink_fini(void); void ipoib_set_umcast(struct net_device *ndev, int umcast_val); int ipoib_set_mode(struct net_device *dev, const char *buf); -void ipoib_setup(struct net_device *dev); +void ipoib_setup_common(struct net_device *dev); void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); @@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp; static inline int ipoib_cm_admin_enabled(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return IPOIB_CM_SUPPORTED(dev->dev_addr) && test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return IPOIB_CM_SUPPORTED(hwaddr) && test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } @@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t static inline int ipoib_cm_has_srq(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return !!priv->cm.srq; } static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return priv->cm.max_cm_mtu; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0cdf2b7f272f..7cbcfdac6529 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -92,7 +92,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv 
*priv, int frags, static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; @@ -118,7 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, struct ib_recv_wr *wr, struct ib_sge *sge, int id) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; @@ -145,7 +145,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, u64 mapping[IPOIB_CM_RX_SG], gfp_t gfp) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct sk_buff *skb; int i; @@ -196,7 +196,7 @@ partial_error: static void ipoib_cm_free_rx_ring(struct net_device *dev, struct ipoib_cm_rx_buf *rx_ring) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int i; for (i = 0; i < ipoib_recvq_size; ++i) @@ -235,7 +235,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) { struct ipoib_cm_rx *p = ctx; - struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_dev_priv *priv = ipoib_priv(p->dev); unsigned long flags; if (event->event != IB_EVENT_QP_LAST_WQE_REACHED) @@ -251,7 +251,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, struct ipoib_cm_rx *p) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, .send_cq = priv->recv_cq, /* For drain WR */ @@ -276,7 +276,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, unsigned psn) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct 
ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; int qp_attr_mask, ret; @@ -331,7 +331,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, struct ib_recv_wr *wr, struct ib_sge *sge) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int i; for (i = 0; i < priv->cm.num_frags; ++i) @@ -349,7 +349,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct { struct ib_recv_wr wr; struct ib_sge sge[IPOIB_CM_RX_SG]; @@ -422,7 +422,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, struct ib_cm_req_event_param *req, unsigned psn) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; struct ib_cm_rep_param rep = {}; @@ -442,7 +442,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct net_device *dev = cm_id->context; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; unsigned psn; int ret; @@ -515,7 +515,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, /* Fall through */ case IB_CM_REJ_RECEIVED: p = cm_id->context; - priv = netdev_priv(p->dev); + priv = ipoib_priv(p->dev); if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) ipoib_warn(priv, "unable to move qp to error state\n"); /* Fall through */ @@ -559,7 +559,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); 
struct ipoib_cm_rx_buf *rx_ring; unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); struct sk_buff *skb, *newskb; @@ -708,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int rc; unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); @@ -786,7 +786,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; struct ipoib_tx_buf *tx_req; @@ -855,7 +855,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) int ipoib_cm_dev_open(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret; if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) @@ -887,7 +887,7 @@ err_cm: static void ipoib_cm_free_rx_reap_list(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *rx, *n; LIST_HEAD(list); @@ -910,7 +910,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev) void ipoib_cm_dev_stop(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; unsigned long begin; int ret; @@ -969,7 +969,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ipoib_cm_tx *p = cm_id->context; - struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_dev_priv *priv = ipoib_priv(p->dev); struct 
ipoib_cm_data *data = event->private_data; struct sk_buff_head skqueue; struct ib_qp_attr qp_attr; @@ -1037,7 +1037,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_init_attr attr = { .send_cq = priv->recv_cq, .recv_cq = priv->recv_cq, @@ -1068,9 +1068,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ static int ipoib_cm_send_req(struct net_device *dev, struct ib_cm_id *id, struct ib_qp *qp, u32 qpn, - struct ib_sa_path_rec *pathrec) + struct sa_path_rec *pathrec) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; struct ib_cm_req_param req = {}; @@ -1105,7 +1105,7 @@ static int ipoib_cm_send_req(struct net_device *dev, static int ipoib_cm_modify_tx_init(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; int qp_attr_mask, ret; ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); @@ -1128,9 +1128,9 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev, } static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, - struct ib_sa_path_rec *pathrec) + struct sa_path_rec *pathrec) { - struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_dev_priv *priv = ipoib_priv(p->dev); int ret; p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, @@ -1186,7 +1186,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { - struct ipoib_dev_priv *priv = netdev_priv(p->dev); + struct ipoib_dev_priv *priv = ipoib_priv(p->dev); struct ipoib_tx_buf *tx_req; unsigned long begin; @@ -1236,7 +1236,7 @@ static int 
ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ipoib_cm_tx *tx = cm_id->context; - struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; unsigned long flags; @@ -1287,7 +1287,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, struct ipoib_neigh *neigh) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx; tx = kzalloc(sizeof *tx, GFP_ATOMIC); @@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) { - struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); unsigned long flags; if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { spin_lock_irqsave(&priv->lock, flags); @@ -1332,7 +1332,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ipoib_path *path; int ret; - struct ib_sa_path_rec pathrec; + struct sa_path_rec pathrec; u32 qpn; netif_tx_lock_bh(dev); @@ -1441,7 +1441,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, unsigned int mtu) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) @@ -1490,7 +1490,8 @@ static void ipoib_cm_stale_task(struct work_struct *work) static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { - struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d)); + struct net_device *dev = to_net_dev(d); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) return sprintf(buf, "connected\n"); @@ -1503,7 
+1504,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) return -EPERM; @@ -1532,7 +1533,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev) static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_srq_init_attr srq_init_attr = { .srq_type = IB_SRQT_BASIC, .attr = { @@ -1561,7 +1562,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) int ipoib_cm_dev_init(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int max_srq_sge, i; INIT_LIST_HEAD(&priv->cm.passive_ids); @@ -1622,7 +1623,7 @@ int ipoib_cm_dev_init(struct net_device *dev) void ipoib_cm_dev_cleanup(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret; if (!priv->cm.srq) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index bac455a1942d..874b24366e4d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -60,7 +60,7 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = { static void ipoib_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ipoib_dev_priv *priv = ipoib_priv(netdev); ib_get_device_fw_str(priv->ca, drvinfo->fw_version, sizeof(drvinfo->fw_version)); @@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev, static int ipoib_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct 
ipoib_dev_priv *priv = ipoib_priv(dev); coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs; coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; @@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev, static int ipoib_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret; /* @@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev, return -EOPNOTSUPP; } +/* Return lane speed in unit of 1e6 bit/sec */ +static inline int ib_speed_enum_to_int(int speed) +{ + switch (speed) { + case IB_SPEED_SDR: + return SPEED_2500; + case IB_SPEED_DDR: + return SPEED_5000; + case IB_SPEED_QDR: + case IB_SPEED_FDR10: + return SPEED_10000; + case IB_SPEED_FDR: + return SPEED_14000; + case IB_SPEED_EDR: + return SPEED_25000; + } + + return SPEED_UNKNOWN; +} + +static int ipoib_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ib_port_attr attr; + int ret, speed, width; + + if (!netif_carrier_ok(netdev)) { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + return 0; + } + + ret = ib_query_port(priv->ca, priv->port, &attr); + if (ret < 0) + return -EINVAL; + + speed = ib_speed_enum_to_int(attr.active_speed); + width = ib_width_enum_to_int(attr.active_width); + + if (speed < 0 || width < 0) + return -EINVAL; + + /* Except the following are set, the other members of + * the struct ethtool_link_settings are initialized to + * zero in the function __ethtool_get_link_ksettings. 
+ */ + cmd->base.speed = speed * width; + cmd->base.duplex = DUPLEX_FULL; + + cmd->base.phy_address = 0xFF; + + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.port = PORT_OTHER; + + return 0; +} + static const struct ethtool_ops ipoib_ethtool_ops = { + .get_link_ksettings = ipoib_get_link_ksettings, .get_drvinfo = ipoib_get_drvinfo, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 6bd5740e2691..11f74cbe6660 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) seq_printf(file, "GID: %s\n" " complete: %6s\n", - gid_buf, path.pathrec.dlid ? "yes" : "no"); + gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no"); - if (path.pathrec.dlid) { + if (sa_path_get_dlid(&path.pathrec)) { rate = ib_rate_to_mbps(path.pathrec.rate); seq_printf(file, " DLID: 0x%04x\n" " SL: %12d\n" " rate: %8d.%d Gb/sec\n", - be16_to_cpu(path.pathrec.dlid), + be32_to_cpu(sa_path_get_dlid(&path.pathrec)), path.pathrec.sl, rate / 1000, rate % 1000); } @@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = { void ipoib_create_debug_files(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); char name[IFNAMSIZ + sizeof "_path"]; snprintf(name, sizeof name, "%s_mcg", dev->name); @@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev) void ipoib_delete_debug_files(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n"); + WARN_ONCE(!priv->path_dentry, "null path debug file\n"); debugfs_remove(priv->mcg_dentry); debugfs_remove(priv->path_dentry); + priv->mcg_dentry = priv->path_dentry = NULL; } int 
ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 12c4f84a6639..0060b2f9f659 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level, #endif struct ipoib_ah *ipoib_create_ah(struct net_device *dev, - struct ib_pd *pd, struct ib_ah_attr *attr) + struct ib_pd *pd, struct rdma_ah_attr *attr) { struct ipoib_ah *ah; struct ib_ah *vah; @@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, ah->last_send = 0; kref_init(&ah->ref); - vah = ib_create_ah(pd, attr); + vah = rdma_create_ah(pd, attr); if (IS_ERR(vah)) { kfree(ah); ah = (struct ipoib_ah *)vah; } else { ah->ah = vah; - ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah); + ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah); } return ah; @@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, void ipoib_free_ah(struct kref *kref) { struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref); - struct ipoib_dev_priv *priv = netdev_priv(ah->dev); + struct ipoib_dev_priv *priv = ipoib_priv(ah->dev); unsigned long flags; @@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, static int ipoib_ib_post_receive(struct net_device *dev, int id) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_recv_wr *bad_wr; int ret; @@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct sk_buff *skb; int buf_size; u64 *mapping; @@ -153,7 +153,7 @@ error: static int ipoib_ib_post_receives(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int i; for (i 
= 0; i < ipoib_recvq_size; ++i) { @@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev) static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; u64 mapping[IPOIB_UD_RX_SG]; @@ -381,7 +381,7 @@ free_res: static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); unsigned int wr_id = wc->wr_id; struct ipoib_tx_buf *tx_req; @@ -485,14 +485,14 @@ poll_more: void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) { struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); napi_schedule(&priv->napi); } static void drain_tx_cq(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); netif_tx_lock(dev); while (poll_tx(priv)) @@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev) void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) { - struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr); mod_timer(&priv->poll_timer, jiffies); } static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, - struct ib_ah *address, u32 qpn, + struct ib_ah *address, u32 dqpn, struct ipoib_tx_buf *tx_req, void *head, int hlen) { @@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, ipoib_build_sge(priv, tx_req); priv->tx_wr.wr.wr_id = wr_id; - priv->tx_wr.remote_qpn = qpn; + priv->tx_wr.remote_qpn = dqpn; priv->tx_wr.ah = address; if (head) { @@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv, return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); } -void ipoib_send(struct net_device 
*dev, struct sk_buff *skb, - struct ipoib_ah *address, u32 qpn) +int ipoib_send(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; @@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ++dev->stats.tx_dropped; ++dev->stats.tx_errors; dev_kfree_skb_any(skb); - return; + return -1; } } else { if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) { @@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ++dev->stats.tx_dropped; ++dev->stats.tx_errors; ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); - return; + return -1; } phead = NULL; hlen = 0; @@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ++dev->stats.tx_dropped; ++dev->stats.tx_errors; dev_kfree_skb_any(skb); - return; + return -1; } /* Does skb_linearize return ok without reducing nr_frags? 
*/ if (skb_shinfo(skb)->nr_frags > usable_sge) { @@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ++dev->stats.tx_dropped; ++dev->stats.tx_errors; dev_kfree_skb_any(skb); - return; + return -1; } } - ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", - skb->len, address, qpn); + ipoib_dbg_data(priv, + "sending packet, length=%d address=%p dqpn=0x%06x\n", + skb->len, address, dqpn); /* * We put the skb into the tx_ring _before_ we call post_send() @@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { ++dev->stats.tx_errors; dev_kfree_skb_any(skb); - return; + return -1; } if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, skb_dst_drop(skb); rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), - address->ah, qpn, tx_req, phead, hlen); + address, dqpn, tx_req, phead, hlen); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; @@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) netif_wake_queue(dev); + rc = 0; } else { netif_trans_update(dev); - address->last_send = priv->tx_head; + rc = priv->tx_head; ++priv->tx_head; } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) while (poll_tx(priv)) ; /* nothing */ + + return rc; } static void __ipoib_reap_ah(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; LIST_HEAD(remove_list); unsigned long flags; @@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev) list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); - ib_destroy_ah(ah->ah); + rdma_destroy_ah(ah->ah); kfree(ah); } @@ -677,7 +681,7 @@ 
void ipoib_reap_ah(struct work_struct *work) static void ipoib_flush_ah(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); cancel_delayed_work(&priv->ah_reap_task); flush_workqueue(priv->wq); @@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev) static void ipoib_stop_ah(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); set_bit(IPOIB_STOP_REAPER, &priv->flags); ipoib_flush_ah(dev); } -static void ipoib_ib_tx_timer_func(unsigned long ctx) +static int recvs_pending(struct net_device *dev) { - drain_tx_cq((struct net_device *)ctx); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + int pending = 0; + int i; + + for (i = 0; i < ipoib_recvq_size; ++i) + if (priv->rx_ring[i].skb) + ++pending; + + return pending; } -int ipoib_ib_dev_open(struct net_device *dev) +int ipoib_ib_dev_stop_default(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - int ret; + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ib_qp_attr qp_attr; + unsigned long begin; + struct ipoib_tx_buf *tx_req; + int i; - ipoib_pkey_dev_check_presence(dev); + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) + napi_disable(&priv->napi); - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey, - (!(priv->pkey & 0x7fff) ? "Invalid" : "not found")); - return -1; + ipoib_cm_dev_stop(dev); + + /* + * Move our QP to the error state and then reinitialize in + * when all work requests have completed or have been flushed. 
+ */ + qp_attr.qp_state = IB_QPS_ERR; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + + /* Wait for all sends and receives to complete */ + begin = jiffies; + + while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) { + if (time_after(jiffies, begin + 5 * HZ)) { + ipoib_warn(priv, + "timing out; %d sends %d receives not completed\n", + priv->tx_head - priv->tx_tail, + recvs_pending(dev)); + + /* + * assume the HW is wedged and just free up + * all our pending work requests. + */ + while ((int)priv->tx_tail - (int)priv->tx_head < 0) { + tx_req = &priv->tx_ring[priv->tx_tail & + (ipoib_sendq_size - 1)]; + ipoib_dma_unmap_tx(priv, tx_req); + dev_kfree_skb_any(tx_req->skb); + ++priv->tx_tail; + --priv->tx_outstanding; + } + + for (i = 0; i < ipoib_recvq_size; ++i) { + struct ipoib_rx_buf *rx_req; + + rx_req = &priv->rx_ring[i]; + if (!rx_req->skb) + continue; + ipoib_ud_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + dev_kfree_skb_any(rx_req->skb); + rx_req->skb = NULL; + } + + goto timeout; + } + + ipoib_drain_cq(dev); + + msleep(1); } + ipoib_dbg(priv, "All sends and receives done.\n"); + +timeout: + del_timer_sync(&priv->poll_timer); + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); + + ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); + + return 0; +} + +int ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_flush_ah(dev); + + return 0; +} + +void ipoib_ib_tx_timer_func(unsigned long ctx) +{ + drain_tx_cq((struct net_device *)ctx); +} + +int ipoib_ib_dev_open_default(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret; + ret = ipoib_init_qp(dev); if (ret) { ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); @@ -719,33 
+817,60 @@ int ipoib_ib_dev_open(struct net_device *dev) ret = ipoib_ib_post_receives(dev); if (ret) { ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); - goto dev_stop; + goto out; } ret = ipoib_cm_dev_open(dev); if (ret) { ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret); - goto dev_stop; + goto out; + } + + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) + napi_enable(&priv->napi); + + return 0; +out: + return -1; +} + +int ipoib_ib_dev_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + ipoib_pkey_dev_check_presence(dev); + + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { + ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey, + (!(priv->pkey & 0x7fff) ? "Invalid" : "not found")); + return -1; } clear_bit(IPOIB_STOP_REAPER, &priv->flags); queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); - if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_enable(&priv->napi); + if (priv->rn_ops->ndo_open(dev)) { + pr_warn("%s: Failed to open dev\n", dev->name); + goto dev_stop; + } + + set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); return 0; + dev_stop: - if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_enable(&priv->napi); + set_bit(IPOIB_STOP_REAPER, &priv->flags); + cancel_delayed_work(&priv->ah_reap_task); + set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + napi_enable(&priv->napi); ipoib_ib_dev_stop(dev); return -1; } void ipoib_pkey_dev_check_presence(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (!(priv->pkey & 0x7fff) || ib_find_pkey(priv->ca, priv->port, priv->pkey, @@ -757,7 +882,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev) void ipoib_ib_dev_up(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_pkey_dev_check_presence(dev); @@ -773,7 +898,7 @@ void 
ipoib_ib_dev_up(struct net_device *dev) void ipoib_ib_dev_down(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg(priv, "downing ib_dev\n"); @@ -786,22 +911,9 @@ void ipoib_ib_dev_down(struct net_device *dev) ipoib_flush_paths(dev); } -static int recvs_pending(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - int pending = 0; - int i; - - for (i = 0; i < ipoib_recvq_size; ++i) - if (priv->rx_ring[i].skb) - ++pending; - - return pending; -} - void ipoib_drain_cq(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int i, n; /* @@ -838,107 +950,6 @@ void ipoib_drain_cq(struct net_device *dev) local_bh_enable(); } -void ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_qp_attr qp_attr; - unsigned long begin; - struct ipoib_tx_buf *tx_req; - int i; - - if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_disable(&priv->napi); - - ipoib_cm_dev_stop(dev); - - /* - * Move our QP to the error state and then reinitialize in - * when all work requests have completed or have been flushed. - */ - qp_attr.qp_state = IB_QPS_ERR; - if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) - ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); - - /* Wait for all sends and receives to complete */ - begin = jiffies; - - while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) { - if (time_after(jiffies, begin + 5 * HZ)) { - ipoib_warn(priv, "timing out; %d sends %d receives not completed\n", - priv->tx_head - priv->tx_tail, recvs_pending(dev)); - - /* - * assume the HW is wedged and just free up - * all our pending work requests. 
- */ - while ((int) priv->tx_tail - (int) priv->tx_head < 0) { - tx_req = &priv->tx_ring[priv->tx_tail & - (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv, tx_req); - dev_kfree_skb_any(tx_req->skb); - ++priv->tx_tail; - --priv->tx_outstanding; - } - - for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; - - rx_req = &priv->rx_ring[i]; - if (!rx_req->skb) - continue; - ipoib_ud_dma_unmap_rx(priv, - priv->rx_ring[i].mapping); - dev_kfree_skb_any(rx_req->skb); - rx_req->skb = NULL; - } - - goto timeout; - } - - ipoib_drain_cq(dev); - - msleep(1); - } - - ipoib_dbg(priv, "All sends and receives done.\n"); - -timeout: - del_timer_sync(&priv->poll_timer); - qp_attr.qp_state = IB_QPS_RESET; - if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) - ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - - ipoib_flush_ah(dev); - - ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); -} - -int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - - priv->ca = ca; - priv->port = port; - priv->qp = NULL; - - if (ipoib_transport_dev_init(dev, ca)) { - printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name); - return -ENODEV; - } - - setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func, - (unsigned long) dev); - - if (dev->flags & IFF_UP) { - if (ipoib_ib_dev_open(dev)) { - ipoib_transport_dev_cleanup(dev); - return -ENODEV; - } - } - - return 0; -} - /* * Takes whatever value which is in pkey index 0 and updates priv->pkey * returns 0 if the pkey value was changed. @@ -967,6 +978,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv) */ priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; + + /* + * Update the broadcast address in the priv->broadcast object, + * in case it already exists, otherwise no one will do that. 
+ */ + if (priv->broadcast) { + spin_lock_irq(&priv->lock); + memcpy(priv->broadcast->mcmember.mgid.raw, + priv->dev->broadcast + 4, + sizeof(union ib_gid)); + spin_unlock_irq(&priv->lock); + } + return 0; } @@ -1216,7 +1240,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) void ipoib_ib_dev_cleanup(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg(priv, "cleaning up ib_dev\n"); /* @@ -1236,7 +1260,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) */ ipoib_stop_ah(dev); - ipoib_transport_dev_cleanup(dev); -} + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + priv->rn_ops->ndo_uninit(dev); + + if (priv->pd) { + ib_dealloc_pd(priv->pd); + priv->pd = NULL; + } +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d1d3fb7a6127..2869d1adb1de 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,9 +108,36 @@ static struct ib_client ipoib_client = { .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static int ipoib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netdev_notifier_info *ni = ptr; + struct net_device *dev = ni->dev; + + if (dev->netdev_ops->ndo_open != ipoib_open) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + ipoib_create_debug_files(dev); + break; + case NETDEV_CHANGENAME: + ipoib_delete_debug_files(dev); + ipoib_create_debug_files(dev); + break; + case NETDEV_UNREGISTER: + ipoib_delete_debug_files(dev); + break; + } + + return NOTIFY_DONE; +} +#endif + int ipoib_open(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg(priv, "bringing up interface\n"); @@ -157,7 +184,7 @@ err_disable: static int ipoib_stop(struct net_device *dev) { - struct ipoib_dev_priv *priv 
= netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg(priv, "stopping interface\n"); @@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev) static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); @@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); /* dev->mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(dev)) { @@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params( int ipoib_set_mode(struct net_device *dev, const char *buf) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) && !strcmp(buf, "connected\n")) || @@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) struct ipoib_path *__path_find(struct net_device *dev, void *gid) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; @@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid) static int __path_add(struct net_device *dev, struct ipoib_path *path) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; @@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); - 
ipoib_dbg(netdev_priv(dev), "path_free\n"); + ipoib_dbg(ipoib_priv(dev), "path_free\n"); /* remove all neigh connected to this path */ ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); @@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) int ipoib_path_iter_next(struct ipoib_path_iter *iter) { - struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct ipoib_dev_priv *priv = ipoib_priv(iter->dev); struct rb_node *n; struct ipoib_path *path; int ret = 1; @@ -635,92 +662,21 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, void ipoib_mark_paths_invalid(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_path *path, *tp; spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { - ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n", - be16_to_cpu(path->pathrec.dlid), - path->pathrec.dgid.raw); + ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n", + be32_to_cpu(sa_path_get_dlid(&path->pathrec)), + path->pathrec.dgid.raw); path->valid = 0; } spin_unlock_irq(&priv->lock); } -struct classport_info_context { - struct ipoib_dev_priv *priv; - struct completion done; - struct ib_sa_query *sa_query; -}; - -static void classport_info_query_cb(int status, struct ib_class_port_info *rec, - void *context) -{ - struct classport_info_context *cb_ctx = context; - struct ipoib_dev_priv *priv; - - WARN_ON(!context); - - priv = cb_ctx->priv; - - if (status || !rec) { - pr_debug("device: %s failed query classport_info status: %d\n", - priv->dev->name, status); - /* keeps the default, will try next mcast_restart */ - priv->sm_fullmember_sendonly_support = false; - goto out; - } - - if (ib_get_cpi_capmask2(rec) & - IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) { - pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n", - priv->dev->name); - priv->sm_fullmember_sendonly_support = true; - } else { - 
pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n", - priv->dev->name); - priv->sm_fullmember_sendonly_support = false; - } - -out: - complete(&cb_ctx->done); -} - -int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv) -{ - struct classport_info_context *callback_context; - int ret; - - callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL); - if (!callback_context) - return -ENOMEM; - - callback_context->priv = priv; - init_completion(&callback_context->done); - - ret = ib_sa_classport_info_rec_query(&ipoib_sa_client, - priv->ca, priv->port, 3000, - GFP_KERNEL, - classport_info_query_cb, - callback_context, - &callback_context->sa_query); - if (ret < 0) { - pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n", - priv->dev->name, ret); - kfree(callback_context); - return ret; - } - - /* waiting for the callback to finish before returnning */ - wait_for_completion(&callback_context->done); - kfree(callback_context); - - return ret; -} - static void push_pseudo_header(struct sk_buff *skb, const char *daddr) { struct ipoib_pseudo_header *phdr; @@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr) void ipoib_flush_paths(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; @@ -760,12 +716,12 @@ void ipoib_flush_paths(struct net_device *dev) } static void path_rec_completion(int status, - struct ib_sa_path_rec *pathrec, + struct sa_path_rec *pathrec, void *path_ptr) { struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah = NULL; struct ipoib_ah *old_ah = NULL; struct ipoib_neigh *neigh, *tn; @@ -775,7 +731,8 @@ static void path_rec_completion(int status, if (!status) ipoib_dbg(priv, "PathRec 
LID 0x%04x for GID %pI6\n", - be16_to_cpu(pathrec->dlid), pathrec->dgid.raw); + be32_to_cpu(sa_path_get_dlid(pathrec)), + pathrec->dgid.raw); else ipoib_dbg(priv, "PathRec status %d for GID %pI6\n", status, path->pathrec.dgid.raw); @@ -783,7 +740,7 @@ static void path_rec_completion(int status, skb_queue_head_init(&skqueue); if (!status) { - struct ib_ah_attr av; + struct rdma_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) ah = ipoib_create_ah(dev, priv->pd, &av); @@ -798,7 +755,8 @@ static void path_rec_completion(int status, path->ah = ah; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", - ah, be16_to_cpu(pathrec->dlid), pathrec->sl); + ah, be32_to_cpu(sa_path_get_dlid(pathrec)), + pathrec->sl); while ((skb = __skb_dequeue(&path->queue))) __skb_queue_tail(&skqueue, skb); @@ -858,7 +816,7 @@ static void path_rec_completion(int status, static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_path *path; if (!priv->broadcast) @@ -874,6 +832,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) INIT_LIST_HEAD(&path->neigh_list); + if (rdma_cap_opa_ah(priv->ca, priv->port)) + path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA; + else + path->pathrec.rec_type = SA_PATH_REC_TYPE_IB; memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); @@ -886,7 +848,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) static int path_rec_start(struct net_device *dev, struct ipoib_path *path) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg(priv, "Start path record lookup for %pI6\n", path->pathrec.dgid.raw); @@ -917,7 +879,8 @@ static int path_rec_start(struct net_device *dev, static void 
neigh_add_path(struct sk_buff *skb, u8 *daddr, struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; unsigned long flags; @@ -964,7 +927,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, } } else { spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); + path->ah->last_send = rn->send(dev, skb, path->ah->ah, + IPOIB_QPN(daddr)); ipoib_neigh_put(neigh); return; } @@ -998,7 +962,8 @@ err_drop: static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, struct ipoib_pseudo_header *phdr) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); struct ipoib_path *path; unsigned long flags; @@ -1038,11 +1003,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, } if (path->ah) { - ipoib_dbg(priv, "Send unicast ARP to %04x\n", - be16_to_cpu(path->pathrec.dlid)); + ipoib_dbg(priv, "Send unicast ARP to %08x\n", + be32_to_cpu(sa_path_get_dlid(&path->pathrec))); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); + path->ah->last_send = rn->send(dev, skb, path->ah->ah, + IPOIB_QPN(phdr->hwaddr)); return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { @@ -1058,7 +1024,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); struct ipoib_neigh *neigh; struct ipoib_pseudo_header *phdr; struct ipoib_header *header; @@ -1122,7 +1089,8 @@ send_using_neigh: goto unref; } } else if 
(neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr)); + neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah, + IPOIB_QPN(phdr->hwaddr)); goto unref; } @@ -1144,7 +1112,7 @@ unref: static void ipoib_timeout(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_warn(priv, "transmit timeout: latency %d msecs\n", jiffies_to_msecs(jiffies - dev_trans_start(dev))); @@ -1178,7 +1146,7 @@ static int ipoib_hard_header(struct sk_buff *skb, static void ipoib_set_mcast_list(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set"); @@ -1190,7 +1158,7 @@ static void ipoib_set_mcast_list(struct net_device *dev) static int ipoib_get_iflink(const struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); /* parent interface */ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) @@ -1218,7 +1186,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; struct ipoib_neigh *neigh = NULL; @@ -1347,7 +1315,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; struct ipoib_neigh *neigh; @@ -1404,7 +1372,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh) { /* neigh reference count was dropprd to zero */ struct net_device *dev = neigh->dev; - 
struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct sk_buff *skb; if (neigh->ah) ipoib_put_ah(neigh->ah); @@ -1414,7 +1382,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh) } if (ipoib_cm_get(neigh)) ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); - ipoib_dbg(netdev_priv(dev), + ipoib_dbg(ipoib_priv(dev), "neigh free for %06x %pI6\n", IPOIB_QPN(neigh->daddr), neigh->daddr + 4); @@ -1436,7 +1404,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp) void ipoib_neigh_free(struct ipoib_neigh *neigh) { struct net_device *dev = neigh->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; struct ipoib_neigh __rcu **np; @@ -1519,7 +1487,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; unsigned long flags; @@ -1605,7 +1573,7 @@ out_unlock: static void ipoib_neigh_hash_uninit(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); @@ -1622,10 +1590,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) wait_for_completion(&priv->ntbl.deleted); } +void ipoib_dev_uninit_default(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) + ipoib_transport_dev_cleanup(dev); + + ipoib_cm_dev_cleanup(dev); + + kfree(priv->rx_ring); + vfree(priv->tx_ring); + + priv->rx_ring = NULL; + priv->tx_ring = NULL; +} + +static int ipoib_dev_init_default(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct 
ipoib_dev_priv *priv = ipoib_priv(dev); + + netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT); /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, @@ -1636,46 +1620,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", - ca->name, ipoib_sendq_size); + priv->ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ - if (ipoib_ib_dev_init(dev, ca, port)) + if (ipoib_transport_dev_init(dev, priv->ca)) { + pr_warn("%s: ipoib_transport_dev_init failed\n", + priv->ca->name); goto out_tx_ring_cleanup; + } + + /* after qp created set dev address */ + priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; + priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; + priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; + + setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func, + (unsigned long)dev); + + return 0; + +out_tx_ring_cleanup: + vfree(priv->tx_ring); + +out_rx_ring_cleanup: + kfree(priv->rx_ring); + +out: + return -ENOMEM; +} + +int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret = -ENOMEM; + + priv->ca = ca; + priv->port = port; + priv->qp = NULL; /* - * Must be after ipoib_ib_dev_init so we can allocate a per - * device wq there and use it here + * the various IPoIB tasks assume they will never race against + * themselves, so always use a single thread workqueue */ - if (ipoib_neigh_hash_init(priv) < 0) + priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM); + if (!priv->wq) { + pr_warn("%s: failed to allocate device WQ\n", dev->name); + goto out; + } + + /* create pd, which used both for control and datapath*/ + priv->pd = ib_alloc_pd(priv->ca, 0); + 
if (IS_ERR(priv->pd)) { + pr_warn("%s: failed to allocate PD\n", ca->name); + goto clean_wq; + } + + ret = priv->rn_ops->ndo_init(dev); + if (ret) { + pr_warn("%s failed to init HW resource\n", dev->name); + goto out_free_pd; + } + + if (ipoib_neigh_hash_init(priv) < 0) { + pr_warn("%s failed to init neigh hash\n", dev->name); goto out_dev_uninit; + } + + if (dev->flags & IFF_UP) { + if (ipoib_ib_dev_open(dev)) { + pr_warn("%s failed to open device\n", dev->name); + ret = -ENODEV; + goto out_dev_uninit; + } + } return 0; out_dev_uninit: ipoib_ib_dev_cleanup(dev); -out_tx_ring_cleanup: - vfree(priv->tx_ring); +out_free_pd: + if (priv->pd) { + ib_dealloc_pd(priv->pd); + priv->pd = NULL; + } -out_rx_ring_cleanup: - kfree(priv->rx_ring); +clean_wq: + if (priv->wq) { + destroy_workqueue(priv->wq); + priv->wq = NULL; + } out: - return -ENOMEM; + return ret; } void ipoib_dev_cleanup(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; + struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; LIST_HEAD(head); ASSERT_RTNL(); - ipoib_delete_debug_files(dev); - /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { /* Stop GC on child */ @@ -1685,24 +1734,21 @@ void ipoib_dev_cleanup(struct net_device *dev) } unregister_netdevice_many(&head); - /* - * Must be before ipoib_ib_dev_cleanup or we delete an in use - * work queue - */ ipoib_neigh_hash_uninit(dev); ipoib_ib_dev_cleanup(dev); - kfree(priv->rx_ring); - vfree(priv->tx_ring); - - priv->rx_ring = NULL; - priv->tx_ring = NULL; + /* no more works over the priv->wq */ + if (priv->wq) { + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); + priv->wq = NULL; + } } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state); } @@ 
-1710,7 +1756,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat static int ipoib_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivf) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int err; err = ib_get_vf_config(priv->ca, vf, priv->port, ivf); @@ -1724,7 +1770,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf, static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID) return -EINVAL; @@ -1735,7 +1781,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) static int ipoib_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats); } @@ -1773,21 +1819,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_get_iflink = ipoib_get_iflink, }; -void ipoib_setup(struct net_device *dev) +void ipoib_setup_common(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - - if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION) - dev->netdev_ops = &ipoib_netdev_ops_vf; - else - dev->netdev_ops = &ipoib_netdev_ops_pf; - dev->header_ops = &ipoib_header_ops; ipoib_set_ethtool_ops(dev); - netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT); - dev->watchdog_timeo = HZ; dev->flags |= IFF_BROADCAST | IFF_MULTICAST; @@ -1801,11 +1838,14 @@ void ipoib_setup(struct net_device *dev) netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); +} - priv->dev = dev; +static void ipoib_build_priv(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + priv->dev = dev; spin_lock_init(&priv->lock); - 
init_rwsem(&priv->vlan_rwsem); INIT_LIST_HEAD(&priv->path_list); @@ -1823,22 +1863,99 @@ void ipoib_setup(struct net_device *dev) INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } -struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) +static const struct net_device_ops ipoib_netdev_default_pf = { + .ndo_init = ipoib_dev_init_default, + .ndo_uninit = ipoib_dev_uninit_default, + .ndo_open = ipoib_ib_dev_open_default, + .ndo_stop = ipoib_ib_dev_stop_default, +}; + +static struct net_device +*ipoib_create_netdev_default(struct ib_device *hca, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)) { struct net_device *dev; + struct rdma_netdev *rn; - dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name, - NET_NAME_UNKNOWN, ipoib_setup); + dev = alloc_netdev((int)sizeof(struct rdma_netdev), + name, + name_assign_type, setup); if (!dev) return NULL; - return netdev_priv(dev); + rn = netdev_priv(dev); + + rn->send = ipoib_send; + rn->attach_mcast = ipoib_mcast_attach; + rn->detach_mcast = ipoib_mcast_detach; + rn->hca = hca; + + dev->netdev_ops = &ipoib_netdev_default_pf; + + return dev; +} + +static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port, + const char *name) +{ + struct net_device *dev; + + if (hca->alloc_rdma_netdev) { + dev = hca->alloc_rdma_netdev(hca, port, + RDMA_NETDEV_IPOIB, name, + NET_NAME_UNKNOWN, + ipoib_setup_common); + if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP) + return NULL; + } + + if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP) + dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN, + ipoib_setup_common); + + return dev; +} + +struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, + const char *name) +{ + struct net_device *dev; + struct ipoib_dev_priv *priv; + struct rdma_netdev *rn; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + dev = ipoib_get_netdev(hca, port, name); + if (!dev) + 
goto free_priv; + + priv->rn_ops = dev->netdev_ops; + + /* fixme : should be after the query_cap */ + if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION) + dev->netdev_ops = &ipoib_netdev_ops_vf; + else + dev->netdev_ops = &ipoib_netdev_ops_pf; + + rn = netdev_priv(dev); + rn->clnt_priv = priv; + ipoib_build_priv(dev); + + return priv; +free_priv: + kfree(priv); + return NULL; } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, char *buf) { - struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); + struct net_device *ndev = to_net_dev(dev); + struct ipoib_dev_priv *priv = ipoib_priv(ndev); return sprintf(buf, "0x%04x\n", priv->pkey); } @@ -1847,14 +1964,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); static ssize_t show_umcast(struct device *dev, struct device_attribute *attr, char *buf) { - struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); + struct net_device *ndev = to_net_dev(dev); + struct ipoib_dev_priv *priv = ipoib_priv(ndev); return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) { - struct ipoib_dev_priv *priv = netdev_priv(ndev); + struct ipoib_dev_priv *priv = ipoib_priv(ndev); if (umcast_val > 0) { set_bit(IPOIB_FLAG_UMCAST, &priv->flags); @@ -1927,7 +2045,7 @@ static int ipoib_check_lladdr(struct net_device *dev, static int ipoib_set_mac(struct net_device *dev, void *addr) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct sockaddr_storage *ss = addr; int ret; @@ -2000,7 +2118,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) priv->hca_caps = hca->attrs.device_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; if (priv->hca_caps & IB_DEVICE_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; @@ -2016,7 +2134,7 @@ 
static struct net_device *ipoib_add_port(const char *format, struct ib_port_attr attr; int result = -ENOMEM; - priv = ipoib_intf_alloc(format); + priv = ipoib_intf_alloc(hca, port, format); if (!priv) goto alloc_mem_failed; @@ -2090,8 +2208,6 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } - ipoib_create_debug_files(priv->dev); - if (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) @@ -2106,7 +2222,6 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: @@ -2146,7 +2261,7 @@ static void ipoib_add_one(struct ib_device *device) continue; dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { - priv = netdev_priv(dev); + priv = ipoib_priv(dev); list_add_tail(&priv->list, dev_list); count++; } @@ -2186,11 +2301,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) unregister_netdev(priv->dev); free_netdev(priv->dev); + kfree(priv); } kfree(dev_list); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static struct notifier_block ipoib_netdev_notifier = { + .notifier_call = ipoib_netdev_event, +}; +#endif + static int __init ipoib_init_module(void) { int ret; @@ -2243,6 +2365,9 @@ static int __init ipoib_init_module(void) if (ret) goto err_client; +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + register_netdevice_notifier(&ipoib_netdev_notifier); +#endif return 0; err_client: @@ -2260,6 +2385,9 @@ err_fs: static void __exit ipoib_cleanup_module(void) { +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + unregister_netdevice_notifier(&ipoib_netdev_notifier); +#endif ipoib_netlink_fini(); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 69e146cdc306..057f58e6afca 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct net_device *dev = mcast->dev; int tx_dropped = 0; - ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", + ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); /* remove all neigh connected to this mcast */ @@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rb_node *n = priv->multicast_tree.rb_node; while (n) { @@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; while (*n) { @@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct ib_sa_mcmember_rec *mcmember) { struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); struct ipoib_ah *ah; + struct rdma_ah_attr av; int ret; int set_qkey = 0; @@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, return 0; } - ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid, set_qkey); + ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid), + set_qkey, priv->qkey); if (ret < 0) { ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", mcast->mcmember.mgid.raw); @@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } } - { - 
struct ib_ah_attr av = { - .dlid = be16_to_cpu(mcast->mcmember.mlid), - .port_num = priv->port, - .sl = mcast->mcmember.sl, - .ah_flags = IB_AH_GRH, - .static_rate = mcast->mcmember.rate, - .grh = { - .flow_label = be32_to_cpu(mcast->mcmember.flow_label), - .hop_limit = mcast->mcmember.hop_limit, - .sgid_index = 0, - .traffic_class = mcast->mcmember.traffic_class - } - }; - av.grh.dgid = mcast->mcmember.mgid; - - ah = ipoib_create_ah(dev, priv->pd, &av); - if (IS_ERR(ah)) { - ipoib_warn(priv, "ib_address_create failed %ld\n", - -PTR_ERR(ah)); - /* use original error */ - return PTR_ERR(ah); - } else { - spin_lock_irq(&priv->lock); - mcast->ah = ah; - spin_unlock_irq(&priv->lock); - - ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", - mcast->mcmember.mgid.raw, - mcast->ah->ah, - be16_to_cpu(mcast->mcmember.mlid), - mcast->mcmember.sl); - } + memset(&av, 0, sizeof(av)); + av.type = rdma_ah_find_type(priv->ca, priv->port); + rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)), + rdma_ah_set_port_num(&av, priv->port); + rdma_ah_set_sl(&av, mcast->mcmember.sl); + rdma_ah_set_static_rate(&av, mcast->mcmember.rate); + + rdma_ah_set_grh(&av, &mcast->mcmember.mgid, + be32_to_cpu(mcast->mcmember.flow_label), + 0, mcast->mcmember.hop_limit, + mcast->mcmember.traffic_class); + + ah = ipoib_create_ah(dev, priv->pd, &av); + if (IS_ERR(ah)) { + ipoib_warn(priv, "ib_address_create failed %ld\n", + -PTR_ERR(ah)); + /* use original error */ + return PTR_ERR(ah); } + spin_lock_irq(&priv->lock); + mcast->ah = ah; + spin_unlock_irq(&priv->lock); + + ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", + mcast->mcmember.mgid.raw, + mcast->ah->ah, + be16_to_cpu(mcast->mcmember.mlid), + mcast->mcmember.sl); /* actually send any queued packets */ netif_tx_lock_bh(dev); @@ -331,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, carrier_on_task); struct ib_port_attr attr; - 
int ret; if (ib_query_port(priv->ca, priv->port, &attr) || attr.state != IB_PORT_ACTIVE) { @@ -344,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) * because the broadcast group must always be joined first and is always * re-joined if the SM changes substantially. */ - ret = ipoib_check_sm_sendonly_fullmember_support(priv); - if (ret < 0) - pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n", - priv->dev->name, ret); - + priv->sm_fullmember_sendonly_support = + ib_sa_sendonly_fullmem_support(&ipoib_sa_client, + priv->ca, priv->port); /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being @@ -375,7 +369,7 @@ static int ipoib_mcast_join_complete(int status, { struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n", test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? 
@@ -477,7 +471,7 @@ out_locked: */ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_sa_multicast *multicast; struct ib_sa_mcmember_rec rec = { .join_state = 1 @@ -489,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return -EINVAL; + init_completion(&mcast->done); + set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; @@ -647,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work) if (mcast->backoff == 1 || time_after_eq(jiffies, mcast->delay_until)) { /* Found the next unjoined group */ - init_completion(&mcast->done); - set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); if (ipoib_mcast_join(dev, mcast)) { spin_unlock_irq(&priv->lock); return; @@ -668,17 +663,15 @@ out: queue_delayed_work(priv->wq, &priv->mcast_task, delay_until - jiffies); } - if (mcast) { - init_completion(&mcast->done); - set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + if (mcast) ipoib_mcast_join(dev, mcast); - } + spin_unlock_irq(&priv->lock); } void ipoib_mcast_start_thread(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); unsigned long flags; ipoib_dbg_mcast(priv, "starting multicast thread\n"); @@ -690,7 +683,7 @@ void ipoib_mcast_start_thread(struct net_device *dev) int ipoib_mcast_stop_thread(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); unsigned long flags; ipoib_dbg_mcast(priv, "stopping multicast thread\n"); @@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev) static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) { - struct ipoib_dev_priv *priv = 
netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); int ret = 0; if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) @@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) mcast->mcmember.mgid.raw); /* Remove ourselves from the multicast group */ - ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, - be16_to_cpu(mcast->mcmember.mlid)); + ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid)); if (ret) ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) @@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list) void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); struct ipoib_mcast *mcast; unsigned long flags; void *mgid = daddr + 4; @@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) } } spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah, + IB_MULTICAST_QPN); if (neigh) ipoib_neigh_put(neigh); return; @@ -837,7 +833,7 @@ unlock: void ipoib_mcast_dev_flush(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); LIST_HEAD(remove_list); struct ipoib_mcast *mcast, *tmcast; unsigned long flags; @@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) { - struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct ipoib_dev_priv *priv = ipoib_priv(iter->dev); struct rb_node *n; struct ipoib_mcast *mcast; int ret = 1; diff --git 
a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index cdc7df4fdb8a..28884781311b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); u16 val; if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey)) @@ -107,7 +107,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, if (!pdev || pdev->type != ARPHRD_INFINIBAND) return -ENODEV; - ppriv = netdev_priv(pdev); + ppriv = ipoib_priv(pdev); if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { ipoib_warn(ppriv, "child creation disallowed for child devices\n"); @@ -129,7 +129,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, */ child_pkey |= 0x8000; - err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD); + err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), + child_pkey, IPOIB_RTNL_CHILD); if (!err && data) err = ipoib_changelink(dev, tb, data); @@ -140,8 +141,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head { struct ipoib_dev_priv *priv, *ppriv; - priv = netdev_priv(dev); - ppriv = netdev_priv(priv->parent); + priv = ipoib_priv(dev); + ppriv = ipoib_priv(priv->parent); down_write(&ppriv->vlan_rwsem); unregister_netdevice_queue(dev, head); @@ -161,7 +162,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .maxtype = IFLA_IPOIB_MAX, .policy = ipoib_policy, .priv_size = sizeof(struct ipoib_dev_priv), - .setup = ipoib_setup, + .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, .changelink = ipoib_changelink, .dellink = ipoib_unregister_child_dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 
b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 189dcd1709d2..bb64baf25309 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -35,9 +35,10 @@ #include "ipoib.h" -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey) +int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, + union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; @@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int goto out; /* set correct QKey for QP */ - qp_attr->qkey = priv->qkey; + qp_attr->qkey = qkey; ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); if (ret) { ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); @@ -74,9 +75,20 @@ out: return ret; } +int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca, + union ib_gid *mgid, u16 mlid) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret; + + ret = ib_detach_mcast(priv->qp, mgid, mlid); + + return ret; +} + int ipoib_init_qp(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret; struct ib_qp_attr qp_attr; int attr_mask; @@ -130,7 +142,7 @@ out_fail: int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_init_attr init_attr = { .cap = { .max_send_wr = ipoib_sendq_size, @@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size; int i; - priv->pd = ib_alloc_pd(priv->ca, 0); - if (IS_ERR(priv->pd)) { - printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); - return -ENODEV; - } - - /* - * the various IPoIB tasks assume they will never race 
against - * themselves, so always use a single thread workqueue - */ - priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM); - if (!priv->wq) { - printk(KERN_WARNING "ipoib: failed to allocate device WQ\n"); - goto out_free_pd; - } - size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); if (!ret) { @@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) size += ipoib_recvq_size * ipoib_max_conn_qp; } else if (ret != -ENOSYS) - goto out_free_wq; + return -ENODEV; cq_attr.cqe = size; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, @@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_send_cq; } - priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; - priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; - priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; - for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; @@ -247,26 +239,18 @@ out_free_recv_cq: out_cm_dev_cleanup: ipoib_cm_dev_cleanup(dev); -out_free_wq: - destroy_workqueue(priv->wq); - priv->wq = NULL; - -out_free_pd: - ib_dealloc_pd(priv->pd); - return -ENODEV; } void ipoib_transport_dev_cleanup(struct net_device *dev) { - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); if (priv->qp) { if (ib_destroy_qp(priv->qp)) ipoib_warn(priv, "ib_qp_destroy failed\n"); priv->qp = NULL; - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } if (ib_destroy_cq(priv->send_cq)) @@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) if (ib_destroy_cq(priv->recv_cq)) ipoib_warn(priv, "ib_cq_destroy (recv) failed\n"); - - ipoib_cm_dev_cleanup(dev); - - if (priv->wq) { - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); - priv->wq = NULL; - } - - ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c 
b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 3e10e3dac2e7..36dc4fcaa3cd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -44,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = ipoib_priv(dev); return sprintf(buf, "%s\n", priv->parent->name); } @@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto register_failed; } - ipoib_create_debug_files(priv->dev); - /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { if (ipoib_cm_add_mode_attr(priv->dev)) @@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, sysfs_failed: result = -ENOMEM; - ipoib_delete_debug_files(priv->dev); unregister_netdevice(priv->dev); register_failed: @@ -128,14 +125,15 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = netdev_priv(pdev); + ppriv = ipoib_priv(pdev); if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) return -EPERM; snprintf(intf_name, sizeof intf_name, "%s.%04x", ppriv->dev->name, pkey); - priv = ipoib_intf_alloc(intf_name); + + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!priv) return -ENOMEM; @@ -183,7 +181,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = netdev_priv(pdev); + ppriv = ipoib_priv(pdev); if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) return -EPERM; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9d0b22ad58c1..c1ae4aeae2f9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; 
struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 81ae2e30dd12..12ed62ce9ff7 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -612,7 +612,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn, iser_conn, rkey); if (unlikely(!iser_conn->snd_w_inv)) { - iser_err("conn %p: unexepected remote invalidation, " + iser_err("conn %p: unexpected remote invalidation, " "terminating connection\n", iser_conn); return -EPROTO; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 30b622f2ab73..c538a38c91ce 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); diff --git 
a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig new file mode 100644 index 000000000000..48132ab5e6b9 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/Kconfig @@ -0,0 +1,8 @@ +config INFINIBAND_OPA_VNIC + tristate "Intel OPA VNIC support" + depends on X86_64 && INFINIBAND + ---help--- + This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC) + driver for Ethernet over Omni-Path feature. It implements the HW + independent VNIC functionality. It interfaces with Linux stack for + data path and IB MAD for the control path. diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile new file mode 100644 index 000000000000..8061b287cfe4 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/Makefile @@ -0,0 +1,7 @@ +# Makefile - Intel Omni-Path Virtual Network Controller driver +# Copyright(c) 2017, Intel Corporation. +# +obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o + +opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \ + opa_vnic_vema.o opa_vnic_vema_iface.o diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c new file mode 100644 index 000000000000..2e8fee982436 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -0,0 +1,475 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC encapsulation/decapsulation function. 
+ */ + +#include <linux/if_ether.h> +#include <linux/if_vlan.h> + +#include "opa_vnic_internal.h" + +/* OPA 16B Header fields */ +#define OPA_16B_LID_MASK 0xFFFFFull +#define OPA_16B_SLID_HIGH_SHFT 8 +#define OPA_16B_SLID_MASK 0xF00ull +#define OPA_16B_DLID_MASK 0xF000ull +#define OPA_16B_DLID_HIGH_SHFT 12 +#define OPA_16B_LEN_SHFT 20 +#define OPA_16B_SC_SHFT 20 +#define OPA_16B_RC_SHFT 25 +#define OPA_16B_PKEY_SHFT 16 + +#define OPA_VNIC_L4_HDR_SHFT 16 + +/* L2+L4 hdr len is 20 bytes (5 quad words) */ +#define OPA_VNIC_HDR_QW_LEN 5 + +static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len, + u16 pkey, u16 entropy, u8 sc, u8 rc, + u8 l4_type, u16 l4_hdr) +{ + /* h[1]: LT=1, 16B L2=10 */ + u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0}; + + h[2] = l4_type; + h[3] = entropy; + h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT; + + /* Extract and set 4 upper bits and 20 lower bits of the lids */ + h[0] |= (slid & OPA_16B_LID_MASK); + h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK); + + h[1] |= (dlid & OPA_16B_LID_MASK); + h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK); + + h[0] |= (len << OPA_16B_LEN_SHFT); + h[1] |= (rc << OPA_16B_RC_SHFT); + h[1] |= (sc << OPA_16B_SC_SHFT); + h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT); + + memcpy(hdr, h, OPA_VNIC_HDR_LEN); +} + +/* + * Using a simple hash table for mac table implementation with the last octet + * of mac address as a key. 
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_node *tmp;
+	int bkt;
+
+	if (!mactbl)	/* NULL table: nothing to free */
+		return;
+
+	vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+		hash_del(&node->hlist);
+		kfree(node);
+	}
+	kfree(mactbl);	/* nodes are gone; now free the bucket array itself */
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+	u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+	struct hlist_head *mactbl;
+
+	mactbl = kzalloc(size, GFP_KERNEL);
+	if (!mactbl)
+		return ERR_PTR(-ENOMEM);	/* never NULL: failure is an ERR_PTR */
+
+	vnic_hash_init(mactbl);
+	return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - unpublish, then empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+	struct hlist_head *mactbl;
+
+	mutex_lock(&adapter->mactbl_lock);
+	mactbl = rcu_access_pointer(adapter->mactbl);
+	rcu_assign_pointer(adapter->mactbl, NULL);	/* new readers now see no table */
+	synchronize_rcu();	/* wait before freeing what older readers may still hold */
+	opa_vnic_free_mac_tbl(mactbl);
+	mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_head *mactbl;
+	int bkt;
+	u16 loffset, lnum_entries;
+
+	rcu_read_lock();
+	mactbl = rcu_dereference(adapter->mactbl);
+	if (!mactbl)
+		goto get_mac_done;	/* no table: tbl (including its digest) is left untouched */
+
+	loffset = be16_to_cpu(tbl->offset);
+	lnum_entries = be16_to_cpu(tbl->num_entries);
+
+	vnic_hash_for_each(mactbl, bkt, node, hlist) {
+		struct __opa_vnic_mactable_entry *nentry = &node->entry;
+		struct opa_veswport_mactable_entry *entry;
+
+		if ((node->index < loffset) ||
+		    (node->index >= (loffset + lnum_entries)))
+			continue;	/* outside [offset, offset + num_entries) */
+
+		/* copy the node into slot (index - offset) of tbl; dlid_sd goes out big-endian */
+		entry = &tbl->tbl_entries[node->index - loffset];
+		memcpy(entry->mac_addr, nentry->mac_addr,
+		       ARRAY_SIZE(entry->mac_addr));
+		memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+		       ARRAY_SIZE(entry->mac_addr_mask));
+		entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+	}
+	tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+	rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table.
+ *    (except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return.
+ *  - Switch to the new table.
+ *  - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl)
+{
+	struct opa_vnic_mac_tbl_node *node, *new_node;
+	struct hlist_head *new_mactbl, *old_mactbl;
+	int i, bkt, rc = 0;
+	u8 key;
+	u16 loffset, lnum_entries;
+
+	mutex_lock(&adapter->mactbl_lock);
+	/* allocate new mac table */
+	new_mactbl = opa_vnic_alloc_mac_tbl();
+	if (IS_ERR(new_mactbl)) {
+		mutex_unlock(&adapter->mactbl_lock);
+		return PTR_ERR(new_mactbl);
+	}
+
+	loffset = be16_to_cpu(tbl->offset);
+	lnum_entries = be16_to_cpu(tbl->num_entries);
+
+	/* add updated entries to the new mac table */
+	for (i = 0; i < lnum_entries; i++) {
+		struct __opa_vnic_mactable_entry *nentry;
+		struct opa_veswport_mactable_entry *entry =
+							&tbl->tbl_entries[i];
+		u8 *mac_addr = entry->mac_addr;
+		u8 empty_mac[ETH_ALEN] = { 0 };
+
+		v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+		      loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+		      mac_addr[3], mac_addr[4], mac_addr[5],
+		      entry->dlid_sd);	/* printed raw, before the be32_to_cpu() below */
+
+		/* an all-zero MAC marks a removed entry: do not add it to the new table */
+		if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+			continue;
+
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node) {
+			rc = -ENOMEM;
+			goto updt_done;
+		}
+
+		node->index = loffset + i;
+		nentry = &node->entry;
+		memcpy(nentry->mac_addr, entry->mac_addr,
+		       ARRAY_SIZE(nentry->mac_addr));
+		memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+		       ARRAY_SIZE(nentry->mac_addr_mask));
+		nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+		key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+		vnic_hash_add(new_mactbl, &node->hlist, key);
+	}
+
+	/* add other entries from current mac table to new mac table */
+	old_mactbl = rcu_access_pointer(adapter->mactbl);
+	if (!old_mactbl)
+		goto switch_tbl;	/* no current table: nothing to carry over */
+
+	vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+		if ((node->index >= loffset) &&
+		    (node->index < (loffset + lnum_entries)))
+			continue;	/* index lies in the section being replaced */
+
+		new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+		if (!new_node) {
+			rc = -ENOMEM;
+			goto updt_done;
+		}
+
+		new_node->index = node->index;
+		memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+		key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+		vnic_hash_add(new_mactbl, &new_node->hlist, key);
+	}
+
+switch_tbl:
+	/* publish the new table, then wait before the old one is freed below */
+	rcu_assign_pointer(adapter->mactbl, new_mactbl);
+	synchronize_rcu();
+
+	adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+	/* upon failure, free the new table; otherwise, free the old table */
+	if (rc)
+		opa_vnic_free_mac_tbl(new_mactbl);
+	else
+		opa_vnic_free_mac_tbl(old_mactbl);
+
+	mutex_unlock(&adapter->mactbl_lock);
+	return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - look up the dest MAC in the mac table; returns its DLID, or 0 if there is no table or no match */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+				     struct ethhdr *mac_hdr)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_head *mactbl;
+	u32 dlid = 0;
+	u8 key;
+
+	rcu_read_lock();
+	mactbl = rcu_dereference(adapter->mactbl);
+	if (unlikely(!mactbl))
+		goto chk_done;
+
+	key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+	vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+		struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+		/* if related to source mac, skip */
+		if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+			continue;
+
+		if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+			    ARRAY_SIZE(node->entry.mac_addr))) {
+			/* mac address found (exact compare; mac_addr_mask is not consulted here) */
+			dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+			break;
+		}
+	}
+
+chk_done:
+	rcu_read_unlock();
+	return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID; 0 means none could be resolved */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+				  struct sk_buff *skb, u8 def_port)
+{
+	struct __opa_veswport_info *info = &adapter->info;
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u32 dlid;
+
+	dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+	if (dlid)
+		return dlid;	/* mac table hit takes precedence over the fallbacks below */
+
+	if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+		dlid = info->vesw.u_mcast_dlid;
+	} else {
+		if (is_local_ether_addr(mac_hdr->h_dest)) {
+			dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+				((uint32_t)mac_hdr->h_dest[4] << 8)  |
+				mac_hdr->h_dest[3];	/* DLID is built from MAC octets 5, 4, 3 */
+			if (unlikely(!dlid))
+				v_warn("Null dlid in MAC address\n");
+		} else if (def_port != OPA_VNIC_INVALID_PORT) {
+			dlid = info->vesw.u_ucast_dlid[def_port];
+		}
+	}
+
+	return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class, chosen by VLAN PCP (if tagged) and unicast vs multicast */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+			  struct sk_buff *skb)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u16 vlan_tci;
+	u8 sc;
+
+	if (!__vlan_get_tag(skb, &vlan_tci)) {	/* NOTE(review): called after skb_push() in opa_vnic_encap_skb — confirm it still inspects the Ethernet header */
+		u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.pcp_to_sc_mc[pcp];
+		else
+			sc = info->vport.pcp_to_sc_uc[pcp];
+	} else {
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.non_vlan_sc_mc;
+		else
+			sc = info->vport.non_vlan_sc_uc;
+	}
+
+	return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	struct __opa_veswport_info *info = &adapter->info;
+	u8 vl;
+
+	if (skb_vlan_tag_present(skb)) {	/* tagged: pick the VL from the tag's priority bits */
+		u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.pcp_to_vl_mc[pcp];
+		else
+			vl = info->vport.pcp_to_vl_uc[pcp];
+	} else {	/* untagged: use the non-VLAN defaults */
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.non_vlan_vl_mc;
+		else
+			vl = info->vport.non_vlan_vl_uc;
+	}
+
+	return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the 8-bit packet entropy from the skb's tx hash */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	u16 hash16;
+
+	/*
+	 * Get flow based 16-bit hash and then XOR the upper and lower bytes
+	 * to get the entropy.
+	 * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+	 */
+	hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+	return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+				       u8 entropy)
+{
+	u8 flow_id;
+
+	/* Add the upper and lower 4-bits of entropy to get the flow id */
+	flow_id = ((entropy & 0xf) + (entropy >> 4));
+	return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];	/* mask keeps the index inside flow_tbl, assuming a power-of-two size — TODO confirm */
+}
+
+/* Calculate packet length, in 8-byte units, including OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+	u32 pad_len;
+
+	/* padding that rounds (len + ICRC/tail) up to a multiple of 8 bytes */
+	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+	pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+	return (skb->len + pad_len) >> 3;
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	struct __opa_veswport_info *info = &adapter->info;
+	struct opa_vnic_skb_mdata *mdata;
+	u8 def_port, sc, entropy, *hdr;
+	u16 len, l4_hdr;
+	u32 dlid;
+
+	hdr = skb_push(skb, OPA_VNIC_HDR_LEN);	/* reserve header room; filled by opa_vnic_make_header() at the end */
+
+	entropy = opa_vnic_calc_entropy(adapter, skb);
+	def_port = opa_vnic_get_def_port(adapter, entropy);
+	len = opa_vnic_wire_length(skb);	/* skb->len already counts the header pushed above */
+	dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+	sc = opa_vnic_get_sc(info, skb);
+	l4_hdr = info->vesw.vesw_id;
+
+	mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+	mdata->vl = opa_vnic_get_vl(adapter, skb);
+	mdata->entropy = entropy;
+	mdata->flags = 0;
+	if (unlikely(!dlid)) {
+		mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+		return;	/* no DLID: the OPA header is left unwritten, only the error flag is set */
+	}
+
+	opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+			     info->vesw.pkey, entropy, sc, 0,
+			     OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++
b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -0,0 +1,489 @@ +#ifndef _OPA_VNIC_ENCAP_H +#define _OPA_VNIC_ENCAP_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains all OPA VNIC declaration required for encapsulation + * and decapsulation of Ethernet packets + */ + +#include <linux/types.h> +#include <rdma/ib_mad.h> + +/* EMA class version */ +#define OPA_EMA_CLASS_VERSION 0x80 + +/* + * Define the Intel vendor management class for OPA + * ETHERNET MANAGEMENT + */ +#define OPA_MGMT_CLASS_INTEL_EMA 0x34 + +/* EM attribute IDs */ +#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001 +#define OPA_EM_ATTR_VESWPORT_INFO 0x0011 +#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012 +#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013 +#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014 +#define OPA_EM_ATTR_DELETE_VESW 0x0015 +#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020 +#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022 + +/* VNIC configured and operational state values */ +#define OPA_VNIC_STATE_DROP_ALL 0x1 +#define OPA_VNIC_STATE_FORWARDING 0x3 + +#define OPA_VESW_MAX_NUM_DEF_PORT 16 +#define OPA_VNIC_MAX_NUM_PCP 8 + +#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR) + +/* Defines for vendor specific notice(trap) attributes */ +#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04 + +/* INTEL OUI */ +#define INTEL_OUI_1 0x00 +#define INTEL_OUI_2 0x06 +#define INTEL_OUI_3 0x6a + +/* Trap opcodes sent from VNIC */ +#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1 +#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2 +#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3 + 
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20)) +#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8) + +/* VNIC Ethernet link status */ +#define OPA_VNIC_ETH_LINK_UP 1 +#define OPA_VNIC_ETH_LINK_DOWN 2 + +/** + * struct opa_vesw_info - OPA vnic switch information + * @fabric_id: 10-bit fabric id + * @vesw_id: 12-bit virtual ethernet switch id + * @def_port_mask: bitmask of default ports + * @pkey: partition key + * @u_mcast_dlid: unknown multicast dlid + * @u_ucast_dlid: array of unknown unicast dlids + * @eth_mtu: MTUs for each vlan PCP + * @eth_mtu_non_vlan: MTU for non vlan packets + */ +struct opa_vesw_info { + __be16 fabric_id; + __be16 vesw_id; + + u8 rsvd0[6]; + __be16 def_port_mask; + + u8 rsvd1[2]; + __be16 pkey; + + u8 rsvd2[4]; + __be32 u_mcast_dlid; + __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; + + u8 rsvd3[44]; + __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; + __be16 eth_mtu_non_vlan; + u8 rsvd4[2]; +} __packed; + +/** + * struct opa_per_veswport_info - OPA vnic per port information + * @port_num: port number + * @eth_link_status: current ethernet link state + * @base_mac_addr: base mac address + * @config_state: configured port state + * @oper_state: operational port state + * @max_mac_tbl_ent: max number of mac table entries + * @max_smac_ent: max smac entries in mac table + * @mac_tbl_digest: mac table digest + * @encap_slid: base slid for the port + * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets + * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets + * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets + * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets + * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets + * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets + * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets + * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets + * @uc_macs_gen_count: generation count for unicast macs list + * 
@mc_macs_gen_count: generation count for multicast macs list + */ +struct opa_per_veswport_info { + __be32 port_num; + + u8 eth_link_status; + u8 rsvd0[3]; + + u8 base_mac_addr[ETH_ALEN]; + u8 config_state; + u8 oper_state; + + __be16 max_mac_tbl_ent; + __be16 max_smac_ent; + __be32 mac_tbl_digest; + u8 rsvd1[4]; + + __be32 encap_slid; + + u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP]; + + u8 non_vlan_sc_uc; + u8 non_vlan_vl_uc; + u8 non_vlan_sc_mc; + u8 non_vlan_vl_mc; + + u8 rsvd2[48]; + + __be16 uc_macs_gen_count; + __be16 mc_macs_gen_count; + + u8 rsvd3[8]; +} __packed; + +/** + * struct opa_veswport_info - OPA vnic port information + * @vesw: OPA vnic switch information + * @vport: OPA vnic per port information + * + * On host, each of the virtual ethernet ports belongs + * to a different virtual ethernet switch. + */ +struct opa_veswport_info { + struct opa_vesw_info vesw; + struct opa_per_veswport_info vport; +}; + +/** + * struct opa_veswport_mactable_entry - single entry in the forwarding table + * @mac_addr: MAC address + * @mac_addr_mask: MAC address bit mask + * @dlid_sd: Matching DLID and side data + * + * On the host each virtual ethernet port will have + * a forwarding table. These tables are used to + * map a MAC to a LID and other data. For more + * details see struct opa_veswport_mactable_entries. 
+ * This is the structure of a single mactable entry + */ +struct opa_veswport_mactable_entry { + u8 mac_addr[ETH_ALEN]; + u8 mac_addr_mask[ETH_ALEN]; + __be32 dlid_sd; +} __packed; + +/** + * struct opa_veswport_mactable - Forwarding table array + * @offset: mac table starting offset + * @num_entries: Number of entries to get or set + * @mac_tbl_digest: mac table digest + * @tbl_entries[]: Array of table entries + * + * The EM sends down this structure in a MAD indicating + * the starting offset in the forwarding table that this + * entry is to be loaded into and the number of entries + * that this MAD instance contains. + * The mac_tbl_digest has been added to this MAD structure. It will be set by + * the EM and it will be used by the EM to check if there are any + * discrepancies with this value and the value + * maintained by the EM in the case of VNIC port being deleted or unloaded. + * A new instantiation of a VNIC will always have a value of zero. + * This value is stored as part of the vnic adapter structure and will be + * accessed by the GET and SET routines for both the mactable entries and the + * veswport info. 
+ */ +struct opa_veswport_mactable { + __be16 offset; + __be16 num_entries; + __be32 mac_tbl_digest; + struct opa_veswport_mactable_entry tbl_entries[0]; +} __packed; + +/** + * struct opa_veswport_summary_counters - summary counters + * @vp_instance: vport instance on the OPA port + * @vesw_id: virtual ethernet switch id + * @veswport_num: virtual ethernet switch port number + * @tx_errors: transmit errors + * @rx_errors: receive errors + * @tx_packets: transmit packets + * @rx_packets: receive packets + * @tx_bytes: transmit bytes + * @rx_bytes: receive bytes + * @tx_unicast: unicast packets transmitted + * @tx_mcastbcast: multicast/broadcast packets transmitted + * @tx_untagged: non-vlan packets transmitted + * @tx_vlan: vlan packets transmitted + * @tx_64_size: transmit packet length is 64 bytes + * @tx_65_127: transmit packet length is >=65 and <= 127 bytes + * @tx_128_255: transmit packet length is >=128 and <= 255 bytes + * @tx_256_511: transmit packet length is >=256 and <= 511 bytes + * @tx_512_1023: transmit packet length is >=512 and <= 1023 bytes + * @tx_1024_1518: transmit packet length is >=1024 and <= 1518 bytes + * @tx_1519_max: transmit packet length >= 1519 bytes + * @rx_unicast: unicast packets received + * @rx_mcastbcast: multicast/broadcast packets received + * @rx_untagged: non-vlan packets received + * @rx_vlan: vlan packets received + * @rx_64_size: received packet length is 64 bytes + * @rx_65_127: received packet length is >=65 and <= 127 bytes + * @rx_128_255: received packet length is >=128 and <= 255 bytes + * @rx_256_511: received packet length is >=256 and <= 511 bytes + * @rx_512_1023: received packet length is >=512 and <= 1023 bytes + * @rx_1024_1518: received packet length is >=1024 and <= 1518 bytes + * @rx_1519_max: received packet length >= 1519 bytes + * + * All the above are counters of corresponding conditions. 
+ */ +struct opa_veswport_summary_counters { + __be16 vp_instance; + __be16 vesw_id; + __be32 veswport_num; + + __be64 tx_errors; + __be64 rx_errors; + __be64 tx_packets; + __be64 rx_packets; + __be64 tx_bytes; + __be64 rx_bytes; + + __be64 tx_unicast; + __be64 tx_mcastbcast; + + __be64 tx_untagged; + __be64 tx_vlan; + + __be64 tx_64_size; + __be64 tx_65_127; + __be64 tx_128_255; + __be64 tx_256_511; + __be64 tx_512_1023; + __be64 tx_1024_1518; + __be64 tx_1519_max; + + __be64 rx_unicast; + __be64 rx_mcastbcast; + + __be64 rx_untagged; + __be64 rx_vlan; + + __be64 rx_64_size; + __be64 rx_65_127; + __be64 rx_128_255; + __be64 rx_256_511; + __be64 rx_512_1023; + __be64 rx_1024_1518; + __be64 rx_1519_max; + + __be64 reserved[16]; +} __packed; + +/** + * struct opa_veswport_error_counters - error counters + * @vp_instance: vport instance on the OPA port + * @vesw_id: virtual ethernet switch id + * @veswport_num: virtual ethernet switch port number + * @tx_errors: transmit errors + * @rx_errors: receive errors + * @tx_smac_filt: smac filter errors + * @tx_dlid_zero: transmit packets with invalid dlid + * @tx_logic: other transmit errors + * @tx_drop_state: packet transmission in non-forward port state + * @rx_bad_veswid: received packet with invalid vesw id + * @rx_runt: received ethernet packet with length < 64 bytes + * @rx_oversize: received ethernet packet with length > MTU size + * @rx_eth_down: received packets when interface is down + * @rx_drop_state: received packets in non-forwarding port state + * @rx_logic: other receive errors + * + * All the above are counters of corresponding error conditions. 
+ */ +struct opa_veswport_error_counters { + __be16 vp_instance; + __be16 vesw_id; + __be32 veswport_num; + + __be64 tx_errors; + __be64 rx_errors; + + __be64 rsvd0; + __be64 tx_smac_filt; + __be64 rsvd1; + __be64 rsvd2; + __be64 rsvd3; + __be64 tx_dlid_zero; + __be64 rsvd4; + __be64 tx_logic; + __be64 rsvd5; + __be64 tx_drop_state; + + __be64 rx_bad_veswid; + __be64 rsvd6; + __be64 rx_runt; + __be64 rx_oversize; + __be64 rsvd7; + __be64 rx_eth_down; + __be64 rx_drop_state; + __be64 rx_logic; + __be64 rsvd8; + + __be64 rsvd9[16]; +} __packed; + +/** + * struct opa_veswport_trap - Trap message sent to EM by VNIC + * @fabric_id: 10 bit fabric id + * @veswid: 12 bit virtual ethernet switch id + * @veswportnum: logical port number on the Virtual switch + * @opaportnum: physical port num (redundant on host) + * @veswportindex: switch port index on opa port 0 based + * @opcode: operation + * @reserved: 32 bit for alignment + * + * The VNIC will send trap messages to the Ethernet manager to + * inform it about changes to the VNIC config, behaviour etc. + * This is the format of the trap payload. 
+ */ +struct opa_veswport_trap { + __be16 fabric_id; + __be16 veswid; + __be32 veswportnum; + __be16 opaportnum; + u8 veswportindex; + u8 opcode; + __be32 reserved; +} __packed; + +/** + * struct opa_vnic_iface_mac_entry - single entry in the mac list + * @mac_addr: MAC address + */ +struct opa_vnic_iface_mac_entry { + u8 mac_addr[ETH_ALEN]; +}; + +/** + * struct opa_veswport_iface_macs - Msg to set globally administered MAC + * @start_idx: position of first entry (0 based) + * @num_macs_in_msg: number of MACs in this message + * @tot_macs_in_lst: The total number of MACs the agent has + * @gen_count: gen_count to indicate change + * @entry: The mac list entry + * + * Same attribute IDS and attribute modifiers as in locally administered + * addresses used to set globally administered addresses + */ +struct opa_veswport_iface_macs { + __be16 start_idx; + __be16 num_macs_in_msg; + __be16 tot_macs_in_lst; + __be16 gen_count; + struct opa_vnic_iface_mac_entry entry[0]; +} __packed; + +/** + * struct opa_vnic_vema_mad - Generic VEMA MAD + * @mad_hdr: Generic MAD header + * @rmpp_hdr: RMPP header for vendor specific MADs + * @oui: Unique org identifier + * @data: MAD data + */ +struct opa_vnic_vema_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[OPA_VNIC_EMA_DATA]; +}; + +/** + * struct opa_vnic_notice_attr - Generic Notice MAD + * @gen_type: Generic/Specific bit and type of notice + * @oui_1: Vendor ID byte 1 + * @oui_2: Vendor ID byte 2 + * @oui_3: Vendor ID byte 3 + * @trap_num: Trap number + * @toggle_count: Notice toggle bit and count value + * @issuer_lid: Trap issuer's lid + * @issuer_gid: Issuer GID (only if Report method) + * @raw_data: Trap message body + */ +struct opa_vnic_notice_attr { + u8 gen_type; + u8 oui_1; + u8 oui_2; + u8 oui_3; + __be16 trap_num; + __be16 toggle_count; + __be32 issuer_lid; + __be32 reserved; + u8 issuer_gid[16]; + u8 raw_data[64]; +} __packed; + +/** + * struct 
opa_vnic_vema_mad_trap - Generic VEMA MAD Trap + * @mad_hdr: Generic MAD header + * @rmpp_hdr: RMPP header for vendor specific MADs + * @oui: Unique org identifier + * @notice: Notice structure + */ +struct opa_vnic_vema_mad_trap { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + struct opa_vnic_notice_attr notice; +}; + +#endif /* _OPA_VNIC_ENCAP_H */ diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c new file mode 100644 index 000000000000..d66540e24885 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -0,0 +1,187 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file contains OPA VNIC ethtool functions + */ + +#include <linux/ethtool.h> + +#include "opa_vnic_internal.h" + +enum {NETDEV_STATS, VNIC_STATS}; + +struct vnic_stats { + char stat_string[ETH_GSTRING_LEN]; + struct { + int sizeof_stat; + int stat_offset; + }; +}; + +#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \ + offsetof(struct opa_vnic_stats, m) } + +static struct vnic_stats vnic_gstrings_stats[] = { + /* NETDEV stats */ + {"rx_packets", VNIC_STAT(netstats.rx_packets)}, + {"tx_packets", VNIC_STAT(netstats.tx_packets)}, + {"rx_bytes", VNIC_STAT(netstats.rx_bytes)}, + {"tx_bytes", VNIC_STAT(netstats.tx_bytes)}, + {"rx_errors", VNIC_STAT(netstats.rx_errors)}, + {"tx_errors", VNIC_STAT(netstats.tx_errors)}, + {"rx_dropped", VNIC_STAT(netstats.rx_dropped)}, + {"tx_dropped", VNIC_STAT(netstats.tx_dropped)}, + + /* SUMMARY counters */ + {"tx_unicast", VNIC_STAT(tx_grp.unicast)}, + {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)}, + {"tx_untagged", VNIC_STAT(tx_grp.untagged)}, + {"tx_vlan", VNIC_STAT(tx_grp.vlan)}, + + {"tx_64_size", VNIC_STAT(tx_grp.s_64)}, + {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)}, + {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)}, + {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)}, + {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)}, + {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)}, + {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)}, + + {"rx_unicast", VNIC_STAT(rx_grp.unicast)}, + {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)}, + {"rx_untagged", VNIC_STAT(rx_grp.untagged)}, + {"rx_vlan", VNIC_STAT(rx_grp.vlan)}, + + {"rx_64_size", VNIC_STAT(rx_grp.s_64)}, + {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)}, + {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)}, + {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)}, + {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)}, + {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)}, + {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)}, + + /* ERROR counters */ + {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)}, + 
{"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)}, + + {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)}, + {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)}, + + {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)}, + {"tx_drop_state", VNIC_STAT(tx_drop_state)}, + {"rx_drop_state", VNIC_STAT(rx_drop_state)}, + {"rx_oversize", VNIC_STAT(rx_oversize)}, + {"rx_runt", VNIC_STAT(rx_runt)}, +}; + +#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats) + +/* vnic_get_drvinfo - get driver info */ +static void vnic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, opa_vnic_driver_version, + sizeof(drvinfo->version)); + strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent), + sizeof(drvinfo->bus_info)); +} + +/* vnic_get_sset_count - get string set count */ +static int vnic_get_sset_count(struct net_device *netdev, int sset) +{ + return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP; +} + +/* vnic_get_ethtool_stats - get statistics */ +static void vnic_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_stats vstats; + int i; + + memset(&vstats, 0, sizeof(vstats)); + mutex_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); + for (i = 0; i < VNIC_STATS_LEN; i++) { + char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset; + + data[i] = (vnic_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; + } + mutex_unlock(&adapter->stats_lock); +} + +/* vnic_get_strings - get strings */ +static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < VNIC_STATS_LEN; i++) + memcpy(data + i * ETH_GSTRING_LEN, + vnic_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); +} + +/* ethtool ops */ +static const struct ethtool_ops opa_vnic_ethtool_ops = { + .get_drvinfo = vnic_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = vnic_get_strings, + .get_sset_count = vnic_get_sset_count, + .get_ethtool_stats = vnic_get_ethtool_stats, +}; + +/* opa_vnic_set_ethtool_ops - set ethtool ops */ +void opa_vnic_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &opa_vnic_ethtool_ops; +} diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h new file mode 100644 index 000000000000..6bba886bec1f --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -0,0 +1,329 @@ +#ifndef _OPA_VNIC_INTERNAL_H +#define _OPA_VNIC_INTERNAL_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file contains OPA VNIC driver internal declarations + */ + +#include <linux/bitops.h> +#include <linux/etherdevice.h> +#include <linux/hashtable.h> +#include <linux/sizes.h> +#include <rdma/opa_vnic.h> + +#include "opa_vnic_encap.h" + +#define OPA_VNIC_VLAN_PCP(vlan_tci) \ + (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) + +/* Flow to default port redirection table size */ +#define OPA_VNIC_FLOW_TBL_SIZE 32 + +/* Invalid port number */ +#define OPA_VNIC_INVALID_PORT 0xff + +struct opa_vnic_adapter; + +/** + * struct __opa_vesw_info - OPA vnic virtual switch info + * + * Same as opa_vesw_info without bitwise attribute. + */ +struct __opa_vesw_info { + u16 fabric_id; + u16 vesw_id; + + u8 rsvd0[6]; + u16 def_port_mask; + + u8 rsvd1[2]; + u16 pkey; + + u8 rsvd2[4]; + u32 u_mcast_dlid; + u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; + + u8 rsvd3[44]; + u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; + u16 eth_mtu_non_vlan; + u8 rsvd4[2]; +} __packed; + +/** + * struct __opa_per_veswport_info - OPA vnic per port info + * + * Same as opa_per_veswport_info without bitwise attribute. + */ +struct __opa_per_veswport_info { + u32 port_num; + + u8 eth_link_status; + u8 rsvd0[3]; + + u8 base_mac_addr[ETH_ALEN]; + u8 config_state; + u8 oper_state; + + u16 max_mac_tbl_ent; + u16 max_smac_ent; + u32 mac_tbl_digest; + u8 rsvd1[4]; + + u32 encap_slid; + + u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP]; + + u8 non_vlan_sc_uc; + u8 non_vlan_vl_uc; + u8 non_vlan_sc_mc; + u8 non_vlan_vl_mc; + + u8 rsvd2[48]; + + u16 uc_macs_gen_count; + u16 mc_macs_gen_count; + + u8 rsvd3[8]; +} __packed; + +/** + * struct __opa_veswport_info - OPA vnic port info + * + * Same as opa_veswport_info without bitwise attribute. 
+ */ +struct __opa_veswport_info { + struct __opa_vesw_info vesw; + struct __opa_per_veswport_info vport; +}; + +/** + * struct __opa_veswport_trap - OPA vnic trap info + * + * Same as opa_veswport_trap without bitwise attribute. + */ +struct __opa_veswport_trap { + u16 fabric_id; + u16 veswid; + u32 veswportnum; + u16 opaportnum; + u8 veswportindex; + u8 opcode; + u32 reserved; +} __packed; + +/** + * struct opa_vnic_ctrl_port - OPA virtual NIC control port + * @ibdev: pointer to ib device + * @ops: opa vnic control operations + * @num_ports: number of opa ports + */ +struct opa_vnic_ctrl_port { + struct ib_device *ibdev; + struct opa_vnic_ctrl_ops *ops; + u8 num_ports; +}; + +/** + * struct opa_vnic_adapter - OPA VNIC netdev private data structure + * @netdev: pointer to associated netdev + * @ibdev: ib device + * @cport: pointer to opa vnic control port + * @rn_ops: rdma netdev's net_device_ops + * @port_num: OPA port number + * @vport_num: vesw port number + * @lock: adapter lock + * @info: virtual ethernet switch port information + * @vema_mac_addr: mac address configured by vema + * @umac_hash: unicast maclist hash + * @mmac_hash: multicast maclist hash + * @mactbl: hash table of MAC entries + * @mactbl_lock: mac table lock + * @stats_lock: statistics lock + * @flow_tbl: flow to default port redirection table + * @trap_timeout: trap timeout + * @trap_count: no. 
of traps allowed within timeout period + */ +struct opa_vnic_adapter { + struct net_device *netdev; + struct ib_device *ibdev; + struct opa_vnic_ctrl_port *cport; + const struct net_device_ops *rn_ops; + + u8 port_num; + u8 vport_num; + + /* Lock used around concurrent updates to netdev */ + struct mutex lock; + + struct __opa_veswport_info info; + u8 vema_mac_addr[ETH_ALEN]; + u32 umac_hash; + u32 mmac_hash; + struct hlist_head __rcu *mactbl; + + /* Lock used to protect updates to mac table */ + struct mutex mactbl_lock; + + /* Lock used to protect access to vnic counters */ + struct mutex stats_lock; + + u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE]; + + unsigned long trap_timeout; + u8 trap_count; +}; + +/* Same as opa_veswport_mactable_entry, but without bitwise attribute */ +struct __opa_vnic_mactable_entry { + u8 mac_addr[ETH_ALEN]; + u8 mac_addr_mask[ETH_ALEN]; + u32 dlid_sd; +} __packed; + +/** + * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node + * @hlist: hash list handle + * @index: index of entry in the mac table + * @entry: entry in the table + */ +struct opa_vnic_mac_tbl_node { + struct hlist_node hlist; + u16 index; + struct __opa_vnic_mactable_entry entry; +}; + +#define v_dbg(format, arg...) \ + netdev_dbg(adapter->netdev, format, ## arg) +#define v_err(format, arg...) \ + netdev_err(adapter->netdev, format, ## arg) +#define v_info(format, arg...) \ + netdev_info(adapter->netdev, format, ## arg) +#define v_warn(format, arg...) \ + netdev_warn(adapter->netdev, format, ## arg) + +#define c_err(format, arg...) \ + dev_err(&cport->ibdev->dev, format, ## arg) +#define c_info(format, arg...) \ + dev_info(&cport->ibdev->dev, format, ## arg) +#define c_dbg(format, arg...) 
\ + dev_dbg(&cport->ibdev->dev, format, ## arg) + +/* The maximum allowed entries in the mac table */ +#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048 +/* Limit of smac entries in mac table */ +#define OPA_VNIC_MAX_SMAC_LIMIT 256 + +/* The last octet of the MAC address is used as the key to the hash table */ +#define OPA_VNIC_MAC_HASH_IDX 5 + +/* The VNIC MAC hash table is of size 2^8 */ +#define OPA_VNIC_MAC_TBL_HASH_BITS 8 +#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS) + +/* + * VNIC HASH MACROS + * + * All of them operate on an array of OPA_VNIC_MAC_TBL_SIZE hlist heads; + * the bucket for a key is hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE)). + */ +#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE) + +#define vnic_hash_add(hashtable, node, key) \ + hlist_add_head(node, \ + &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))]) +/* + * Walk all buckets. The bucket loop runs only while obj is NULL, so it also + * stops when the inner list walk leaves obj set (NOTE(review): presumably + * when the caller breaks out of the inner loop - confirm). + */ +#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; \ + !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) +/* Walk only the bucket that key hashes to */ +#define vnic_hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, \ + &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member) +/* Same bucket walk as vnic_hash_for_each_safe(), using hlist_for_each_entry() */ +#define vnic_hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; \ + !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \ + hlist_for_each_entry(obj, &name[bkt], member) + +extern char opa_vnic_driver_name[]; +extern const char opa_vnic_driver_version[]; + +struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, + u8 port_num, u8 vport_num); +void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter); +void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter); +void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter); +void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable
*tbl); +int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable *tbl); +void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs); +void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs); +void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_summary_counters *cntrs); +void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_error_counters *cntrs); +void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info); +void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info); +void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info); +void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info); +void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event); +void opa_vnic_set_ethtool_ops(struct net_device *netdev); +void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, + struct __opa_veswport_trap *data, u32 lid); + +#endif /* _OPA_VNIC_INTERNAL_H */ diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c new file mode 100644 index 000000000000..905f39dda5aa --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -0,0 +1,389 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA Virtual Network Interface Controller (VNIC) driver + * netdev functionality. 
+ */ + +#include <linux/module.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> + +#include "opa_vnic_internal.h" + +#define OPA_TX_TIMEOUT_MS 1000 + +#define OPA_VNIC_SKB_HEADROOM \ + ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8) + +/* + * opa_vnic_get_stats64 - get_stats64 callback of the vnic netdev + * + * This function is overloaded for opa_vnic specific implementation. It + * calls the saved rdma netdev callback on a zeroed opa_vnic_stats while + * holding stats_lock and copies the netstats part to the caller. + */ +static void opa_vnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_stats vstats; + + memset(&vstats, 0, sizeof(vstats)); + mutex_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); + mutex_unlock(&adapter->stats_lock); + memcpy(stats, &vstats.netstats, sizeof(*stats)); +} + +/* + * opa_netdev_start_xmit - transmit function + * + * Pads short frames, encapsulates the skb and hands it to the rdma netdev's + * ndo_start_xmit, whose return value is returned. + */ +static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + + v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len); + /* + * pad to ensure minimum ethernet packet length; if padding fails the + * frame is given up and NETDEV_TX_OK is returned (NOTE(review): + * skb_padto() is expected to have freed the skb in that case - confirm) + */ + if (unlikely(skb->len < ETH_ZLEN)) { + if (skb_padto(skb, ETH_ZLEN)) + return NETDEV_TX_OK; + + skb_put(skb, ETH_ZLEN - skb->len); + } + + opa_vnic_encap_skb(adapter, skb); + return adapter->rn_ops->ndo_start_xmit(skb, netdev); +} +/* + * opa_vnic_select_queue - select the tx queue through the rdma netdev + * + * Returns whatever the rdma netdev's ndo_select_queue returns. + */ +static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_skb_mdata *mdata; + int rc; + + /* + * pass entropy and vl as metadata in skb; the metadata header is + * pushed only for the duration of the ndo_select_queue call and is + * pulled again before returning + */ + mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata)); + mdata->entropy = opa_vnic_calc_entropy(adapter, skb); + mdata->vl = opa_vnic_get_vl(adapter, skb); + rc = adapter->rn_ops->ndo_select_queue(netdev, skb, + accel_priv, fallback); + skb_pull(skb, sizeof(*mdata)); + return rc; +} + +/* opa_vnic_process_vema_config - process vema configuration updates */ +void
opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) +{ + struct __opa_veswport_info *info = &adapter->info; + struct rdma_netdev *rn = netdev_priv(adapter->netdev); + u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 }; + struct net_device *netdev = adapter->netdev; + u8 i, port_count = 0; + u16 port_mask; + + /* + * If the base_mac_addr is changed, update the interface mac address. + * NOTE(review): the return value of eth_mac_addr() is not checked, + * vema_mac_addr is updated in either case. + */ + if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr, + ARRAY_SIZE(info->vport.base_mac_addr))) { + struct sockaddr saddr; + + memcpy(saddr.sa_data, info->vport.base_mac_addr, + ARRAY_SIZE(info->vport.base_mac_addr)); + mutex_lock(&adapter->lock); + eth_mac_addr(netdev, &saddr); + memcpy(adapter->vema_mac_addr, + info->vport.base_mac_addr, ETH_ALEN); + mutex_unlock(&adapter->lock); + } + /* Hand the configured vesw id to the rdma netdev */ + rn->set_id(netdev, info->vesw.vesw_id); + + /* + * Handle MTU limit change: max_mtu follows eth_mtu_non_vlan but is + * never set below min_mtu, and the current mtu is lowered if it + * exceeds the new limit. + */ + rtnl_lock(); + netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan, + netdev->min_mtu); + if (netdev->mtu > netdev->max_mtu) + dev_set_mtu(netdev, netdev->max_mtu); + rtnl_unlock(); + + /* + * Update flow to default port redirection table: first collect the + * indices of the bits that are set in def_port_mask. + */ + port_mask = info->vesw.def_port_mask; + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) { + if (port_mask & 1) + port_num[port_count++] = i; + port_mask >>= 1; + } + + /* + * Build the flow table. Flow table is required when destination LID + * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows supported. + * Each flow needs a default port number to get its dlid from the + * u_ucast_dlid array. The collected default ports are assigned to + * the flows round robin; without any default port every flow gets + * OPA_VNIC_INVALID_PORT. + */ + for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++) + adapter->flow_tbl[i] = port_count ?
port_num[i % port_count] : + OPA_VNIC_INVALID_PORT; + + /* + * Operational state can only be DROP_ALL or FORWARDING: any config + * state other than FORWARDING results in DROP_ALL and in the netdev + * being marked dormant. + */ + if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) { + info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; + netif_dormant_off(netdev); + } else { + info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + netif_dormant_on(netdev); + } +} + +/* + * Set the power on default values in adapter's vema interface structure: + * the mac table and smac limits, config state DROP_ALL and eth link DOWN. + */ +static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter) +{ + adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES; + adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT; + adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL; + adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; +} + +/* + * opa_vnic_set_mac_addr - change mac address + * + * Returns 0 right away if the address is unchanged. Otherwise the address + * is set with eth_mac_addr() under the adapter lock; on success the unicast + * mac generation count is incremented and a unicast mac change event is + * reported, on failure the eth_mac_addr() error is returned. + */ +static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct sockaddr *sa = addr; + int rc; + + if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN)) + return 0; + + mutex_lock(&adapter->lock); + rc = eth_mac_addr(netdev, addr); + mutex_unlock(&adapter->lock); + if (rc) + return rc; + + adapter->info.vport.uc_macs_gen_count++; + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE); + return 0; +} + +/* + * opa_vnic_mac_send_event - post event on possible mac list exchange + * Send trap when digest from uc/mc mac list differs from previous run. + * Digest is evaluated similar to how cksum does.
+ */ +static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct netdev_hw_addr *ha; + struct netdev_hw_addr_list *hw_list; + u32 *ref_crc; + u32 l, crc = 0; + /* + * Pick the address list and the stored digest for the event; any other + * event is ignored. NOTE(review): the generation count is incremented + * on every call, also when the digest below turns out to be unchanged. + */ + switch (event) { + case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE: + hw_list = &netdev->uc; + adapter->info.vport.uc_macs_gen_count++; + ref_crc = &adapter->umac_hash; + break; + case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE: + hw_list = &netdev->mc; + adapter->info.vport.mc_macs_gen_count++; + ref_crc = &adapter->mmac_hash; + break; + default: + return; + } /* digest: inverted crc32 over all addresses followed by the list length in bytes */ + netdev_hw_addr_list_for_each(ha, hw_list) { + crc = crc32_le(crc, ha->addr, ETH_ALEN); + } + l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN; + crc = ~crc32_le(crc, (void *)&l, sizeof(l)); + /* Remember the new digest and report the event only if it changed */ + if (crc != *ref_crc) { + *ref_crc = crc; + opa_vnic_vema_report_event(adapter, event); + } +} + +/* + * opa_vnic_set_rx_mode - handle uc/mc mac list change + * + * Evaluates the unicast list first and the multicast list second. + */ +static void opa_vnic_set_rx_mode(struct net_device *netdev) +{ + opa_vnic_mac_send_event(netdev, + OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE); + + opa_vnic_mac_send_event(netdev, + OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE); +} + +/* + * opa_netdev_open - activate network interface + * + * Calls the rdma netdev's ndo_open first and returns its error if it + * fails. On success the eth link status is set to UP and a link status + * change event is reported. + */ +static int opa_netdev_open(struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + int rc; + + rc = adapter->rn_ops->ndo_open(adapter->netdev); + if (rc) { + v_dbg("open failed %d\n", rc); + return rc; + } + + /* Update eth link status and send trap */ + adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); + return 0; +} + +/* + * opa_netdev_close - disable network interface + * + * Calls the rdma netdev's ndo_stop first and returns its error if it + * fails. On success the eth link status is set to DOWN and a link status + * change event is reported. + */ +static int opa_netdev_close(struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + int rc; + + rc = adapter->rn_ops->ndo_stop(adapter->netdev); + if (rc) { + v_dbg("close failed %d\n", rc); + return rc; + } + + /*
Update eth link status and send trap */ + adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); + return 0; +} + +/* + * netdev ops installed on the vnic netdev. The open, stop, start_xmit, + * get_stats64 and select_queue callbacks call through to the rdma netdev + * ops saved in adapter->rn_ops. + */ +static const struct net_device_ops opa_netdev_ops = { + .ndo_open = opa_netdev_open, + .ndo_stop = opa_netdev_close, + .ndo_start_xmit = opa_netdev_start_xmit, + .ndo_get_stats64 = opa_vnic_get_stats64, + .ndo_set_rx_mode = opa_vnic_set_rx_mode, + .ndo_select_queue = opa_vnic_select_queue, + .ndo_set_mac_address = opa_vnic_set_mac_addr, +}; + +/* + * opa_vnic_add_netdev - create vnic netdev interface + * + * Allocates an rdma netdev of type RDMA_NETDEV_OPA_VNIC from the ib device + * and an adapter that is linked to it as client private data. The rdma + * netdev's own ops are saved in rn_ops before opa_netdev_ops is installed. + * The netdev is then registered and left with carrier off and dormant on. + * + * Returns the new adapter, or an ERR_PTR() on failure. + */ +struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, + u8 port_num, u8 vport_num) +{ + struct opa_vnic_adapter *adapter; + struct net_device *netdev; + struct rdma_netdev *rn; + int rc; + + netdev = ibdev->alloc_rdma_netdev(ibdev, port_num, + RDMA_NETDEV_OPA_VNIC, + "veth%d", NET_NAME_UNKNOWN, + ether_setup); /* both a NULL and an ERR_PTR() return are treated as failure */ + if (!netdev) + return ERR_PTR(-ENOMEM); + else if (IS_ERR(netdev)) + return ERR_CAST(netdev); + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + rc = -ENOMEM; + goto adapter_err; + } + + rn = netdev_priv(netdev); + rn->clnt_priv = adapter; + rn->hca = ibdev; + rn->port_num = port_num; + adapter->netdev = netdev; + adapter->ibdev = ibdev; + adapter->port_num = port_num; + adapter->vport_num = vport_num; + adapter->rn_ops = netdev->netdev_ops; + /* From here on the vnic callbacks wrap the saved rdma netdev ops */ + netdev->netdev_ops = &opa_netdev_ops; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM; + mutex_init(&adapter->lock); + mutex_init(&adapter->mactbl_lock); + mutex_init(&adapter->stats_lock); + + SET_NETDEV_DEV(netdev, ibdev->dev.parent); + + opa_vnic_set_ethtool_ops(netdev); + + opa_vnic_set_pod_values(adapter); + + rc = register_netdev(netdev); + if (rc) + goto netdev_err; + + netif_carrier_off(netdev); + netif_dormant_on(netdev); + v_info("initialized\n"); + + return adapter; +netdev_err: +
mutex_destroy(&adapter->lock); + mutex_destroy(&adapter->mactbl_lock); + mutex_destroy(&adapter->stats_lock); + kfree(adapter); +adapter_err: /* the rdma netdev is handed back on every failure path */ + ibdev->free_rdma_netdev(netdev); + + return ERR_PTR(rc); +} + +/* + * opa_vnic_rem_netdev - remove vnic netdev interface + * + * Unregisters the netdev, releases the mac table, destroys the adapter + * locks, frees the adapter and finally hands the netdev back to the ib + * device through free_rdma_netdev(). + */ +void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct ib_device *ibdev = adapter->ibdev; + + v_info("removing\n"); + unregister_netdev(netdev); + opa_vnic_release_mac_tbl(adapter); + mutex_destroy(&adapter->lock); + mutex_destroy(&adapter->mactbl_lock); + mutex_destroy(&adapter->stats_lock); + kfree(adapter); + ibdev->free_rdma_netdev(netdev); +} diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c new file mode 100644 index 000000000000..875694f9a7f9 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -0,0 +1,1056 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA Virtual Network Interface Controller (VNIC) + * Ethernet Management Agent (EMA) driver + */ + +#include <linux/module.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> + +#include "opa_vnic_internal.h" + +#define DRV_VERSION "1.0" +char opa_vnic_driver_name[] = "opa_vnic"; +const char opa_vnic_driver_version[] = DRV_VERSION; + +/* + * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd + * field in the class port info MAD. + */ +#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f) + +/* Cap trap bursts to a reasonable limit good for normal cases */ +#define OPA_VNIC_TRAP_BURST_LIMIT 4 + +/* + * VNIC trap limit timeout. 
+ * Inverse of cap2_mask response time out (1.0737 secs) = 0.9 + * secs approx IB spec 13.4.6.2.1 PortInfoSubnetTimeout and + * 13.4.9 Traps. + */ +#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000) +/* MAD status values, converted to big endian for use in mad_hdr.status */ +#define OPA_VNIC_UNSUP_ATTR \ + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB) + +#define OPA_VNIC_INVAL_ATTR \ + cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE) +/* Class port info capability mask bit: the agent generates traps */ +#define OPA_VNIC_CLASS_CAP_TRAP 0x1 + +/* Maximum number of VNIC ports supported */ +#define OPA_VNIC_MAX_NUM_VPORT 255 + +/** + * struct opa_vnic_vema_port -- VNIC VEMA port details + * @cport: pointer to the opa vnic control port + * @mad_agent: pointer to mad agent for port + * @class_port_info: Class port info information. + * @tid: Transaction id + * @port_num: OPA port number + * @vport_idr: vnic ports idr, maps a vnic port number to its adapter + * @event_handler: ib event handler + * @lock: adapter interface lock, held while a received MAD is processed + */ +struct opa_vnic_vema_port { + struct opa_vnic_ctrl_port *cport; + struct ib_mad_agent *mad_agent; + struct opa_class_port_info class_port_info; + u64 tid; + u8 port_num; + struct idr vport_idr; + struct ib_event_handler event_handler; + + /* Lock to query/update network adapter */ + struct mutex lock; +}; + +static void opa_vnic_vema_add_one(struct ib_device *device); +static void opa_vnic_vema_rem_one(struct ib_device *device, + void *client_data); +/* ib client of the driver, named after opa_vnic_driver_name */ +static struct ib_client opa_vnic_client = { + .name = opa_vnic_driver_name, + .add = opa_vnic_vema_add_one, + .remove = opa_vnic_vema_rem_one, +}; + +/** + * vema_get_vport_num -- Get the vnic port number from the mad + * @recvd_mad: Received mad + * + * Return: the vnic port number, which is the low 8 bits of the attribute + * modifier in the MAD header + */ +static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad) +{ + return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff; +} + +/** + * vema_get_vport_adapter -- Get vnic port adapter from recvd mad + * @recvd_mad: received mad + * @port: ptr to port struct on which MAD was recvd + * + * Return: vnic adapter found in the port's vport_idr for the vnic port + * number of the mad; callers treat a NULL result as no such vnic port + */ +static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_port *port) +{ + u8 vport_num = vema_get_vport_num(recvd_mad); + + return idr_find(&port->vport_idr, vport_num); +} + +/** + * vema_mac_tbl_req_ok -- Check if mac request has correct values + * @mac_tbl: mac table + * + * This function checks for the validity of the offset and number of + * entries required. + * + * Return: true if num_entries does not exceed the number of table entries + * that fit in OPA_VNIC_EMA_DATA bytes after the table header and + * offset + num_entries does not exceed OPA_VNIC_MAC_TBL_MAX_ENTRIES + */ +static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl) +{ + u16 offset, num_entries; + u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) / + sizeof(mac_tbl->tbl_entries[0])); + + offset = be16_to_cpu(mac_tbl->offset); + num_entries = be16_to_cpu(mac_tbl->num_entries); + + return ((num_entries <= req_entries) && + (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES)); +} + +/* + * Return the power on default values in the port info structure + * in big endian format as required by MAD. All fields that are not + * set below are zero. + */ +static inline void vema_get_pod_values(struct opa_veswport_info *port_info) +{ + memset(port_info, 0, sizeof(*port_info)); + port_info->vport.max_mac_tbl_ent = + cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES); + port_info->vport.max_smac_ent = + cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT); + port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL; +} + +/** + * vema_add_vport -- Add a new vnic port + * @port: ptr to opa_vnic_vema_port struct + * @vport_num: vnic port number (to be added) + * + * Creates the vnic netdev and registers the adapter in the port's idr + * under exactly @vport_num. If the idr registration fails the netdev is + * removed again. + * + * Return a pointer to the vnic adapter structure, or an ERR_PTR() on failure + */ +static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port, + u8 vport_num) +{ + struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_vnic_adapter *adapter; + + adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num); + if (!IS_ERR(adapter)) { + int rc; + + adapter->cport = cport; + rc = idr_alloc(&port->vport_idr, adapter, vport_num, + vport_num + 1, GFP_NOWAIT); + if
(rc < 0) { + opa_vnic_rem_netdev(adapter); + adapter = ERR_PTR(rc); + } + } + + return adapter; +} + +/** + * vema_get_class_port_info -- Get class info for port + * @port: Port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function copies the class port info last stored for the port into + * the response mad and then overrides the base version, class version, + * capability mask and response time fields with the values of this agent. + */ +static void vema_get_class_port_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_class_port_info *port_info; + + port_info = (struct opa_class_port_info *)rsp_mad->data; + memcpy(port_info, &port->class_port_info, sizeof(*port_info)); + port_info->base_version = OPA_MGMT_BASE_VERSION; + port_info->class_version = OPA_EMA_CLASS_VERSION; + + /* + * Set capability mask bit indicating agent generates traps, + * and set the maximum number of VNIC ports supported. + */ + port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP | + (OPA_VNIC_MAX_NUM_VPORT << 8))); + + /* + * Since a get routine is always sent by the EM first we + * set the expected response time to + * 4.096 usec * 2^18 == 1.0737 sec here.
+ */ + port_info->cap_mask2_resp_time = cpu_to_be32(18); +} + +/** + * vema_set_class_port_info -- Set class info for port + * @port: Port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function stores the class port info carried by the received mad + * in the port and sets up the response mad data the same way as for a get + */ +static void vema_set_class_port_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + memcpy(&port->class_port_info, recvd_mad->data, + sizeof(port->class_port_info)); + + vema_get_class_port_info(port, recvd_mad, rsp_mad); +} + +/** + * vema_get_veswport_info -- Get veswport info + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * The response is filled with the vesw and per veswport info of the vnic + * port addressed by the mad. If that vnic port does not exist, the power + * on default values are returned instead. + */ +static void vema_get_veswport_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_info *port_info = + (struct opa_veswport_info *)rsp_mad->data; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + memset(port_info, 0, sizeof(*port_info)); + opa_vnic_get_vesw_info(adapter, &port_info->vesw); + opa_vnic_get_per_veswport_info(adapter, + &port_info->vport); + } else { + vema_get_pod_values(port_info); + } +} + +/** + * vema_set_veswport_info -- Set veswport info + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function adds the vnic port if it does not exist yet, applies the + * vesw and per veswport info of the received mad to it, processes the new + * configuration and returns the resulting veswport info in the response. + * If the vnic port cannot be added, the response status is set to invalid + * attribute value. + */ +static void vema_set_veswport_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_veswport_info *port_info; + struct opa_vnic_adapter *adapter; + u8 vport_num; + + vport_num = vema_get_vport_num(recvd_mad); + + adapter =
vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { /* first set for this vnic port: create it */ + adapter = vema_add_vport(port, vport_num); + if (IS_ERR(adapter)) { + c_err("failed to add vport %d: %ld\n", + vport_num, PTR_ERR(adapter)); + goto err_exit; + } + } + + port_info = (struct opa_veswport_info *)recvd_mad->data; + opa_vnic_set_vesw_info(adapter, &port_info->vesw); + opa_vnic_set_per_veswport_info(adapter, &port_info->vport); + + /* Process the new config settings */ + opa_vnic_process_vema_config(adapter); + + vema_get_veswport_info(port, recvd_mad, rsp_mad); + return; + +err_exit: + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; +} + +/** + * vema_get_mac_entries -- Get MAC entries in VNIC MAC table + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function gets the MAC entries that are programmed into + * the VNIC MAC forwarding table. It checks for the validity of + * the index into the MAC table and the number of entries that + * are to be retrieved. The response status is set to invalid attribute + * value if the vnic port does not exist or the request is not valid.
+ */ +static void vema_get_mac_entries(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data; + mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data; + /* The requested window is echoed in the response before the query */ + if (vema_mac_tbl_req_ok(mac_tbl_in)) { + mac_tbl_out->offset = mac_tbl_in->offset; + mac_tbl_out->num_entries = mac_tbl_in->num_entries; + opa_vnic_query_mac_tbl(adapter, mac_tbl_out); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_set_mac_entries -- Set MAC entries in VNIC MAC table + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function sets the MAC entries in the VNIC forwarding table. + * It checks for the validity of the index and the number of forwarding + * table entries to be programmed. The response is always built by + * vema_get_mac_entries(), also when the update was rejected or failed. + * + * NOTE(review): an invalid request is flagged with OPA_VNIC_UNSUP_ATTR + * here, but the following vema_get_mac_entries() call sets + * OPA_VNIC_INVAL_ATTR for the same condition.
+ */ +static void vema_set_mac_entries(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_mactable *mac_tbl; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data; + if (vema_mac_tbl_req_ok(mac_tbl)) { + if (opa_vnic_update_mac_tbl(adapter, mac_tbl)) + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + } + vema_get_mac_entries(port, recvd_mad, rsp_mad); +} + +/** + * vema_set_delete_vesw -- Reset VESW info to POD values + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to response mad + * + * This function clears all the fields of veswport info for the requested vesw + * and sets them back to the power-on default values. It does not delete the + * vesw. The mac table of the vnic port is released as well. If the vnic port + * does not exist, the response status is set to invalid attribute value. + */ +static void vema_set_delete_vesw(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_info *port_info = + (struct opa_veswport_info *)rsp_mad->data; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + /* The response buffer is used as scratch space for the default values */ + vema_get_pod_values(port_info); + opa_vnic_set_vesw_info(adapter, &port_info->vesw); + opa_vnic_set_per_veswport_info(adapter, &port_info->vport); + + /* Process the new config settings */ + opa_vnic_process_vema_config(adapter); + + opa_vnic_release_mac_tbl(adapter); + + vema_get_veswport_info(port, recvd_mad, rsp_mad); +} + +/** + * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + * @attr_id: Attribute ID indicating multicast or unicast mac list + */ +static void vema_get_mac_list(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad, + u16 attr_id) +{ + struct opa_veswport_iface_macs *macs_in, *macs_out; + int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data; + macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data; + + macs_out->start_idx = macs_in->start_idx; + if (macs_in->num_macs_in_msg) + macs_out->num_macs_in_msg = macs_in->num_macs_in_msg; + else + macs_out->num_macs_in_msg = cpu_to_be16(max_entries); + + if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS) + opa_vnic_query_mcast_macs(adapter, macs_out); + else + opa_vnic_query_ucast_macs(adapter, macs_out); +} + +/** + * vema_get_summary_counters -- Gets summary counters. + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void vema_get_summary_counters(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_summary_counters *cntrs; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data; + opa_vnic_get_summary_counters(adapter, cntrs); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_get_error_counters -- Gets summary counters. 
+ * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void vema_get_error_counters(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_error_counters *cntrs; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + cntrs = (struct opa_veswport_error_counters *)rsp_mad->data; + opa_vnic_get_error_counters(adapter, cntrs); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_get -- Process received get MAD + * @port: source port on which MAD was received + * @recvd_mad: Received mad + * @rsp_mad: Response mad to be built + */ +static void vema_get(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id); + + switch (attr_id) { + case OPA_EM_ATTR_CLASS_PORT_INFO: + vema_get_class_port_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_INFO: + vema_get_veswport_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES: + vema_get_mac_entries(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_IFACE_UCAST_MACS: + /* fall through */ + case OPA_EM_ATTR_IFACE_MCAST_MACS: + vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id); + break; + case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS: + vema_get_summary_counters(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS: + vema_get_error_counters(port, recvd_mad, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } +} + +/** + * vema_set -- Process received set MAD + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void 
vema_set(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id); + + switch (attr_id) { + case OPA_EM_ATTR_CLASS_PORT_INFO: + vema_set_class_port_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_INFO: + vema_set_veswport_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES: + vema_set_mac_entries(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_DELETE_VESW: + vema_set_delete_vesw(port, recvd_mad, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } +} + +/** + * vema_send -- Send handler for VEMA MAD agent + * @mad_agent: pointer to the mad agent + * @mad_wc: pointer to mad send work completion information + * + * Free all the data structures associated with the sent MAD + */ +static void vema_send(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_wc) +{ + rdma_destroy_ah(mad_wc->send_buf->ah); + ib_free_send_mad(mad_wc->send_buf); +} + +/** + * vema_recv -- Recv handler for VEMA MAD agent + * @mad_agent: pointer to the mad agent + * @send_buf: Send buffer if found, else NULL + * @mad_wc: pointer to mad send work completion information + * + * Handle only set and get methods and respond to other methods + * as unsupported. Allocate response buffer and address handle + * for the response MAD. 
+ */ +static void vema_recv(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_wc) +{ + struct opa_vnic_vema_port *port; + struct ib_ah *ah; + struct ib_mad_send_buf *rsp; + struct opa_vnic_vema_mad *vema_mad; + + if (!mad_wc || !mad_wc->recv_buf.mad) + return; + + port = mad_agent->context; + ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc, + mad_wc->recv_buf.grh, mad_agent->port_num); + if (IS_ERR(ah)) + goto free_recv_mad; + + rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, + mad_wc->wc->pkey_index, 0, + IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA, + GFP_KERNEL, OPA_MGMT_BASE_VERSION); + if (IS_ERR(rsp)) + goto err_rsp; + + rsp->ah = ah; + vema_mad = rsp->mad; + memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR); + vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + vema_mad->mad_hdr.status = 0; + + /* Lock ensures network adapter is not removed */ + mutex_lock(&port->lock); + + switch (mad_wc->recv_buf.mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad, + vema_mad); + break; + case IB_MGMT_METHOD_SET: + vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad, + vema_mad); + break; + default: + vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } + mutex_unlock(&port->lock); + + if (!ib_post_send_mad(rsp, NULL)) { + /* + * with post send successful ah and send mad + * will be destroyed in send handler + */ + goto free_recv_mad; + } + + ib_free_send_mad(rsp); + +err_rsp: + rdma_destroy_ah(ah); +free_recv_mad: + ib_free_recv_mad(mad_wc); +} + +/** + * vema_get_port -- Gets the opa_vnic_vema_port + * @cport: pointer to control dev + * @port_num: Port number + * + * This function loops through the ports and returns + * the opa_vnic_vema port structure that is associated + * with the OPA port number + * + * Return: ptr to requested opa_vnic_vema_port strucure + * if success, NULL if not + */ +static struct 
opa_vnic_vema_port * +vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num) +{ + struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport); + + if (port_num > cport->num_ports) + return NULL; + + return port + (port_num - 1); +} + +/** + * opa_vnic_vema_send_trap -- This function sends a trap to the EM + * @cport: pointer to vnic control port + * @data: pointer to trap data filled by calling function + * @lid: issuers lid (encap_slid from vesw_port_info) + * + * This function is called from the VNIC driver to send a trap if there + * is somethng the EM should be notified about. These events currently + * are + * 1) UNICAST INTERFACE MACADDRESS changes + * 2) MULTICAST INTERFACE MACADDRESS changes + * 3) ETHERNET LINK STATUS changes + * While allocating the send mad the remote site qpn used is 1 + * as this is the well known QP. + * + */ +void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, + struct __opa_veswport_trap *data, u32 lid) +{ + struct opa_vnic_ctrl_port *cport = adapter->cport; + struct ib_mad_send_buf *send_buf; + struct opa_vnic_vema_port *port; + struct ib_device *ibp; + struct opa_vnic_vema_mad_trap *trap_mad; + struct opa_class_port_info *class; + struct rdma_ah_attr ah_attr; + struct ib_ah *ah; + struct opa_veswport_trap *trap; + u32 trap_lid; + u16 pkey_idx; + + if (!cport) + goto err_exit; + ibp = cport->ibdev; + port = vema_get_port(cport, data->opaportnum); + if (!port || !port->mad_agent) + goto err_exit; + + if (time_before(jiffies, adapter->trap_timeout)) { + if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) { + v_warn("Trap rate exceeded\n"); + goto err_exit; + } else { + adapter->trap_count++; + } + } else { + adapter->trap_count = 0; + } + + class = &port->class_port_info; + /* Set up address handle */ + memset(&ah_attr, 0, sizeof(ah_attr)); + ah_attr.type = rdma_ah_find_type(ibp, port->port_num); + rdma_ah_set_sl(&ah_attr, + GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd)); + 
rdma_ah_set_port_num(&ah_attr, port->port_num); + trap_lid = be32_to_cpu(class->trap_lid); + /* + * check for trap lid validity, must not be zero + * The trap sink could change after we fashion the MAD but since traps + * are not guaranteed we won't use a lock as anyway the change will take + * place even with locking. + */ + if (!trap_lid) { + c_err("%s: Invalid dlid\n", __func__); + goto err_exit; + } + + rdma_ah_set_dlid(&ah_attr, trap_lid); + ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + c_err("%s:Couldn't create new AH = %p\n", __func__, ah); + c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__, + rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr), + rdma_ah_get_port_num(&ah_attr)); + goto err_exit; + } + + if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL, + &pkey_idx) < 0) { + c_err("%s:full key not found, defaulting to partial\n", + __func__); + if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL, + &pkey_idx) < 0) + pkey_idx = 1; + } + + send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0, + IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA, + GFP_KERNEL, OPA_MGMT_BASE_VERSION); + if (IS_ERR(send_buf)) { + c_err("%s:Couldn't allocate send buf\n", __func__); + goto err_sndbuf; + } + + send_buf->ah = ah; + + /* Set up common MAD hdr */ + trap_mad = send_buf->mad; + trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION; + trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA; + trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION; + trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP; + port->tid++; + trap_mad->mad_hdr.tid = cpu_to_be64(port->tid); + trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE; + + /* Set up vendor OUI */ + trap_mad->oui[0] = INTEL_OUI_1; + trap_mad->oui[1] = INTEL_OUI_2; + trap_mad->oui[2] = INTEL_OUI_3; + + /* Setup notice attribute portion */ + trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1; + trap_mad->notice.oui_1 = INTEL_OUI_1; + trap_mad->notice.oui_2 = 
INTEL_OUI_2; + trap_mad->notice.oui_3 = INTEL_OUI_3; + trap_mad->notice.issuer_lid = cpu_to_be32(lid); + + /* copy the actual trap data */ + trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data; + trap->fabric_id = cpu_to_be16(data->fabric_id); + trap->veswid = cpu_to_be16(data->veswid); + trap->veswportnum = cpu_to_be32(data->veswportnum); + trap->opaportnum = cpu_to_be16(data->opaportnum); + trap->veswportindex = data->veswportindex; + trap->opcode = data->opcode; + + /* If successful send set up rate limit timeout else bail */ + if (ib_post_send_mad(send_buf, NULL)) { + ib_free_send_mad(send_buf); + } else { + if (adapter->trap_count) + return; + adapter->trap_timeout = jiffies + + usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT); + return; + } + +err_sndbuf: + rdma_destroy_ah(ah); +err_exit: + v_err("Aborting trap\n"); +} + +static int vema_rem_vport(int id, void *p, void *data) +{ + struct opa_vnic_adapter *adapter = p; + + opa_vnic_rem_netdev(adapter); + return 0; +} + +static int vema_enable_vport(int id, void *p, void *data) +{ + struct opa_vnic_adapter *adapter = p; + + netif_carrier_on(adapter->netdev); + return 0; +} + +static int vema_disable_vport(int id, void *p, void *data) +{ + struct opa_vnic_adapter *adapter = p; + + netif_carrier_off(adapter->netdev); + return 0; +} + +static void opa_vnic_event(struct ib_event_handler *handler, + struct ib_event *record) +{ + struct opa_vnic_vema_port *port = + container_of(handler, struct opa_vnic_vema_port, event_handler); + struct opa_vnic_ctrl_port *cport = port->cport; + + if (record->element.port_num != port->port_num) + return; + + c_dbg("OPA_VNIC received event %d on device %s port %d\n", + record->event, record->device->name, record->element.port_num); + + if (record->event == IB_EVENT_PORT_ERR) + idr_for_each(&port->vport_idr, vema_disable_vport, NULL); + if (record->event == IB_EVENT_PORT_ACTIVE) + idr_for_each(&port->vport_idr, vema_enable_vport, NULL); +} + +/** + * vema_unregister -- Unregisters 
agent + * @cport: pointer to control port + * + * This deletes the registration by VEMA for MADs + */ +static void vema_unregister(struct opa_vnic_ctrl_port *cport) +{ + int i; + + for (i = 1; i <= cport->num_ports; i++) { + struct opa_vnic_vema_port *port = vema_get_port(cport, i); + + if (!port->mad_agent) + continue; + + /* Lock ensures no MAD is being processed */ + mutex_lock(&port->lock); + idr_for_each(&port->vport_idr, vema_rem_vport, NULL); + mutex_unlock(&port->lock); + + ib_unregister_mad_agent(port->mad_agent); + port->mad_agent = NULL; + mutex_destroy(&port->lock); + idr_destroy(&port->vport_idr); + ib_unregister_event_handler(&port->event_handler); + } +} + +/** + * vema_register -- Registers agent + * @cport: pointer to control port + * + * This function registers the handlers for the VEMA MADs + * + * Return: returns 0 on success. non zero otherwise + */ +static int vema_register(struct opa_vnic_ctrl_port *cport) +{ + struct ib_mad_reg_req reg_req = { + .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA, + .mgmt_class_version = OPA_MGMT_BASE_VERSION, + .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 } + }; + int i; + + set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); + set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask); + + /* register ib event handler and mad agent for each port on dev */ + for (i = 1; i <= cport->num_ports; i++) { + struct opa_vnic_vema_port *port = vema_get_port(cport, i); + int ret; + + port->cport = cport; + port->port_num = i; + + INIT_IB_EVENT_HANDLER(&port->event_handler, + cport->ibdev, opa_vnic_event); + ret = ib_register_event_handler(&port->event_handler); + if (ret) { + c_err("port %d: event handler register failed\n", i); + vema_unregister(cport); + return ret; + } + + idr_init(&port->vport_idr); + mutex_init(&port->lock); + port->mad_agent = ib_register_mad_agent(cport->ibdev, i, + IB_QPT_GSI, ®_req, + IB_MGMT_RMPP_VERSION, + vema_send, vema_recv, + port, 0); + if (IS_ERR(port->mad_agent)) { + ret = PTR_ERR(port->mad_agent); + 
port->mad_agent = NULL; + mutex_destroy(&port->lock); + idr_destroy(&port->vport_idr); + vema_unregister(cport); + return ret; + } + } + + return 0; +} + +/** + * opa_vnic_vema_add_one -- Handle new ib device + * @device: ib device pointer + * + * Allocate the vnic control port and initialize it. + */ +static void opa_vnic_vema_add_one(struct ib_device *device) +{ + struct opa_vnic_ctrl_port *cport; + int rc, size = sizeof(*cport); + + if (!rdma_cap_opa_vnic(device)) + return; + + size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port); + cport = kzalloc(size, GFP_KERNEL); + if (!cport) + return; + + cport->num_ports = device->phys_port_cnt; + cport->ibdev = device; + + /* Initialize opa vnic management agent (vema) */ + rc = vema_register(cport); + if (!rc) + c_info("VNIC client initialized\n"); + + ib_set_client_data(device, &opa_vnic_client, cport); +} + +/** + * opa_vnic_vema_rem_one -- Handle ib device removal + * @device: ib device pointer + * @client_data: ib client data + * + * Uninitialize and free the vnic control port. 
+ */ +static void opa_vnic_vema_rem_one(struct ib_device *device, + void *client_data) +{ + struct opa_vnic_ctrl_port *cport = client_data; + + if (!cport) + return; + + c_info("removing VNIC client\n"); + vema_unregister(cport); + kfree(cport); +} + +static int __init opa_vnic_init(void) +{ + int rc; + + pr_info("OPA Virtual Network Driver - v%s\n", + opa_vnic_driver_version); + + rc = ib_register_client(&opa_vnic_client); + if (rc) + pr_err("VNIC driver register failed %d\n", rc); + + return rc; +} +module_init(opa_vnic_init); + +static void opa_vnic_deinit(void) +{ + ib_unregister_client(&opa_vnic_client); +} +module_exit(opa_vnic_deinit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c new file mode 100644 index 000000000000..a51bf977f4d6 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -0,0 +1,390 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC EMA Interface functions. + */ + +#include "opa_vnic_internal.h" + +/** + * opa_vnic_vema_report_event - sent trap to report the specified event + * @adapter: vnic port adapter + * @event: event to be reported + * + * This function calls vema api to sent a trap for the given event. 
+ */ +void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) +{ + struct __opa_veswport_info *info = &adapter->info; + struct __opa_veswport_trap trap_data; + + trap_data.fabric_id = info->vesw.fabric_id; + trap_data.veswid = info->vesw.vesw_id; + trap_data.veswportnum = info->vport.port_num; + trap_data.opaportnum = adapter->port_num; + trap_data.veswportindex = adapter->vport_num; + trap_data.opcode = event; + + opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid); +} + +/** + * opa_vnic_get_error_counters - get summary counters + * @adapter: vnic port adapter + * @cntrs: pointer to destination summary counters structure + * + * This function populates the summary counters that is maintained by the + * given adapter to destination address provided. + */ +void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_summary_counters *cntrs) +{ + struct opa_vnic_stats vstats; + __be64 *dst; + u64 *src; + + memset(&vstats, 0, sizeof(vstats)); + mutex_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); + mutex_unlock(&adapter->stats_lock); + + cntrs->vp_instance = cpu_to_be16(adapter->vport_num); + cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id); + cntrs->veswport_num = cpu_to_be32(adapter->port_num); + + cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors); + cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors); + cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets); + cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets); + cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes); + cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes); + + /* + * This loop depends on layout of + * opa_veswport_summary_counters opa_vnic_stats structures. 
+ */ + for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast; + dst < &cntrs->reserved[0]; dst++, src++) { + *dst = cpu_to_be64(*src); + } +} + +/** + * opa_vnic_get_error_counters - get error counters + * @adapter: vnic port adapter + * @cntrs: pointer to destination error counters structure + * + * This function populates the error counters that is maintained by the + * given adapter to destination address provided. + */ +void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_error_counters *cntrs) +{ + struct opa_vnic_stats vstats; + + memset(&vstats, 0, sizeof(vstats)); + mutex_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); + mutex_unlock(&adapter->stats_lock); + + cntrs->vp_instance = cpu_to_be16(adapter->vport_num); + cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id); + cntrs->veswport_num = cpu_to_be32(adapter->port_num); + + cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors); + cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors); + cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero); + cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state); + cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors + + vstats.netstats.tx_carrier_errors); + + cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler); + cntrs->rx_runt = cpu_to_be64(vstats.rx_runt); + cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize); + cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state); + cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors); +} + +/** + * opa_vnic_get_vesw_info -- Get the vesw information + * @adapter: vnic port adapter + * @info: pointer to destination vesw info structure + * + * This function copies the vesw info that is maintained by the + * given adapter to destination address provided. 
+ */ +void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info) +{ + struct __opa_vesw_info *src = &adapter->info.vesw; + int i; + + info->fabric_id = cpu_to_be16(src->fabric_id); + info->vesw_id = cpu_to_be16(src->vesw_id); + memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0)); + info->def_port_mask = cpu_to_be16(src->def_port_mask); + memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1)); + info->pkey = cpu_to_be16(src->pkey); + + memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2)); + info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid); + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) + info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]); + + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); + for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) + info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]); + + info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan); + memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4)); +} + +/** + * opa_vnic_set_vesw_info -- Set the vesw information + * @adapter: vnic port adapter + * @info: pointer to vesw info structure + * + * This function updates the vesw info that is maintained by the + * given adapter with vesw info provided. Reserved fields are stored + * and returned back to EM as is. 
+ */ +void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info) +{ + struct __opa_vesw_info *dst = &adapter->info.vesw; + int i; + + dst->fabric_id = be16_to_cpu(info->fabric_id); + dst->vesw_id = be16_to_cpu(info->vesw_id); + memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0)); + dst->def_port_mask = be16_to_cpu(info->def_port_mask); + memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1)); + dst->pkey = be16_to_cpu(info->pkey); + + memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2)); + dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid); + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) + dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]); + + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); + for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) + dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]); + + dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan); + memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4)); +} + +/** + * opa_vnic_get_per_veswport_info -- Get the vesw per port information + * @adapter: vnic port adapter + * @info: pointer to destination vport info structure + * + * This function copies the vesw per port info that is maintained by the + * given adapter to destination address provided. + * Note that the read only fields are not copied. 
+ */ +void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info) +{ + struct __opa_per_veswport_info *src = &adapter->info.vport; + + info->port_num = cpu_to_be32(src->port_num); + info->eth_link_status = src->eth_link_status; + memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0)); + + memcpy(info->base_mac_addr, src->base_mac_addr, + ARRAY_SIZE(info->base_mac_addr)); + info->config_state = src->config_state; + info->oper_state = src->oper_state; + info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent); + info->max_smac_ent = cpu_to_be16(src->max_smac_ent); + info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest); + memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1)); + + info->encap_slid = cpu_to_be32(src->encap_slid); + memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc, + ARRAY_SIZE(info->pcp_to_sc_uc)); + memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc, + ARRAY_SIZE(info->pcp_to_vl_uc)); + memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc, + ARRAY_SIZE(info->pcp_to_sc_mc)); + memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc, + ARRAY_SIZE(info->pcp_to_vl_mc)); + info->non_vlan_sc_uc = src->non_vlan_sc_uc; + info->non_vlan_vl_uc = src->non_vlan_vl_uc; + info->non_vlan_sc_mc = src->non_vlan_sc_mc; + info->non_vlan_vl_mc = src->non_vlan_vl_mc; + memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2)); + + info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count); + info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count); + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); +} + +/** + * opa_vnic_set_per_veswport_info -- Set vesw per port information + * @adapter: vnic port adapter + * @info: pointer to vport info structure + * + * This function updates the vesw per port info that is maintained by the + * given adapter with vesw per port info provided. Reserved fields are + * stored and returned back to EM as is. 
+ */ +void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info) +{ + struct __opa_per_veswport_info *dst = &adapter->info.vport; + + dst->port_num = be32_to_cpu(info->port_num); + memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0)); + + memcpy(dst->base_mac_addr, info->base_mac_addr, + ARRAY_SIZE(dst->base_mac_addr)); + dst->config_state = info->config_state; + memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1)); + + dst->encap_slid = be32_to_cpu(info->encap_slid); + memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc, + ARRAY_SIZE(dst->pcp_to_sc_uc)); + memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc, + ARRAY_SIZE(dst->pcp_to_vl_uc)); + memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc, + ARRAY_SIZE(dst->pcp_to_sc_mc)); + memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc, + ARRAY_SIZE(dst->pcp_to_vl_mc)); + dst->non_vlan_sc_uc = info->non_vlan_sc_uc; + dst->non_vlan_vl_uc = info->non_vlan_vl_uc; + dst->non_vlan_sc_mc = info->non_vlan_sc_mc; + dst->non_vlan_vl_mc = info->non_vlan_vl_mc; + memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2)); + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); +} + +/** + * opa_vnic_query_mcast_macs - query multicast mac list + * @adapter: vnic port adapter + * @macs: pointer mac list + * + * This function populates the provided mac list with the configured + * multicast addresses in the adapter. 
+ */ +void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs) +{ + u16 start_idx, num_macs, idx = 0, count = 0; + struct netdev_hw_addr *ha; + + start_idx = be16_to_cpu(macs->start_idx); + num_macs = be16_to_cpu(macs->num_macs_in_msg); + netdev_for_each_mc_addr(ha, adapter->netdev) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev)); + macs->num_macs_in_msg = cpu_to_be16(count); + macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count); +} + +/** + * opa_vnic_query_ucast_macs - query unicast mac list + * @adapter: vnic port adapter + * @macs: pointer mac list + * + * This function populates the provided mac list with the configured + * unicast addresses in the adapter. + */ +void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs) +{ + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0; + struct netdev_hw_addr *ha; + + start_idx = be16_to_cpu(macs->start_idx); + num_macs = be16_to_cpu(macs->num_macs_in_msg); + /* loop through dev_addrs list first */ + for_each_dev_addr(adapter->netdev, ha) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + /* Do not include EM specified MAC address */ + if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) + continue; + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + /* loop through uc list */ + netdev_for_each_uc_addr(ha, adapter->netdev) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + 
tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + + netdev_uc_count(adapter->netdev); + macs->tot_macs_in_lst = cpu_to_be16(tot_macs); + macs->num_macs_in_msg = cpu_to_be16(count); + macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); +} diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cee46266f434..def723a5df29 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -312,10 +312,15 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch) if (ch->cm_id) ib_destroy_cm_id(ch->cm_id); ch->cm_id = new_cm_id; + if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, + target->srp_host->port)) + ch->path.rec_type = SA_PATH_REC_TYPE_OPA; + else + ch->path.rec_type = SA_PATH_REC_TYPE_IB; ch->path.sgid = target->sgid; ch->path.dgid = target->orig_dgid; ch->path.pkey = target->pkey; - ch->path.service_id = target->service_id; + sa_path_set_service_id(&ch->path, target->service_id); return 0; } @@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, } static void srp_path_rec_completion(int status, - struct ib_sa_path_rec *pathrec, + struct sa_path_rec *pathrec, void *ch_ptr) { struct srp_rdma_ch *ch = ch_ptr; @@ -2399,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, switch (event->param.rej_rcvd.reason) { case IB_CM_REJ_PORT_CM_REDIRECT: cpi = event->param.rej_rcvd.ari; - ch->path.dlid = cpi->redirect_lid; + sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); ch->path.pkey = cpi->redirect_pkey; cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); - ch->status = ch->path.dlid ? + ch->status = sa_path_get_dlid(&ch->path) ? 
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; break; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 32ed40db3ca2..ab9077b81d5a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -152,7 +152,7 @@ struct srp_rdma_ch { struct completion done; int status; - struct ib_sa_path_rec path; + struct sa_path_rec path; struct ib_sa_query *path_query; int path_query_id; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ee026b6b4f0d..1ced0731c140 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_wc) { - ib_destroy_ah(mad_wc->send_buf->ah); + rdma_destroy_ah(mad_wc->send_buf->ah); ib_free_send_mad(mad_wc->send_buf); } @@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, ib_free_send_mad(rsp); err_rsp: - ib_destroy_ah(ah); + rdma_destroy_ah(ah); err: ib_free_recv_mad(mad_wc); } |