26 files changed, 4063 insertions, 523 deletions
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP)		+= srp/
 obj-$(CONFIG_INFINIBAND_SRPT)		+= srpt/
 obj-$(CONFIG_INFINIBAND_ISER)		+= iser/
 obj-$(CONFIG_INFINIBAND_ISERT)		+= isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC)	+= opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bed233bf45c3..ff50a7bd66d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
 #include <linux/sched.h>
-
 /* constants */
 
 enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
 	skb_pull(skb, IPOIB_HARD_LEN);
 }
 
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	return rn->clnt_priv;
+}
+
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
 	struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
 	struct timer_list poll_timer;
 	unsigned max_send_sge;
 	bool sm_fullmember_sendonly_support;
+	const struct net_device_ops	*rn_ops;
 };
 
 struct ipoib_ah {
@@ -416,7 +423,7 @@ struct ipoib_ah {
 
 struct ipoib_path {
 	struct net_device    *dev;
-	struct ib_sa_path_rec pathrec;
+	struct sa_path_rec pathrec;
 	struct ipoib_ah      *ah;
 	struct sk_buff_head   queue;
 
@@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
-				 struct ib_pd *pd, struct ib_ah_attr *attr);
+				 struct ib_pd *pd, struct rdma_ah_attr *attr);
 void ipoib_free_ah(struct kref *kref);
 static inline void ipoib_put_ah(struct ipoib_ah *ah)
 {
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-		struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+	       struct ib_ah *address, u32 dqpn);
 void ipoib_reap_ah(struct work_struct *work);
 
 struct ipoib_path *__path_find(struct net_device *dev, void *gid);
 void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+					const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
 void ipoib_ib_dev_flush_light(struct work_struct *work);
 void ipoib_ib_dev_flush_normal(struct work_struct *work);
 void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
+int ipoib_ib_dev_open_default(struct net_device *dev);
 int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_ib_dev_up(struct net_device *dev);
 void ipoib_ib_dev_down(struct net_device *dev);
-void ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 			  struct ipoib_path *path);
 #endif
 
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
-		       union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+		       union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+		       union ib_gid *mgid, u16 mlid);
 void ipoib_mcast_remove_list(struct list_head *remove_list);
 void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
 				struct list_head *remove_list);
@@ -587,7 +597,7 @@ void __exit ipoib_netlink_fini(void);
 void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
 int  ipoib_set_mode(struct net_device *dev, const char *buf);
 
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
 
 void ipoib_pkey_open(struct ipoib_dev_priv *priv);
 void ipoib_drain_cq(struct net_device *dev);
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
 
 static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
 
 static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	return IPOIB_CM_SUPPORTED(hwaddr) &&
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
 
 static inline int ipoib_cm_has_srq(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	return !!priv->cm.srq;
 }
 
 static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	return priv->cm.max_cm_mtu;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0cdf2b7f272f..7cbcfdac6529 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -92,7 +92,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 
 static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_recv_wr *bad_wr;
 	int i, ret;
 
@@ -118,7 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
 					struct ib_recv_wr *wr,
 					struct ib_sge *sge, int id)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_recv_wr *bad_wr;
 	int i, ret;
 
@@ -145,7 +145,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
 					     u64 mapping[IPOIB_CM_RX_SG],
 					     gfp_t gfp)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct sk_buff *skb;
 	int i;
 
@@ -196,7 +196,7 @@ partial_error:
 static void ipoib_cm_free_rx_ring(struct net_device *dev,
 				  struct ipoib_cm_rx_buf *rx_ring)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int i;
 
 	for (i = 0; i < ipoib_recvq_size; ++i)
@@ -235,7 +235,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
 static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
 {
 	struct ipoib_cm_rx *p = ctx;
-	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
 	unsigned long flags;
 
 	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -251,7 +251,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
 static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 					   struct ipoib_cm_rx *p)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_init_attr attr = {
 		.event_handler = ipoib_cm_rx_event_handler,
 		.send_cq = priv->recv_cq, /* For drain WR */
@@ -276,7 +276,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
 				 struct ib_cm_id *cm_id, struct ib_qp *qp,
 				 unsigned psn)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
@@ -331,7 +331,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
 				struct ib_recv_wr *wr,
 				struct ib_sge *sge)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int i;
 
 	for (i = 0; i < priv->cm.num_frags; ++i)
@@ -349,7 +349,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
 static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
 				   struct ipoib_cm_rx *rx)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct {
 		struct ib_recv_wr wr;
 		struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -422,7 +422,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
 			     unsigned psn)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_data data = {};
 	struct ib_cm_rep_param rep = {};
 
@@ -442,7 +442,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 {
 	struct net_device *dev = cm_id->context;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_rx *p;
 	unsigned psn;
 	int ret;
@@ -515,7 +515,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 		/* Fall through */
 	case IB_CM_REJ_RECEIVED:
 		p = cm_id->context;
-		priv = netdev_priv(p->dev);
+		priv = ipoib_priv(p->dev);
 		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
 			ipoib_warn(priv, "unable to move qp to error state\n");
 		/* Fall through */
@@ -559,7 +559,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_rx_buf *rx_ring;
 	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
 	struct sk_buff *skb, *newskb;
@@ -708,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int rc;
 	unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -786,7 +786,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 
 void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_tx *tx = wc->qp->qp_context;
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
 	struct ipoib_tx_buf *tx_req;
@@ -855,7 +855,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 int ipoib_cm_dev_open(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int ret;
 
 	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -887,7 +887,7 @@ err_cm:
 
 static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_rx *rx, *n;
 	LIST_HEAD(list);
 
@@ -910,7 +910,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
 
 void ipoib_cm_dev_stop(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_rx *p;
 	unsigned long begin;
 	int ret;
@@ -969,7 +969,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *p = cm_id->context;
-	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
 	struct ipoib_cm_data *data = event->private_data;
 	struct sk_buff_head skqueue;
 	struct ib_qp_attr qp_attr;
@@ -1037,7 +1037,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 
 static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_init_attr attr = {
 		.send_cq		= priv->recv_cq,
 		.recv_cq		= priv->recv_cq,
@@ -1068,9 +1068,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 static int ipoib_cm_send_req(struct net_device *dev,
 			     struct ib_cm_id *id, struct ib_qp *qp,
 			     u32 qpn,
-			     struct ib_sa_path_rec *pathrec)
+			     struct sa_path_rec *pathrec)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_data data = {};
 	struct ib_cm_req_param req = {};
 
@@ -1105,7 +1105,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
 static int ipoib_cm_modify_tx_init(struct net_device *dev,
 				  struct ib_cm_id *cm_id, struct ib_qp *qp)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 	ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1128,9 +1128,9 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
 }
 
 static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
-			    struct ib_sa_path_rec *pathrec)
+			    struct sa_path_rec *pathrec)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
 	int ret;
 
 	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
@@ -1186,7 +1186,7 @@ err_tx:
 
 static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
 	struct ipoib_tx_buf *tx_req;
 	unsigned long begin;
 
@@ -1236,7 +1236,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			       struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *tx = cm_id->context;
-	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
 	struct net_device *dev = priv->dev;
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
@@ -1287,7 +1287,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
 				       struct ipoib_neigh *neigh)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_tx *tx;
 
 	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 
 void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
 	unsigned long flags;
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		spin_lock_irqsave(&priv->lock, flags);
@@ -1332,7 +1332,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 	struct ipoib_path *path;
 	int ret;
 
-	struct ib_sa_path_rec pathrec;
+	struct sa_path_rec pathrec;
 	u32 qpn;
 
 	netif_tx_lock_bh(dev);
@@ -1441,7 +1441,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 			   unsigned int mtu)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int e = skb_queue_empty(&priv->cm.skb_queue);
 
 	if (skb_dst(skb))
@@ -1490,7 +1490,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 static ssize_t show_mode(struct device *d, struct device_attribute *attr,
 			 char *buf)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+	struct net_device *dev = to_net_dev(d);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
 		return sprintf(buf, "connected\n");
@@ -1503,7 +1504,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 {
 	struct net_device *dev = to_net_dev(d);
 	int ret;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
 		return -EPERM;
@@ -1532,7 +1533,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
 
 static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
 		.srq_type = IB_SRQT_BASIC,
 		.attr = {
@@ -1561,7 +1562,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 
 int ipoib_cm_dev_init(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int max_srq_sge, i;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1622,7 +1623,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
 
 void ipoib_cm_dev_cleanup(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int ret;
 
 	if (!priv->cm.srq)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index bac455a1942d..874b24366e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,7 +60,7 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
 static void ipoib_get_drvinfo(struct net_device *netdev,
 			      struct ethtool_drvinfo *drvinfo)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(netdev);
+	struct ipoib_dev_priv *priv = ipoib_priv(netdev);
 
 	ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
 			     sizeof(drvinfo->fw_version));
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
 static int ipoib_get_coalesce(struct net_device *dev,
 			      struct ethtool_coalesce *coal)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
 	coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
 static int ipoib_set_coalesce(struct net_device *dev,
 			      struct ethtool_coalesce *coal)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int ret;
 
 	/*
@@ -155,7 +155,82 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
 	return -EOPNOTSUPP;
 }
 
+/*
+ * Return lane speed in unit of 1e6 bit/sec for an IB_SPEED_* value.
+ * Values without a case below map to SPEED_UNKNOWN.
+ */
+static inline int ib_speed_enum_to_int(int speed)
+{
+	switch (speed) {
+	case IB_SPEED_SDR:
+		return SPEED_2500;
+	case IB_SPEED_DDR:
+		return SPEED_5000;
+	case IB_SPEED_QDR:
+	case IB_SPEED_FDR10:
+		return SPEED_10000;
+	case IB_SPEED_FDR:
+		return SPEED_14000;
+	case IB_SPEED_EDR:
+		return SPEED_25000;
+	}
+
+	return SPEED_UNKNOWN;
+}
+
+/**
+ * ipoib_get_link_ksettings - ethtool ->get_link_ksettings() handler
+ * @netdev: net device being queried
+ * @cmd: link settings to fill in
+ *
+ * Without carrier, speed and duplex are reported as unknown and the port
+ * is not queried.  Otherwise the reported speed is the per-lane speed
+ * multiplied by the converted active width from ib_query_port().
+ *
+ * Return: 0 on success, -EINVAL if the port query fails or either the
+ * speed or the width conversion yields a negative value.
+ */
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	/* priv hangs off rdma_netdev->clnt_priv, hence ipoib_priv() */
+	struct ipoib_dev_priv *priv = ipoib_priv(netdev);
+	struct ib_port_attr attr;
+	int ret, speed, width;
+
+	if (!netif_carrier_ok(netdev)) {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		return 0;
+	}
+
+	ret = ib_query_port(priv->ca, priv->port, &attr);
+	if (ret < 0)
+		return -EINVAL;
+
+	speed = ib_speed_enum_to_int(attr.active_speed);
+	width = ib_width_enum_to_int(attr.active_width);
+
+	if (speed < 0 || width < 0)
+		return -EINVAL;
+
+	/* Only the members below are set here; the other members of
+	 * the struct ethtool_link_settings are initialized to
+	 * zero in the function __ethtool_get_link_ksettings.
+	 */
+	cmd->base.speed		 = speed * width;
+	cmd->base.duplex	 = DUPLEX_FULL;
+
+	cmd->base.phy_address	 = 0xFF;
+
+	cmd->base.autoneg	 = AUTONEG_ENABLE;
+	cmd->base.port		 = PORT_OTHER;
+
+	return 0;
+}
+
 static const struct ethtool_ops ipoib_ethtool_ops = {
+	.get_link_ksettings	= ipoib_get_link_ksettings,
 	.get_drvinfo		= ipoib_get_drvinfo,
 	.get_coalesce		= ipoib_get_coalesce,
 	.set_coalesce		= ipoib_set_coalesce,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..11f74cbe6660 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
 	seq_printf(file,
 		   "GID: %s\n"
 		   "  complete: %6s\n",
-		   gid_buf, path.pathrec.dlid ? "yes" : "no");
+		   gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no");
 
-	if (path.pathrec.dlid) {
+	if (sa_path_get_dlid(&path.pathrec)) {
 		rate = ib_rate_to_mbps(path.pathrec.rate);
 
 		seq_printf(file,
 			   "  DLID:     0x%04x\n"
 			   "  SL: %12d\n"
 			   "  rate: %8d.%d Gb/sec\n",
-			   be16_to_cpu(path.pathrec.dlid),
+			   be32_to_cpu(sa_path_get_dlid(&path.pathrec)),
 			   path.pathrec.sl,
 			   rate / 1000, rate % 1000);
 	}
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
 
 void ipoib_create_debug_files(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	char name[IFNAMSIZ + sizeof "_path"];
 
 	snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
 
 void ipoib_delete_debug_files(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
+	WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+	WARN_ONCE(!priv->path_dentry, "null path debug file\n");
 	debugfs_remove(priv->mcg_dentry);
 	debugfs_remove(priv->path_dentry);
+	priv->mcg_dentry = priv->path_dentry = NULL;
 }
 
 int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 12c4f84a6639..0060b2f9f659 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level,
 #endif
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
-				 struct ib_pd *pd, struct ib_ah_attr *attr)
+				 struct ib_pd *pd, struct rdma_ah_attr *attr)
 {
 	struct ipoib_ah *ah;
 	struct ib_ah *vah;
@@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 	ah->last_send = 0;
 	kref_init(&ah->ref);
 
-	vah = ib_create_ah(pd, attr);
+	vah = rdma_create_ah(pd, attr);
 	if (IS_ERR(vah)) {
 		kfree(ah);
 		ah = (struct ipoib_ah *)vah;
 	} else {
 		ah->ah = vah;
-		ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+		ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
 	}
 
 	return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 void ipoib_free_ah(struct kref *kref)
 {
 	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
-	struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
 
 	unsigned long flags;
 
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
 
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_recv_wr *bad_wr;
 	int ret;
 
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
 
 static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct sk_buff *skb;
 	int buf_size;
 	u64 *mapping;
@@ -153,7 +153,7 @@ error:
 
 static int ipoib_ib_post_receives(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int i;
 
 	for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 
 static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
 	struct sk_buff *skb;
 	u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
 
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	unsigned int wr_id = wc->wr_id;
 	struct ipoib_tx_buf *tx_req;
 
@@ -485,14 +485,14 @@ poll_more:
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 {
 	struct net_device *dev = dev_ptr;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	napi_schedule(&priv->napi);
 }
 
 static void drain_tx_cq(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	netif_tx_lock(dev);
 	while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
 
 void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
 
 	mod_timer(&priv->poll_timer, jiffies);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
-			    struct ib_ah *address, u32 qpn,
+			    struct ib_ah *address, u32 dqpn,
 			    struct ipoib_tx_buf *tx_req,
 			    void *head, int hlen)
 {
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	ipoib_build_sge(priv, tx_req);
 
 	priv->tx_wr.wr.wr_id	= wr_id;
-	priv->tx_wr.remote_qpn	= qpn;
+	priv->tx_wr.remote_qpn	= dqpn;
 	priv->tx_wr.ah		= address;
 
 	if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
 }
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-		struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+	       struct ib_ah *address, u32 dqpn)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int hlen, rc;
 	void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			dev_kfree_skb_any(skb);
-			return;
+			return -1;
 		}
 	} else {
 		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
-			return;
+			return -1;
 		}
 		phead = NULL;
 		hlen  = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			dev_kfree_skb_any(skb);
-			return;
+			return -1;
 		}
 		/* Does skb_linearize return ok without reducing nr_frags? */
 		if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			dev_kfree_skb_any(skb);
-			return;
+			return -1;
 		}
 	}
 
-	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
-		       skb->len, address, qpn);
+	ipoib_dbg_data(priv,
+		       "sending packet, length=%d address=%p dqpn=0x%06x\n",
+		       skb->len, address, dqpn);
 
 	/*
 	 * We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++dev->stats.tx_errors;
 		dev_kfree_skb_any(skb);
-		return;
+		return -1;
 	}
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	skb_dst_drop(skb);
 
 	rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-		       address->ah, qpn, tx_req, phead, hlen);
+		       address, dqpn, tx_req, phead, hlen);
 	if (unlikely(rc)) {
 		ipoib_warn(priv, "post_send failed, error %d\n", rc);
 		++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		dev_kfree_skb_any(skb);
 		if (netif_queue_stopped(dev))
 			netif_wake_queue(dev);
+		rc = 0;
 	} else {
 		netif_trans_update(dev);
 
-		address->last_send = priv->tx_head;
+		rc = priv->tx_head;
 		++priv->tx_head;
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
 		while (poll_tx(priv))
 			; /* nothing */
+
+	return rc;
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_ah *ah, *tah;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
@@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
 	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
 		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
 			list_del(&ah->list);
-			ib_destroy_ah(ah->ah);
+			rdma_destroy_ah(ah->ah);
 			kfree(ah);
 		}
 
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
 
 static void ipoib_flush_ah(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	cancel_delayed_work(&priv->ah_reap_task);
 	flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
 
 static void ipoib_stop_ah(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	ipoib_flush_ah(dev);
 }
 
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
 {
-	drain_tx_cq((struct net_device *)ctx);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	int pending = 0;
+	int i;
+
+	for (i = 0; i < ipoib_recvq_size; ++i)
+		if (priv->rx_ring[i].skb)
+			++pending;
+
+	return pending;
 }
 
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int ret;
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct ib_qp_attr qp_attr;
+	unsigned long begin;
+	struct ipoib_tx_buf *tx_req;
+	int i;
 
-	ipoib_pkey_dev_check_presence(dev);
+	if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+		napi_disable(&priv->napi);
 
-	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
-		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
-			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
-		return -1;
+	ipoib_cm_dev_stop(dev);
+
+	/*
+	 * Move our QP to the error state and then reinitialize it
+	 * when all work requests have completed or have been flushed.
+	 */
+	qp_attr.qp_state = IB_QPS_ERR;
+	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+	/* Wait for all sends and receives to complete */
+	begin = jiffies;
+
+	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+		if (time_after(jiffies, begin + 5 * HZ)) {
+			ipoib_warn(priv,
+				   "timing out; %d sends %d receives not completed\n",
+				   priv->tx_head - priv->tx_tail,
+				   recvs_pending(dev));
+
+			/*
+			 * assume the HW is wedged and just free up
+			 * all our pending work requests.
+			 */
+			while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+				tx_req = &priv->tx_ring[priv->tx_tail &
+							(ipoib_sendq_size - 1)];
+				ipoib_dma_unmap_tx(priv, tx_req);
+				dev_kfree_skb_any(tx_req->skb);
+				++priv->tx_tail;
+				--priv->tx_outstanding;
+			}
+
+			for (i = 0; i < ipoib_recvq_size; ++i) {
+				struct ipoib_rx_buf *rx_req;
+
+				rx_req = &priv->rx_ring[i];
+				if (!rx_req->skb)
+					continue;
+				ipoib_ud_dma_unmap_rx(priv,
+						      priv->rx_ring[i].mapping);
+				dev_kfree_skb_any(rx_req->skb);
+				rx_req->skb = NULL;
+			}
+
+			goto timeout;
+		}
+
+		ipoib_drain_cq(dev);
+
+		msleep(1);
 	}
 
+	ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+	del_timer_sync(&priv->poll_timer);
+	qp_attr.qp_state = IB_QPS_RESET;
+	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+	return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+	priv->rn_ops->ndo_stop(dev);
+
+	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+	ipoib_flush_ah(dev);
+
+	return 0;
+}
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+	drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	int ret;
+
 	ret = ipoib_init_qp(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,60 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		goto dev_stop;
+		goto out;
 	}
 
 	ret = ipoib_cm_dev_open(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
-		goto dev_stop;
+		goto out;
+	}
+
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+		napi_enable(&priv->napi);
+
+	return 0;
+out:
+	return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+	ipoib_pkey_dev_check_presence(dev);
+
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+		return -1;
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
 	queue_delayed_work(priv->wq, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
-	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-		napi_enable(&priv->napi);
+	if (priv->rn_ops->ndo_open(dev)) {
+		pr_warn("%s: Failed to open dev\n", dev->name);
+		goto dev_stop;
+	}
+
+	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
 	return 0;
+
 dev_stop:
-	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-		napi_enable(&priv->napi);
+	set_bit(IPOIB_STOP_REAPER, &priv->flags);
+	cancel_delayed_work(&priv->ah_reap_task);
+	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+	napi_enable(&priv->napi);
 	ipoib_ib_dev_stop(dev);
 	return -1;
 }
 
 void ipoib_pkey_dev_check_presence(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (!(priv->pkey & 0x7fff) ||
 	    ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -757,7 +882,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
 
 void ipoib_ib_dev_up(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_pkey_dev_check_presence(dev);
 
@@ -773,7 +898,7 @@ void ipoib_ib_dev_up(struct net_device *dev)
 
 void ipoib_ib_dev_down(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg(priv, "downing ib_dev\n");
 
@@ -786,22 +911,9 @@ void ipoib_ib_dev_down(struct net_device *dev)
 	ipoib_flush_paths(dev);
 }
 
-static int recvs_pending(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int pending = 0;
-	int i;
-
-	for (i = 0; i < ipoib_recvq_size; ++i)
-		if (priv->rx_ring[i].skb)
-			++pending;
-
-	return pending;
-}
-
 void ipoib_drain_cq(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int i, n;
 
 	/*
@@ -838,107 +950,6 @@ void ipoib_drain_cq(struct net_device *dev)
 	local_bh_enable();
 }
 
-void ipoib_ib_dev_stop(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_qp_attr qp_attr;
-	unsigned long begin;
-	struct ipoib_tx_buf *tx_req;
-	int i;
-
-	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-		napi_disable(&priv->napi);
-
-	ipoib_cm_dev_stop(dev);
-
-	/*
-	 * Move our QP to the error state and then reinitialize in
-	 * when all work requests have completed or have been flushed.
-	 */
-	qp_attr.qp_state = IB_QPS_ERR;
-	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
-		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
-	/* Wait for all sends and receives to complete */
-	begin = jiffies;
-
-	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
-		if (time_after(jiffies, begin + 5 * HZ)) {
-			ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
-				   priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
-			/*
-			 * assume the HW is wedged and just free up
-			 * all our pending work requests.
-			 */
-			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
-				tx_req = &priv->tx_ring[priv->tx_tail &
-							(ipoib_sendq_size - 1)];
-				ipoib_dma_unmap_tx(priv, tx_req);
-				dev_kfree_skb_any(tx_req->skb);
-				++priv->tx_tail;
-				--priv->tx_outstanding;
-			}
-
-			for (i = 0; i < ipoib_recvq_size; ++i) {
-				struct ipoib_rx_buf *rx_req;
-
-				rx_req = &priv->rx_ring[i];
-				if (!rx_req->skb)
-					continue;
-				ipoib_ud_dma_unmap_rx(priv,
-						      priv->rx_ring[i].mapping);
-				dev_kfree_skb_any(rx_req->skb);
-				rx_req->skb = NULL;
-			}
-
-			goto timeout;
-		}
-
-		ipoib_drain_cq(dev);
-
-		msleep(1);
-	}
-
-	ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
-	del_timer_sync(&priv->poll_timer);
-	qp_attr.qp_state = IB_QPS_RESET;
-	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
-		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
-	ipoib_flush_ah(dev);
-
-	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	priv->ca = ca;
-	priv->port = port;
-	priv->qp = NULL;
-
-	if (ipoib_transport_dev_init(dev, ca)) {
-		printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
-		return -ENODEV;
-	}
-
-	setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
-		    (unsigned long) dev);
-
-	if (dev->flags & IFF_UP) {
-		if (ipoib_ib_dev_open(dev)) {
-			ipoib_transport_dev_cleanup(dev);
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * Takes whatever value which is in pkey index 0 and updates priv->pkey
  * returns 0 if the pkey value was changed.
@@ -967,6 +978,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
 		 */
 		priv->dev->broadcast[8] = priv->pkey >> 8;
 		priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+		/*
+		 * Update the broadcast address in the priv->broadcast object,
+		 * in case it already exists, otherwise no one will do that.
+		 */
+		if (priv->broadcast) {
+			spin_lock_irq(&priv->lock);
+			memcpy(priv->broadcast->mcmember.mgid.raw,
+			       priv->dev->broadcast + 4,
+			sizeof(union ib_gid));
+			spin_unlock_irq(&priv->lock);
+		}
+
 		return 0;
 	}
 
@@ -1216,7 +1240,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg(priv, "cleaning up ib_dev\n");
 	/*
@@ -1236,7 +1260,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 	 */
 	ipoib_stop_ah(dev);
 
-	ipoib_transport_dev_cleanup(dev);
-}
+	clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 
+	priv->rn_ops->ndo_uninit(dev);
+
+	if (priv->pd) {
+		ib_dealloc_pd(priv->pd);
+		priv->pd = NULL;
+	}
+}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d1d3fb7a6127..2869d1adb1de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
 	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
 };
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	struct netdev_notifier_info *ni = ptr;
+	struct net_device *dev = ni->dev;
+
+	if (dev->netdev_ops->ndo_open != ipoib_open)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		ipoib_create_debug_files(dev);
+		break;
+	case NETDEV_CHANGENAME:
+		ipoib_delete_debug_files(dev);
+		ipoib_create_debug_files(dev);
+		break;
+	case NETDEV_UNREGISTER:
+		ipoib_delete_debug_files(dev);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+#endif
+
 int ipoib_open(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg(priv, "bringing up interface\n");
 
@@ -157,7 +184,7 @@ err_disable:
 
 static int ipoib_stop(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg(priv, "stopping interface\n");
 
@@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
 
 static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
 		features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
 
 static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	/* dev->mtu > 2K ==> connected mode */
 	if (ipoib_cm_admin_enabled(dev)) {
@@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
 
 int ipoib_set_mode(struct net_device *dev, const char *buf)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
 	     !strcmp(buf, "connected\n")) ||
@@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
 
 struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rb_node *n = priv->path_tree.rb_node;
 	struct ipoib_path *path;
 	int ret;
@@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 
 static int __path_add(struct net_device *dev, struct ipoib_path *path)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rb_node **n = &priv->path_tree.rb_node;
 	struct rb_node *pn = NULL;
 	struct ipoib_path *tpath;
@@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
 	while ((skb = __skb_dequeue(&path->queue)))
 		dev_kfree_skb_irq(skb);
 
-	ipoib_dbg(netdev_priv(dev), "path_free\n");
+	ipoib_dbg(ipoib_priv(dev), "path_free\n");
 
 	/* remove all neigh connected to this path */
 	ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
 
 int ipoib_path_iter_next(struct ipoib_path_iter *iter)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
 	struct rb_node *n;
 	struct ipoib_path *path;
 	int ret = 1;
@@ -635,92 +662,21 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 void ipoib_mark_paths_invalid(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_path *path, *tp;
 
 	spin_lock_irq(&priv->lock);
 
 	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
-		ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
-			be16_to_cpu(path->pathrec.dlid),
-			path->pathrec.dgid.raw);
+		ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+			  path->pathrec.dgid.raw);
 		path->valid =  0;
 	}
 
 	spin_unlock_irq(&priv->lock);
 }
 
-struct classport_info_context {
-	struct ipoib_dev_priv	*priv;
-	struct completion	done;
-	struct ib_sa_query	*sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
-				    void *context)
-{
-	struct classport_info_context *cb_ctx = context;
-	struct ipoib_dev_priv *priv;
-
-	WARN_ON(!context);
-
-	priv = cb_ctx->priv;
-
-	if (status || !rec) {
-		pr_debug("device: %s failed query classport_info status: %d\n",
-			 priv->dev->name, status);
-		/* keeps the default, will try next mcast_restart */
-		priv->sm_fullmember_sendonly_support = false;
-		goto out;
-	}
-
-	if (ib_get_cpi_capmask2(rec) &
-	    IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
-		pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
-			 priv->dev->name);
-		priv->sm_fullmember_sendonly_support = true;
-	} else {
-		pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
-			 priv->dev->name);
-		priv->sm_fullmember_sendonly_support = false;
-	}
-
-out:
-	complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
-	struct classport_info_context *callback_context;
-	int ret;
-
-	callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
-	if (!callback_context)
-		return -ENOMEM;
-
-	callback_context->priv = priv;
-	init_completion(&callback_context->done);
-
-	ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
-					     priv->ca, priv->port, 3000,
-					     GFP_KERNEL,
-					     classport_info_query_cb,
-					     callback_context,
-					     &callback_context->sa_query);
-	if (ret < 0) {
-		pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
-			priv->dev->name, ret);
-		kfree(callback_context);
-		return ret;
-	}
-
-	/* waiting for the callback to finish before returnning */
-	wait_for_completion(&callback_context->done);
-	kfree(callback_context);
-
-	return ret;
-}
-
 static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 {
 	struct ipoib_pseudo_header *phdr;
@@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 
 void ipoib_flush_paths(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
@@ -760,12 +716,12 @@ void ipoib_flush_paths(struct net_device *dev)
 }
 
 static void path_rec_completion(int status,
-				struct ib_sa_path_rec *pathrec,
+				struct sa_path_rec *pathrec,
 				void *path_ptr)
 {
 	struct ipoib_path *path = path_ptr;
 	struct net_device *dev = path->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_ah *ah = NULL;
 	struct ipoib_ah *old_ah = NULL;
 	struct ipoib_neigh *neigh, *tn;
@@ -775,7 +731,8 @@ static void path_rec_completion(int status,
 
 	if (!status)
 		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
-			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+			  be32_to_cpu(sa_path_get_dlid(pathrec)),
+			  pathrec->dgid.raw);
 	else
 		ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
 			  status, path->pathrec.dgid.raw);
@@ -783,7 +740,7 @@ static void path_rec_completion(int status,
 	skb_queue_head_init(&skqueue);
 
 	if (!status) {
-		struct ib_ah_attr av;
+		struct rdma_ah_attr av;
 
 		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
 			ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -798,7 +755,8 @@ static void path_rec_completion(int status,
 		path->ah = ah;
 
 		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
-			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+			  ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+			  pathrec->sl);
 
 		while ((skb = __skb_dequeue(&path->queue)))
 			__skb_queue_tail(&skqueue, skb);
@@ -858,7 +816,7 @@ static void path_rec_completion(int status,
 
 static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_path *path;
 
 	if (!priv->broadcast)
@@ -874,6 +832,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 
 	INIT_LIST_HEAD(&path->neigh_list);
 
+	if (rdma_cap_opa_ah(priv->ca, priv->port))
+		path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+	else
+		path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
 	memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
 	path->pathrec.sgid	    = priv->local_gid;
 	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
@@ -886,7 +848,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 static int path_rec_start(struct net_device *dev,
 			  struct ipoib_path *path)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg(priv, "Start path record lookup for %pI6\n",
 		  path->pathrec.dgid.raw);
@@ -917,7 +879,8 @@ static int path_rec_start(struct net_device *dev,
 static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 			   struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
@@ -964,7 +927,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 			}
 		} else {
 			spin_unlock_irqrestore(&priv->lock, flags);
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+			path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+						       IPOIB_QPN(daddr));
 			ipoib_neigh_put(neigh);
 			return;
 		}
@@ -998,7 +962,8 @@ err_drop:
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 			     struct ipoib_pseudo_header *phdr)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	struct ipoib_path *path;
 	unsigned long flags;
 
@@ -1038,11 +1003,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	if (path->ah) {
-		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
-			  be16_to_cpu(path->pathrec.dlid));
+		ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
 
 		spin_unlock_irqrestore(&priv->lock, flags);
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+					       IPOIB_QPN(phdr->hwaddr));
 		return;
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
@@ -1058,7 +1024,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 
 static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	struct ipoib_neigh *neigh;
 	struct ipoib_pseudo_header *phdr;
 	struct ipoib_header *header;
@@ -1122,7 +1089,8 @@ send_using_neigh:
 			goto unref;
 		}
 	} else if (neigh->ah) {
-		ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+		neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+						IPOIB_QPN(phdr->hwaddr));
 		goto unref;
 	}
 
@@ -1144,7 +1112,7 @@ unref:
 
 static void ipoib_timeout(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
 		   jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1178,7 +1146,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
 
 static void ipoib_set_mcast_list(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
 		ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1190,7 +1158,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 
 static int ipoib_get_iflink(const struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	/* parent interface */
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1218,7 +1186,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
 
 struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_neigh_table *ntbl = &priv->ntbl;
 	struct ipoib_neigh_hash *htbl;
 	struct ipoib_neigh *neigh = NULL;
@@ -1347,7 +1315,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
 struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
 				      struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_neigh_table *ntbl = &priv->ntbl;
 	struct ipoib_neigh_hash *htbl;
 	struct ipoib_neigh *neigh;
@@ -1404,7 +1372,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
 {
 	/* neigh reference count was dropprd to zero */
 	struct net_device *dev = neigh->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct sk_buff *skb;
 	if (neigh->ah)
 		ipoib_put_ah(neigh->ah);
@@ -1414,7 +1382,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
 	}
 	if (ipoib_cm_get(neigh))
 		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
-	ipoib_dbg(netdev_priv(dev),
+	ipoib_dbg(ipoib_priv(dev),
 		  "neigh free for %06x %pI6\n",
 		  IPOIB_QPN(neigh->daddr),
 		  neigh->daddr + 4);
@@ -1436,7 +1404,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
 void ipoib_neigh_free(struct ipoib_neigh *neigh)
 {
 	struct net_device *dev = neigh->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_neigh_table *ntbl = &priv->ntbl;
 	struct ipoib_neigh_hash *htbl;
 	struct ipoib_neigh __rcu **np;
@@ -1519,7 +1487,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 
 void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_neigh_table *ntbl = &priv->ntbl;
 	struct ipoib_neigh_hash *htbl;
 	unsigned long flags;
@@ -1605,7 +1573,7 @@ out_unlock:
 
 static void ipoib_neigh_hash_uninit(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int stopped;
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1622,10 +1590,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 	wait_for_completion(&priv->ntbl.deleted);
 }
 
+void ipoib_dev_uninit_default(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+	ipoib_transport_dev_cleanup(dev);
+
+	ipoib_cm_dev_cleanup(dev);
+
+	kfree(priv->rx_ring);
+	vfree(priv->tx_ring);
+
+	priv->rx_ring = NULL;
+	priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+	netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1636,46 +1620,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-		       ca->name, ipoib_sendq_size);
+		       priv->ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 
 	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
-	if (ipoib_ib_dev_init(dev, ca, port))
+	if (ipoib_transport_dev_init(dev, priv->ca)) {
+		pr_warn("%s: ipoib_transport_dev_init failed\n",
+			priv->ca->name);
 		goto out_tx_ring_cleanup;
+	}
+
+	/* now that the QP is created, set its QPN in the device address */
+	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
+	priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+	setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+		    (unsigned long)dev);
+
+	return 0;
+
+out_tx_ring_cleanup:
+	vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+	kfree(priv->rx_ring);
+
+out:
+	return -ENOMEM;
+}
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	int ret = -ENOMEM;
+
+	priv->ca = ca;
+	priv->port = port;
+	priv->qp = NULL;
 
 	/*
-	 * Must be after ipoib_ib_dev_init so we can allocate a per
-	 * device wq there and use it here
+	 * the various IPoIB tasks assume they will never race against
+	 * themselves, so always use a single-threaded workqueue
 	 */
-	if (ipoib_neigh_hash_init(priv) < 0)
+	priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+	if (!priv->wq) {
+		pr_warn("%s: failed to allocate device WQ\n", dev->name);
+		goto out;
+	}
+
+	/* create the PD, which is used for both control and datapath */
+	priv->pd = ib_alloc_pd(priv->ca, 0);
+	if (IS_ERR(priv->pd)) {
+		pr_warn("%s: failed to allocate PD\n", ca->name);
+		goto clean_wq;
+	}
+
+	ret = priv->rn_ops->ndo_init(dev);
+	if (ret) {
+		pr_warn("%s failed to init HW resource\n", dev->name);
+		goto out_free_pd;
+	}
+
+	if (ipoib_neigh_hash_init(priv) < 0) {
+		pr_warn("%s failed to init neigh hash\n", dev->name);
 		goto out_dev_uninit;
+	}
+
+	if (dev->flags & IFF_UP) {
+		if (ipoib_ib_dev_open(dev)) {
+			pr_warn("%s failed to open device\n", dev->name);
+			ret = -ENODEV;
+			goto out_dev_uninit;
+		}
+	}
 
 	return 0;
 
 out_dev_uninit:
 	ipoib_ib_dev_cleanup(dev);
 
-out_tx_ring_cleanup:
-	vfree(priv->tx_ring);
+out_free_pd:
+	if (priv->pd) {
+		ib_dealloc_pd(priv->pd);
+		priv->pd = NULL;
+	}
 
-out_rx_ring_cleanup:
-	kfree(priv->rx_ring);
+clean_wq:
+	if (priv->wq) {
+		destroy_workqueue(priv->wq);
+		priv->wq = NULL;
+	}
 
 out:
-	return -ENOMEM;
+	return ret;
 }
 
 void ipoib_dev_cleanup(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+	struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
 	LIST_HEAD(head);
 
 	ASSERT_RTNL();
 
-	ipoib_delete_debug_files(dev);
-
 	/* Delete any child interfaces first */
 	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
 		/* Stop GC on child */
@@ -1685,24 +1734,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
 	}
 	unregister_netdevice_many(&head);
 
-	/*
-	 * Must be before ipoib_ib_dev_cleanup or we delete an in use
-	 * work queue
-	 */
 	ipoib_neigh_hash_uninit(dev);
 
 	ipoib_ib_dev_cleanup(dev);
 
-	kfree(priv->rx_ring);
-	vfree(priv->tx_ring);
-
-	priv->rx_ring = NULL;
-	priv->tx_ring = NULL;
+	/* no more work is expected on priv->wq; drain and destroy it */
+	if (priv->wq) {
+		flush_workqueue(priv->wq);
+		destroy_workqueue(priv->wq);
+		priv->wq = NULL;
+	}
 }
 
 static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
 }
@@ -1710,7 +1756,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
 static int ipoib_get_vf_config(struct net_device *dev, int vf,
 			       struct ifla_vf_info *ivf)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int err;
 
 	err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1724,7 +1770,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 
 static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
 		return -EINVAL;
@@ -1735,7 +1781,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
 static int ipoib_get_vf_stats(struct net_device *dev, int vf,
 			      struct ifla_vf_stats *vf_stats)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
 }
@@ -1773,21 +1819,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
 	.ndo_get_iflink		 = ipoib_get_iflink,
 };
 
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
-		dev->netdev_ops	= &ipoib_netdev_ops_vf;
-	else
-		dev->netdev_ops	= &ipoib_netdev_ops_pf;
-
 	dev->header_ops		 = &ipoib_header_ops;
 
 	ipoib_set_ethtool_ops(dev);
 
-	netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
 	dev->watchdog_timeo	 = HZ;
 
 	dev->flags		|= IFF_BROADCAST | IFF_MULTICAST;
@@ -1801,11 +1838,14 @@ void ipoib_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
 
-	priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
+	priv->dev = dev;
 	spin_lock_init(&priv->lock);
-
 	init_rwsem(&priv->vlan_rwsem);
 
 	INIT_LIST_HEAD(&priv->path_list);
@@ -1823,22 +1863,99 @@ void ipoib_setup(struct net_device *dev)
 	INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
 }
 
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+	.ndo_init		 = ipoib_dev_init_default,
+	.ndo_uninit		 = ipoib_dev_uninit_default,
+	.ndo_open		 = ipoib_ib_dev_open_default,
+	.ndo_stop		 = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+			     const char *name,
+			     unsigned char name_assign_type,
+			     void (*setup)(struct net_device *))
 {
 	struct net_device *dev;
+	struct rdma_netdev *rn;
 
-	dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
-			   NET_NAME_UNKNOWN, ipoib_setup);
+	dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+			   name,
+			   name_assign_type, setup);
 	if (!dev)
 		return NULL;
 
-	return netdev_priv(dev);
+	rn = netdev_priv(dev);
+
+	rn->send = ipoib_send;
+	rn->attach_mcast = ipoib_mcast_attach;
+	rn->detach_mcast = ipoib_mcast_detach;
+	rn->hca = hca;
+
+	dev->netdev_ops = &ipoib_netdev_default_pf;
+
+	return dev;
+}
+
+/* Allocate the netdev via the HCA's hook, or the default when unsupported. */
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+					   const char *name)
+{
+	struct net_device *ndev;
+
+	if (!hca->alloc_rdma_netdev)
+		goto use_default;
+
+	ndev = hca->alloc_rdma_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
+				      NET_NAME_UNKNOWN, ipoib_setup_common);
+	if (PTR_ERR(ndev) == -EOPNOTSUPP)
+		goto use_default;
+	/* NULL or any other error pointer is reported to the caller as NULL */
+	return IS_ERR_OR_NULL(ndev) ? NULL : ndev;
+
+use_default:
+	return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+					   ipoib_setup_common);
+}
+
+/*
+ * Allocate an ipoib_dev_priv and its net_device; returns NULL on failure.
+ */
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+					const char *name)
+{
+	struct net_device *dev;
+	struct ipoib_dev_priv *priv;
+	struct rdma_netdev *rn;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	dev = ipoib_get_netdev(hca, port, name);
+	if (!dev) {
+		kfree(priv);
+		return NULL;
+	}
+
+	priv->rn_ops = dev->netdev_ops;
+	/* priv->hca_caps is still zero here; read the caps from the HCA */
+	if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
+		dev->netdev_ops	= &ipoib_netdev_ops_vf;
+	else
+		dev->netdev_ops	= &ipoib_netdev_ops_pf;
+
+	rn = netdev_priv(dev);
+	rn->clnt_priv = priv;
+	ipoib_build_priv(dev);
+	return priv;
 }
 
 static ssize_t show_pkey(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+	struct net_device *ndev = to_net_dev(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
 	return sprintf(buf, "0x%04x\n", priv->pkey);
 }
@@ -1847,14 +1964,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
 static ssize_t show_umcast(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+	struct net_device *ndev = to_net_dev(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
 	return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
 }
 
 void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(ndev);
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
 	if (umcast_val > 0) {
 		set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1927,7 +2045,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 
 static int ipoib_set_mac(struct net_device *dev, void *addr)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct sockaddr_storage *ss = addr;
 	int ret;
 
@@ -2000,7 +2118,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 	priv->hca_caps = hca->attrs.device_cap_flags;
 
 	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
-		priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+		priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 
 		if (priv->hca_caps & IB_DEVICE_UD_TSO)
 			priv->dev->hw_features |= NETIF_F_TSO;
@@ -2016,7 +2134,7 @@ static struct net_device *ipoib_add_port(const char *format,
 	struct ib_port_attr attr;
 	int result = -ENOMEM;
 
-	priv = ipoib_intf_alloc(format);
+	priv = ipoib_intf_alloc(hca, port, format);
 	if (!priv)
 		goto alloc_mem_failed;
 
@@ -2090,8 +2208,6 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto register_failed;
 	}
 
-	ipoib_create_debug_files(priv->dev);
-
 	if (ipoib_cm_add_mode_attr(priv->dev))
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
@@ -2106,7 +2222,6 @@ static struct net_device *ipoib_add_port(const char *format,
 	return priv->dev;
 
 sysfs_failed:
-	ipoib_delete_debug_files(priv->dev);
 	unregister_netdev(priv->dev);
 
 register_failed:
@@ -2146,7 +2261,7 @@ static void ipoib_add_one(struct ib_device *device)
 			continue;
 		dev = ipoib_add_port("ib%d", device, p);
 		if (!IS_ERR(dev)) {
-			priv = netdev_priv(dev);
+			priv = ipoib_priv(dev);
 			list_add_tail(&priv->list, dev_list);
 			count++;
 		}
@@ -2186,11 +2301,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 
 		unregister_netdev(priv->dev);
 		free_netdev(priv->dev);
+		kfree(priv);
 	}
 
 	kfree(dev_list);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+	.notifier_call = ipoib_netdev_event,
+};
+#endif
+
 static int __init ipoib_init_module(void)
 {
 	int ret;
@@ -2243,6 +2365,9 @@ static int __init ipoib_init_module(void)
 	if (ret)
 		goto err_client;
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+	register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
 	return 0;
 
 err_client:
@@ -2260,6 +2385,9 @@ err_fs:
 
 static void __exit ipoib_cleanup_module(void)
 {
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+	unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
 	ipoib_netlink_fini();
 	ib_unregister_client(&ipoib_client);
 	ib_sa_unregister_client(&ipoib_sa_client);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 69e146cdc306..057f58e6afca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 	struct net_device *dev = mcast->dev;
 	int tx_dropped = 0;
 
-	ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+	ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
 			mcast->mcmember.mgid.raw);
 
 	/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 
 static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rb_node *n = priv->multicast_tree.rb_node;
 
 	while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
 
 static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
 
 	while (*n) {
@@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 				   struct ib_sa_mcmember_rec *mcmember)
 {
 	struct net_device *dev = mcast->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	struct ipoib_ah *ah;
+	struct rdma_ah_attr av;
 	int ret;
 	int set_qkey = 0;
 
@@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 			return 0;
 		}
 
-		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
-					 &mcast->mcmember.mgid, set_qkey);
+		ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+				       be16_to_cpu(mcast->mcmember.mlid),
+				       set_qkey, priv->qkey);
 		if (ret < 0) {
 			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
 				   mcast->mcmember.mgid.raw);
@@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		}
 	}
 
-	{
-		struct ib_ah_attr av = {
-			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
-			.port_num      = priv->port,
-			.sl	       = mcast->mcmember.sl,
-			.ah_flags      = IB_AH_GRH,
-			.static_rate   = mcast->mcmember.rate,
-			.grh	       = {
-				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
-				.hop_limit     = mcast->mcmember.hop_limit,
-				.sgid_index    = 0,
-				.traffic_class = mcast->mcmember.traffic_class
-			}
-		};
-		av.grh.dgid = mcast->mcmember.mgid;
-
-		ah = ipoib_create_ah(dev, priv->pd, &av);
-		if (IS_ERR(ah)) {
-			ipoib_warn(priv, "ib_address_create failed %ld\n",
-				-PTR_ERR(ah));
-			/* use original error */
-			return PTR_ERR(ah);
-		} else {
-			spin_lock_irq(&priv->lock);
-			mcast->ah = ah;
-			spin_unlock_irq(&priv->lock);
-
-			ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
-					mcast->mcmember.mgid.raw,
-					mcast->ah->ah,
-					be16_to_cpu(mcast->mcmember.mlid),
-					mcast->mcmember.sl);
-		}
+	memset(&av, 0, sizeof(av));
+	av.type = rdma_ah_find_type(priv->ca, priv->port);
+	rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
+	rdma_ah_set_port_num(&av, priv->port);
+	rdma_ah_set_sl(&av, mcast->mcmember.sl);
+	rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
+	/* GRH towards the group: dgid is the MGID, sgid_index is 0 */
+	rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
+			be32_to_cpu(mcast->mcmember.flow_label),
+			0, mcast->mcmember.hop_limit,
+			mcast->mcmember.traffic_class);
+
+	ah = ipoib_create_ah(dev, priv->pd, &av);
+	if (IS_ERR(ah)) {
+		ipoib_warn(priv, "ib_address_create failed %ld\n",
+			   -PTR_ERR(ah));
+		/* use original error */
+		return PTR_ERR(ah);
 	}
+	spin_lock_irq(&priv->lock);
+	mcast->ah = ah;
+	spin_unlock_irq(&priv->lock);
+
+	ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+			mcast->mcmember.mgid.raw,
+			mcast->ah->ah,
+			be16_to_cpu(mcast->mcmember.mlid),
+			mcast->mcmember.sl);
 
 	/* actually send any queued packets */
 	netif_tx_lock_bh(dev);
@@ -331,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   carrier_on_task);
 	struct ib_port_attr attr;
-	int ret;
 
 	if (ib_query_port(priv->ca, priv->port, &attr) ||
 	    attr.state != IB_PORT_ACTIVE) {
@@ -344,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 	 * because the broadcast group must always be joined first and is always
 	 * re-joined if the SM changes substantially.
 	 */
-	ret = ipoib_check_sm_sendonly_fullmember_support(priv);
-	if (ret < 0)
-		pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
-			 priv->dev->name, ret);
-
+	priv->sm_fullmember_sendonly_support =
+		ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+					       priv->ca, priv->port);
 	/*
 	 * Take rtnl_lock to avoid racing with ipoib_stop() and
 	 * turning the carrier back on while a device is being
@@ -375,7 +369,7 @@ static int ipoib_mcast_join_complete(int status,
 {
 	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
 			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -477,7 +471,7 @@ out_locked:
  */
 static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_sa_multicast *multicast;
 	struct ib_sa_mcmember_rec rec = {
 		.join_state = 1
@@ -489,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 	    !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
 		return -EINVAL;
 
+	init_completion(&mcast->done);
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
 	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
 	rec.mgid     = mcast->mcmember.mgid;
@@ -647,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			if (mcast->backoff == 1 ||
 			    time_after_eq(jiffies, mcast->delay_until)) {
 				/* Found the next unjoined group */
-				init_completion(&mcast->done);
-				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 				if (ipoib_mcast_join(dev, mcast)) {
 					spin_unlock_irq(&priv->lock);
 					return;
@@ -668,17 +663,15 @@ out:
 		queue_delayed_work(priv->wq, &priv->mcast_task,
 				   delay_until - jiffies);
 	}
-	if (mcast) {
-		init_completion(&mcast->done);
-		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	if (mcast)
 		ipoib_mcast_join(dev, mcast);
-	}
+
 	spin_unlock_irq(&priv->lock);
 }
 
 void ipoib_mcast_start_thread(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -690,7 +683,7 @@ void ipoib_mcast_start_thread(struct net_device *dev)
 
 int ipoib_mcast_stop_thread(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
 
 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	int ret = 0;
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 				mcast->mcmember.mgid.raw);
 
 		/* Remove ourselves from the multicast group */
-		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
-				      be16_to_cpu(mcast->mcmember.mlid));
+		ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+				       be16_to_cpu(mcast->mcmember.mlid));
 		if (ret)
 			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
 	} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
 
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 	struct ipoib_mcast *mcast;
 	unsigned long flags;
 	void *mgid = daddr + 4;
@@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 			}
 		}
 		spin_unlock_irqrestore(&priv->lock, flags);
-		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+						IB_MULTICAST_QPN);
 		if (neigh)
 			ipoib_neigh_put(neigh);
 		return;
@@ -837,7 +833,7 @@ unlock:
 
 void ipoib_mcast_dev_flush(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	LIST_HEAD(remove_list);
 	struct ipoib_mcast *mcast, *tmcast;
 	unsigned long flags;
@@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
 
 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
 	struct rb_node *n;
 	struct ipoib_mcast *mcast;
 	int ret = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..28884781311b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
 
 static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	u16 val;
 
 	if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -107,7 +107,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 	if (!pdev || pdev->type != ARPHRD_INFINIBAND)
 		return -ENODEV;
 
-	ppriv = netdev_priv(pdev);
+	ppriv = ipoib_priv(pdev);
 
 	if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
 		ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,7 +129,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 	 */
 	child_pkey |= 0x8000;
 
-	err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+	err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+			       child_pkey, IPOIB_RTNL_CHILD);
 
 	if (!err && data)
 		err = ipoib_changelink(dev, tb, data);
@@ -140,8 +141,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
 {
 	struct ipoib_dev_priv *priv, *ppriv;
 
-	priv = netdev_priv(dev);
-	ppriv = netdev_priv(priv->parent);
+	priv = ipoib_priv(dev);
+	ppriv = ipoib_priv(priv->parent);
 
 	down_write(&ppriv->vlan_rwsem);
 	unregister_netdevice_queue(dev, head);
@@ -161,7 +162,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
 	.maxtype	= IFLA_IPOIB_MAX,
 	.policy		= ipoib_policy,
 	.priv_size	= sizeof(struct ipoib_dev_priv),
-	.setup		= ipoib_setup,
+	.setup		= ipoib_setup_common,
 	.newlink	= ipoib_new_child_link,
 	.changelink	= ipoib_changelink,
 	.dellink	= ipoib_unregister_child_dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
 
 #include "ipoib.h"
 
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+		       union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_attr *qp_attr = NULL;
 	int ret;
 	u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
 			goto out;
 
 		/* set correct QKey for QP */
-		qp_attr->qkey = priv->qkey;
+		qp_attr->qkey = qkey;
 		ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
 		if (ret) {
 			ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
 	return ret;
 }
 
+/*
+ * Detach priv->qp from multicast group mgid/mlid; hca is not used here.
+ */
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+		       union ib_gid *mgid, u16 mlid)
+{
+	struct ipoib_dev_priv *ipriv = ipoib_priv(dev);
+
+	return ib_detach_mcast(ipriv->qp, mgid, mlid);
+}
+
 int ipoib_init_qp(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int ret;
 	struct ib_qp_attr qp_attr;
 	int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
 
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_init_attr init_attr = {
 		.cap = {
 			.max_send_wr  = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	int ret, size;
 	int i;
 
-	priv->pd = ib_alloc_pd(priv->ca, 0);
-	if (IS_ERR(priv->pd)) {
-		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-		return -ENODEV;
-	}
-
-	/*
-	 * the various IPoIB tasks assume they will never race against
-	 * themselves, so always use a single thread workqueue
-	 */
-	priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
-	if (!priv->wq) {
-		printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
-		goto out_free_pd;
-	}
-
 	size = ipoib_recvq_size + 1;
 	ret = ipoib_cm_dev_init(dev);
 	if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 			size += ipoib_recvq_size * ipoib_max_conn_qp;
 	} else
 		if (ret != -ENOSYS)
-			goto out_free_wq;
+			return -ENODEV;
 
 	cq_attr.cqe = size;
 	priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		goto out_free_send_cq;
 	}
 
-	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
-	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
-	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
-
 	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
 		priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
 
@@ -247,26 +239,18 @@ out_free_recv_cq:
 out_cm_dev_cleanup:
 	ipoib_cm_dev_cleanup(dev);
 
-out_free_wq:
-	destroy_workqueue(priv->wq);
-	priv->wq = NULL;
-
-out_free_pd:
-	ib_dealloc_pd(priv->pd);
-
 	return -ENODEV;
 }
 
 void ipoib_transport_dev_cleanup(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (priv->qp) {
 		if (ib_destroy_qp(priv->qp))
 			ipoib_warn(priv, "ib_qp_destroy failed\n");
 
 		priv->qp = NULL;
-		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	}
 
 	if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
 	if (ib_destroy_cq(priv->recv_cq))
 		ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
-	ipoib_cm_dev_cleanup(dev);
-
-	if (priv->wq) {
-		flush_workqueue(priv->wq);
-		destroy_workqueue(priv->wq);
-		priv->wq = NULL;
-	}
-
-	ib_dealloc_pd(priv->pd);
 }
 
 void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3e10e3dac2e7..36dc4fcaa3cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -44,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
 			   char *buf)
 {
 	struct net_device *dev = to_net_dev(d);
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	return sprintf(buf, "%s\n", priv->parent->name);
 }
@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 		goto register_failed;
 	}
 
-	ipoib_create_debug_files(priv->dev);
-
 	/* RTNL childs don't need proprietary sysfs entries */
 	if (type == IPOIB_LEGACY_CHILD) {
 		if (ipoib_cm_add_mode_attr(priv->dev))
@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 
 sysfs_failed:
 	result = -ENOMEM;
-	ipoib_delete_debug_files(priv->dev);
 	unregister_netdevice(priv->dev);
 
 register_failed:
@@ -128,14 +125,15 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	ppriv = netdev_priv(pdev);
+	ppriv = ipoib_priv(pdev);
 
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
 		return -EPERM;
 
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
-	priv = ipoib_intf_alloc(intf_name);
+
+	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
 	if (!priv)
 		return -ENOMEM;
 
@@ -183,7 +181,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	ppriv = netdev_priv(pdev);
+	ppriv = ipoib_priv(pdev);
 
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
 		return -EPERM;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d0b22ad58c1..c1ae4aeae2f9 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@ struct iser_fr_desc {
 	struct list_head		  list;
 	struct iser_reg_resources	  rsc;
 	struct iser_pi_context		 *pi_ctx;
+	struct list_head                  all_list;
 };
 
 /**
@@ -443,6 +444,7 @@ struct iser_fr_pool {
 	struct list_head        list;
 	spinlock_t              lock;
 	int                     size;
+	struct list_head        all_list;
 };
 
 /**
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..12ed62ce9ff7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -612,7 +612,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
 			 iser_conn, rkey);
 
 		if (unlikely(!iser_conn->snd_w_inv)) {
-			iser_err("conn %p: unexepected remote invalidation, "
+			iser_err("conn %p: unexpected remote invalidation, "
 				 "terminating connection\n", iser_conn);
 			return -EPROTO;
 		}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 30b622f2ab73..c538a38c91ce 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
 	int i, ret;
 
 	INIT_LIST_HEAD(&fr_pool->list);
+	INIT_LIST_HEAD(&fr_pool->all_list);
 	spin_lock_init(&fr_pool->lock);
 	fr_pool->size = 0;
 	for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
 		}
 
 		list_add_tail(&desc->list, &fr_pool->list);
+		list_add_tail(&desc->all_list, &fr_pool->all_list);
 		fr_pool->size++;
 	}
 
@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 	struct iser_fr_desc *desc, *tmp;
 	int i = 0;
 
-	if (list_empty(&fr_pool->list))
+	if (list_empty(&fr_pool->all_list))
 		return;
 
 	iser_info("freeing conn %p fr pool\n", ib_conn);
 
-	list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
-		list_del(&desc->list);
+	list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+		list_del(&desc->all_list);
 		iser_free_reg_res(&desc->rsc);
 		if (desc->pi_ctx)
 			iser_free_pi_ctx(desc->pi_ctx);
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+	tristate "Intel OPA VNIC support"
+	depends on X86_64 && INFINIBAND
+	---help---
+	This is the Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+	driver for the Ethernet over Omni-Path feature. It implements the
+	HW-independent VNIC functionality. It interfaces with the Linux network
+	stack for the data path and with IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+              opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..2e8fee982436
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC encapsulation/decapsulation function.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK        0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT  8
+#define OPA_16B_SLID_MASK       0xF00ull
+#define OPA_16B_DLID_MASK       0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT  12
+#define OPA_16B_LEN_SHFT        20
+#define OPA_16B_SC_SHFT         20
+#define OPA_16B_RC_SHFT         25
+#define OPA_16B_PKEY_SHFT       16
+
+#define OPA_VNIC_L4_HDR_SHFT    16
+
+/* L2+L4 hdr len is 20 bytes (5 32-bit words) */
+#define OPA_VNIC_HDR_QW_LEN   5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+					u16 pkey, u16 entropy, u8 sc, u8 rc,
+					u8 l4_type, u16 l4_hdr)
+{
+	/* h[1]: LT=1, 16B L2=10 */
+	u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
+
+	h[2] = l4_type;
+	h[3] = entropy;
+	h[4] = (u32)l4_hdr << OPA_VNIC_L4_HDR_SHFT;	/* shift as u32, not int */
+
+	/* Extract and set 4 upper bits and 20 lower bits of the lids */
+	h[0] |= (slid & OPA_16B_LID_MASK);
+	h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+	h[1] |= (dlid & OPA_16B_LID_MASK);
+	h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+	h[0] |= ((u32)len << OPA_16B_LEN_SHFT);	/* u8/u16 would promote to int */
+	h[1] |= ((u32)rc << OPA_16B_RC_SHFT);
+	h[1] |= ((u32)sc << OPA_16B_SC_SHFT);
+	h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+	memcpy(hdr, h, OPA_VNIC_HDR_LEN);	/* words copied out as built above */
+}
+
+/*
+ * opa_vnic_free_mac_tbl - free every node of a mac table, then the table.
+ * The mac table is a simple hash table keyed by the last octet of the mac.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_node *tmp;
+	int bkt;
+
+	if (!mactbl)	/* NULL table: nothing to free */
+		return;
+
+	vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+		hash_del(&node->hlist);	/* unlink before freeing the node */
+		kfree(node);
+	}
+	kfree(mactbl);	/* nodes are gone; release the table itself */
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+	u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+	struct hlist_head *mactbl;
+
+	mactbl = kzalloc(size, GFP_KERNEL);	/* zeroed array of hash heads */
+	if (!mactbl)
+		return ERR_PTR(-ENOMEM);	/* never NULL: test with IS_ERR() */
+
+	vnic_hash_init(mactbl);
+	return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - detach the mac table from the adapter, free it */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+	struct hlist_head *mactbl;
+
+	mutex_lock(&adapter->mactbl_lock);	/* also held by table updates */
+	mactbl = rcu_access_pointer(adapter->mactbl);
+	rcu_assign_pointer(adapter->mactbl, NULL);	/* unpublish the table */
+	synchronize_rcu();	/* wait before freeing what readers may still use */
+	opa_vnic_free_mac_tbl(mactbl);
+	mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of a specific section of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_head *mactbl;
+	int bkt;
+	u16 loffset, lnum_entries;
+
+	rcu_read_lock();
+	mactbl = rcu_dereference(adapter->mactbl);
+	if (!mactbl)	/* no table installed: tbl is left as passed in */
+		goto get_mac_done;
+
+	loffset = be16_to_cpu(tbl->offset);
+	lnum_entries = be16_to_cpu(tbl->num_entries);
+
+	vnic_hash_for_each(mactbl, bkt, node, hlist) {
+		struct __opa_vnic_mactable_entry *nentry = &node->entry;
+		struct opa_veswport_mactable_entry *entry;
+
+		if ((node->index < loffset) ||
+		    (node->index >= (loffset + lnum_entries)))
+			continue;	/* outside the requested section */
+
+		/* populate entry in the tbl corresponding to the index */
+		entry = &tbl->tbl_entries[node->index - loffset];
+		memcpy(entry->mac_addr, nentry->mac_addr,
+		       ARRAY_SIZE(entry->mac_addr));
+		memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+		       ARRAY_SIZE(entry->mac_addr_mask));
+		entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+	}
+	tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+	rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes the following steps.
+ *  - Allocate a new mac (hash) table.
+ *  - Add the specified entries to the new table
+ *    (except the ones that are requested to be deleted).
+ *  - Add all the other entries from the old mac table.
+ *  - If there is a failure, free the new table and return -ENOMEM.
+ *  - Switch to the new table and record the new mac table digest.
+ *  - Free the old table and return 0.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl)
+{
+	struct opa_vnic_mac_tbl_node *node, *new_node;
+	struct hlist_head *new_mactbl, *old_mactbl;
+	int i, bkt, rc = 0;
+	u8 key;
+	u16 loffset, lnum_entries;
+
+	mutex_lock(&adapter->mactbl_lock);
+	/* allocate new mac table */
+	new_mactbl = opa_vnic_alloc_mac_tbl();
+	if (IS_ERR(new_mactbl)) {
+		mutex_unlock(&adapter->mactbl_lock);
+		return PTR_ERR(new_mactbl);
+	}
+
+	loffset = be16_to_cpu(tbl->offset);
+	lnum_entries = be16_to_cpu(tbl->num_entries);
+
+	/* add updated entries to the new mac table */
+	for (i = 0; i < lnum_entries; i++) {
+		struct __opa_vnic_mactable_entry *nentry;
+		struct opa_veswport_mactable_entry *entry =
+							&tbl->tbl_entries[i];
+		u8 *mac_addr = entry->mac_addr;
+		u8 empty_mac[ETH_ALEN] = { 0 };
+
+		v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+		      loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+		      mac_addr[3], mac_addr[4], mac_addr[5],
+		      be32_to_cpu(entry->dlid_sd));	/* print in CPU order */
+
+		/* if the entry is being removed, do not add it */
+		if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+			continue;
+
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node) {
+			rc = -ENOMEM;
+			goto updt_done;
+		}
+
+		node->index = loffset + i;
+		nentry = &node->entry;
+		memcpy(nentry->mac_addr, entry->mac_addr,
+		       ARRAY_SIZE(nentry->mac_addr));
+		memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+		       ARRAY_SIZE(nentry->mac_addr_mask));
+		nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+		key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+		vnic_hash_add(new_mactbl, &node->hlist, key);
+	}
+
+	/* add other entries from current mac table to new mac table */
+	old_mactbl = rcu_access_pointer(adapter->mactbl);
+	if (!old_mactbl)	/* no current table to copy from */
+		goto switch_tbl;
+
+	vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+		if ((node->index >= loffset) &&
+		    (node->index < (loffset + lnum_entries)))
+			continue;	/* replaced (or deleted) by this update */
+
+		new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+		if (!new_node) {
+			rc = -ENOMEM;
+			goto updt_done;
+		}
+
+		new_node->index = node->index;
+		memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+		key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+		vnic_hash_add(new_mactbl, &new_node->hlist, key);
+	}
+
+switch_tbl:
+	/* switch to new table */
+	rcu_assign_pointer(adapter->mactbl, new_mactbl);
+	synchronize_rcu();	/* wait before the old table is freed below */
+
+	adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+	/* on failure free the new table (old_mactbl may be unset); else the old */
+	if (rc)
+		opa_vnic_free_mac_tbl(new_mactbl);
+	else
+		opa_vnic_free_mac_tbl(old_mactbl);
+
+	mutex_unlock(&adapter->mactbl_lock);
+	return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - look up h_dest in the mac table; 0 if not found */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+				     struct ethhdr *mac_hdr)
+{
+	struct opa_vnic_mac_tbl_node *node;
+	struct hlist_head *mactbl;
+	u32 dlid = 0;
+	u8 key;
+
+	rcu_read_lock();
+	mactbl = rcu_dereference(adapter->mactbl);
+	if (unlikely(!mactbl))	/* no table installed */
+		goto chk_done;
+
+	key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+	vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+		struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+		/* if related to source mac, skip */
+		if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+			continue;
+
+		if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+			    ARRAY_SIZE(node->entry.mac_addr))) {
+			/* exact match (mac_addr_mask is not used): take dlid */
+			dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+			break;
+		}
+	}
+
+chk_done:
+	rcu_read_unlock();
+	return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID (0 if none could be found) */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+				  struct sk_buff *skb, u8 def_port)
+{
+	struct __opa_veswport_info *info = &adapter->info;
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u32 dlid;
+
+	dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+	if (dlid)	/* a mac table match takes precedence */
+		return dlid;
+
+	if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+		dlid = info->vesw.u_mcast_dlid;
+	} else {
+		if (is_local_ether_addr(mac_hdr->h_dest)) {	/* dlid from mac */
+			dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+				((uint32_t)mac_hdr->h_dest[4] << 8)  |
+				mac_hdr->h_dest[3];
+			if (unlikely(!dlid))
+				v_warn("Null dlid in MAC address\n");
+		} else if (def_port != OPA_VNIC_INVALID_PORT) {
+			dlid = info->vesw.u_ucast_dlid[def_port];
+		}
+	}
+
+	return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class for the VLAN PCP and cast type */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+			  struct sk_buff *skb)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u16 vlan_tci;
+	u8 sc;
+
+	if (!__vlan_get_tag(skb, &vlan_tci)) {	/* 0: vlan_tci was filled in */
+		u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.pcp_to_sc_mc[pcp];
+		else
+			sc = info->vport.pcp_to_sc_uc[pcp];
+	} else {	/* no tag obtained: use the non-vlan defaults */
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.non_vlan_sc_mc;
+		else
+			sc = info->vport.non_vlan_sc_uc;
+	}
+
+	return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	struct __opa_veswport_info *info = &adapter->info;
+	u8 vl;	/* picked per VLAN PCP, or from the non-vlan defaults */
+
+	/* NOTE(review): opa_vnic_get_sc uses __vlan_get_tag() - confirm intent */
+	if (skb_vlan_tag_present(skb)) {
+		u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.pcp_to_vl_mc[pcp];
+		else
+			vl = info->vport.pcp_to_vl_uc[pcp];
+	} else {
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.non_vlan_vl_mc;
+		else
+			vl = info->vport.non_vlan_vl_uc;
+	}
+
+	return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the 8-bit packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	u16 hash16;
+
+	/*
+	 * Get flow based 16-bit hash and then XOR the upper and lower bytes
+	 * to get the entropy.
+	 * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+	 */
+	hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+	return (u8)((hash16 >> 8) ^ (hash16 & 0xff));	/* fold 16 bits into 8 */
+}
+
+/* opa_vnic_get_def_port - get default port from flow_tbl based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+				       u8 entropy)
+{
+	u8 flow_id;
+
+	/* Add the upper and lower 4-bits of entropy to get the flow id (0..30) */
+	flow_id = ((entropy & 0xf) + (entropy >> 4));
+	/* masking assumes the table size is a power of two - TODO confirm */
+	return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length, in 8-byte units, with OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+	u32 pad_len;
+
+	/* padding for 8 bytes size alignment */
+	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+	pad_len += OPA_VNIC_ICRC_TAIL_LEN;	/* pad plus the icrc/tail bytes */
+
+	return (skb->len + pad_len) >> 3;	/* bytes to 8-byte units */
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	struct __opa_veswport_info *info = &adapter->info;
+	struct opa_vnic_skb_mdata *mdata;
+	u8 def_port, sc, entropy, *hdr;
+	u16 len, l4_hdr;
+	u32 dlid;
+
+	hdr = skb_push(skb, OPA_VNIC_HDR_LEN);	/* room for the OPA header */
+
+	entropy = opa_vnic_calc_entropy(adapter, skb);
+	def_port = opa_vnic_get_def_port(adapter, entropy);
+	len = opa_vnic_wire_length(skb);	/* taken before mdata is pushed */
+	dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+	sc = opa_vnic_get_sc(info, skb);
+	l4_hdr = info->vesw.vesw_id;
+
+	mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+	mdata->vl = opa_vnic_get_vl(adapter, skb);
+	mdata->entropy = entropy;
+	mdata->flags = 0;
+	if (unlikely(!dlid)) {
+		mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+		return;	/* no DLID: error is flagged, header is not written */
+	}
+
+	opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+			     info->vesw.pkey, entropy, sc, 0,
+			     OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all OPA VNIC declaration required for encapsulation
+ * and decapsulation of Ethernet packets
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION               0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA            0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO                 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO                   0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES            0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS                0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS                0x0014
+#define OPA_EM_ATTR_DELETE_VESW                     0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS       0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS         0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL        0x1
+#define OPA_VNIC_STATE_FORWARDING      0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT   16
+#define OPA_VNIC_MAX_NUM_PCP        8
+
+#define OPA_VNIC_EMA_DATA    (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd)  (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd)    ((dlid_sd) >> 8)
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP     1
+#define OPA_VNIC_ETH_LINK_DOWN   2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+	__be16  fabric_id;
+	__be16  vesw_id;
+
+	u8      rsvd0[6];
+	__be16  def_port_mask;
+
+	u8      rsvd1[2];
+	__be16  pkey;
+
+	u8      rsvd2[4];
+	__be32  u_mcast_dlid;
+	__be32  u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+	u8      rsvd3[44];
+	__be16  eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+	__be16  eth_mtu_non_vlan;
+	u8      rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+	__be32  port_num;
+
+	u8      eth_link_status;
+	u8      rsvd0[3];
+
+	u8      base_mac_addr[ETH_ALEN];
+	u8      config_state;
+	u8      oper_state;
+
+	__be16  max_mac_tbl_ent;
+	__be16  max_smac_ent;
+	__be32  mac_tbl_digest;
+	u8      rsvd1[4];
+
+	__be32  encap_slid;
+
+	u8      pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+	u8      non_vlan_sc_uc;
+	u8      non_vlan_vl_uc;
+	u8      non_vlan_sc_mc;
+	u8      non_vlan_vl_mc;
+
+	u8      rsvd2[48];
+
+	__be16  uc_macs_gen_count;
+	__be16  mc_macs_gen_count;
+
+	u8      rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On host, each of the virtual ethernet ports belongs
+ * to a different virtual ethernet switch.
+ */
+struct opa_veswport_info {
+	struct opa_vesw_info          vesw;
+	struct opa_per_veswport_info  vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host each virtual ethernet port will have
+ * a forwarding table. These tables are used to
+ * map a MAC to a LID and other data. For more
+ * details see struct opa_veswport_mactable.
+ * This is the structure of a single mactable entry
+ */
+struct opa_veswport_mactable_entry {
+	u8      mac_addr[ETH_ALEN];
+	u8      mac_addr_mask[ETH_ALEN];
+	__be32  dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that this
+ * entry is to be loaded into and the number of entries
+ * that this MAD instance contains.
+ * The mac_tbl_digest has been added to this MAD structure. It will be set by
+ * the EM and it will be used by the EM to check if there are any
+ * discrepancies with this value and the value
+ * maintained by the EM in the case of VNIC port being deleted or unloaded
+ * A new instantiation of a VNIC will always have a value of zero.
+ * This value is stored as part of the vnic adapter structure and will be
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct opa_veswport_mactable {
+	__be16                              offset;
+	__be16                              num_entries;
+	__be32                              mac_tbl_digest;
+	struct opa_veswport_mactable_entry  tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is >=65 and <= 127 bytes
+ * @tx_128_255: transmit packet length is >=128 and <= 255 bytes
+ * @tx_256_511: transmit packet length is >=256 and <= 511 bytes
+ * @tx_512_1023: transmit packet length is >=512 and <= 1023 bytes
+ * @tx_1024_1518: transmit packet length is >=1024 and <= 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is >=65 and <= 127 bytes
+ * @rx_128_255: received packet length is >=128 and <= 255 bytes
+ * @rx_256_511: received packet length is >=256 and <= 511 bytes
+ * @rx_512_1023: received packet length is >=512 and <= 1023 bytes
+ * @rx_1024_1518: received packet length is >=1024 and <= 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+	__be16  vp_instance;
+	__be16  vesw_id;
+	__be32  veswport_num;
+
+	__be64  tx_errors;
+	__be64  rx_errors;
+	__be64  tx_packets;
+	__be64  rx_packets;
+	__be64  tx_bytes;
+	__be64  rx_bytes;
+
+	__be64  tx_unicast;
+	__be64  tx_mcastbcast;
+
+	__be64  tx_untagged;
+	__be64  tx_vlan;
+
+	__be64  tx_64_size;
+	__be64  tx_65_127;
+	__be64  tx_128_255;
+	__be64  tx_256_511;
+	__be64  tx_512_1023;
+	__be64  tx_1024_1518;
+	__be64  tx_1519_max;
+
+	__be64  rx_unicast;
+	__be64  rx_mcastbcast;
+
+	__be64  rx_untagged;
+	__be64  rx_vlan;
+
+	__be64  rx_64_size;
+	__be64  rx_65_127;
+	__be64  rx_128_255;
+	__be64  rx_256_511;
+	__be64  rx_512_1023;
+	__be64  rx_1024_1518;
+	__be64  rx_1519_max;
+
+	__be64  reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packet transmission in non-forward port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding error conditions.
+ */
+struct opa_veswport_error_counters {
+	__be16  vp_instance;
+	__be16  vesw_id;
+	__be32  veswport_num;
+
+	__be64  tx_errors;
+	__be64  rx_errors;
+
+	__be64  rsvd0;
+	__be64  tx_smac_filt;
+	__be64  rsvd1;
+	__be64  rsvd2;
+	__be64  rsvd3;
+	__be64  tx_dlid_zero;
+	__be64  rsvd4;
+	__be64  tx_logic;
+	__be64  rsvd5;
+	__be64  tx_drop_state;
+
+	__be64  rx_bad_veswid;
+	__be64  rsvd6;
+	__be64  rx_runt;
+	__be64  rx_oversize;
+	__be64  rsvd7;
+	__be64  rx_eth_down;
+	__be64  rx_drop_state;
+	__be64  rx_logic;
+	__be64  rsvd8;
+
+	__be64  rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on opa port 0 based
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+	__be16  fabric_id;
+	__be16  veswid;
+	__be32  veswportnum;
+	__be16  opaportnum;
+	u8      veswportindex;
+	u8      opcode;
+	__be32  reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_mac_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+	u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * Same attribute IDS and attribute modifiers as in locally administered
+ * addresses used to set globally administered addresses
+ */
+struct opa_veswport_iface_macs {
+	__be16 start_idx;
+	__be16 num_macs_in_msg;
+	__be16 tot_macs_in_lst;
+	__be16 gen_count;
+	struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+	struct ib_mad_hdr  mad_hdr;
+	struct ib_rmpp_hdr rmpp_hdr;
+	u8                 reserved;
+	u8                 oui[3];
+	u8                 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+	u8     gen_type;
+	u8     oui_1;
+	u8     oui_2;
+	u8     oui_3;
+	__be16 trap_num;
+	__be16 toggle_count;
+	__be32 issuer_lid;
+	__be32 reserved;
+	u8     issuer_gid[16];
+	u8     raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+	struct ib_mad_hdr            mad_hdr;
+	struct ib_rmpp_hdr           rmpp_hdr;
+	u8                           reserved;
+	u8                           oui[3];
+	struct opa_vnic_notice_attr  notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..d66540e24885
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+	char stat_string[ETH_GSTRING_LEN];	/* name reported to ethtool */
+	struct {
+		int sizeof_stat;	/* size of the counter field in bytes */
+		int stat_offset;	/* offset within struct opa_vnic_stats */
+	};
+};
+
+#define VNIC_STAT(m)            { FIELD_SIZEOF(struct opa_vnic_stats, m),   \
+				  offsetof(struct opa_vnic_stats, m) }
+
+static const struct vnic_stats vnic_gstrings_stats[] = {
+	/* NETDEV stats (fields of opa_vnic_stats.netstats) */
+	{"rx_packets", VNIC_STAT(netstats.rx_packets)},
+	{"tx_packets", VNIC_STAT(netstats.tx_packets)},
+	{"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+	{"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+	{"rx_errors", VNIC_STAT(netstats.rx_errors)},
+	{"tx_errors", VNIC_STAT(netstats.tx_errors)},
+	{"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+	{"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+	/* SUMMARY counters (tx_grp and rx_grp groups) */
+	{"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+	{"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+	{"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+	{"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+	{"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+	{"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+	{"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+	{"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+	{"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+	{"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+	{"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+	{"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+	{"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+	{"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+	{"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+	{"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+	{"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+	{"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+	{"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+	{"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+	{"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+	{"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+	/* ERROR counters */
+	{"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+	{"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+	{"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+	{"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+	{"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+	{"tx_drop_state", VNIC_STAT(tx_drop_state)},
+	{"rx_drop_state", VNIC_STAT(rx_drop_state)},
+	{"rx_oversize", VNIC_STAT(rx_oversize)},
+	{"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN  ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - report parent device name, driver name and version */
+static void vnic_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+		sizeof(drvinfo->bus_info));
+	strlcpy(drvinfo->version, opa_vnic_driver_version,
+		sizeof(drvinfo->version));
+	strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+}
+
+/* vnic_get_sset_count - number of strings in the requested string set */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+	return (sset != ETH_SS_STATS) ? -EOPNOTSUPP : VNIC_STATS_LEN;
+}
+
+/* vnic_get_ethtool_stats - copy each counter listed in the table to data[] */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct opa_vnic_stats vs;
+	int n;
+
+	memset(&vs, 0, sizeof(vs));
+	mutex_lock(&adapter->stats_lock);
+	adapter->rn_ops->ndo_get_stats64(netdev, &vs.netstats);
+	for (n = 0; n < VNIC_STATS_LEN; n++) {
+		char *src = (char *)&vs + vnic_gstrings_stats[n].stat_offset;
+
+		data[n] = (vnic_gstrings_stats[n].sizeof_stat != sizeof(u64)) ?
+			  *(u32 *)src : *(u64 *)src;
+	}
+	mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - emit the counter names for the statistics string set */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	int idx;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	/* each name occupies a fixed ETH_GSTRING_LEN slot in the output */
+	for (idx = 0; idx < VNIC_STATS_LEN; idx++, data += ETH_GSTRING_LEN)
+		memcpy(data, vnic_gstrings_stats[idx].stat_string,
+		       ETH_GSTRING_LEN);
+}
+
+/* ethtool ops: driver info, link state and the vnic statistics string set */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+	.get_drvinfo = vnic_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+	.get_strings = vnic_get_strings,
+	.get_sset_count = vnic_get_sset_count,
+	.get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - point the netdev at the vnic ethtool ops table */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..6bba886bec1f
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci)  \
+			(((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE    32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT     0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+	u16  fabric_id;
+	u16  vesw_id;
+
+	u8   rsvd0[6];
+	u16  def_port_mask;
+
+	u8   rsvd1[2];
+	u16  pkey;
+
+	u8   rsvd2[4];
+	u32  u_mcast_dlid;
+	u32  u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+	u8   rsvd3[44];
+	u16  eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+	u16  eth_mtu_non_vlan;
+	u8   rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+	u32  port_num;
+
+	u8   eth_link_status;
+	u8   rsvd0[3];
+
+	u8   base_mac_addr[ETH_ALEN];
+	u8   config_state;
+	u8   oper_state;
+
+	u16  max_mac_tbl_ent;
+	u16  max_smac_ent;
+	u32  mac_tbl_digest;
+	u8   rsvd1[4];
+
+	u32  encap_slid;
+
+	u8   pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+	u8   non_vlan_sc_uc;
+	u8   non_vlan_vl_uc;
+	u8   non_vlan_sc_mc;
+	u8   non_vlan_vl_mc;
+
+	u8   rsvd2[48];
+
+	u16  uc_macs_gen_count;
+	u16  mc_macs_gen_count;
+
+	u8   rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+	struct __opa_vesw_info            vesw;
+	struct __opa_per_veswport_info    vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+	u16	fabric_id;
+	u16	veswid;
+	u32	veswportnum;
+	u16	opaportnum;
+	u8	veswportindex;
+	u8	opcode;
+	u32	reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+	struct ib_device           *ibdev;
+	struct opa_vnic_ctrl_ops   *ops;
+	u8                          num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+	struct net_device             *netdev;
+	struct ib_device              *ibdev;
+	struct opa_vnic_ctrl_port     *cport;
+	const struct net_device_ops   *rn_ops;
+
+	u8 port_num;
+	u8 vport_num;
+
+	/* Lock used around concurrent updates to netdev */
+	struct mutex lock;
+
+	struct __opa_veswport_info  info;
+	u8                          vema_mac_addr[ETH_ALEN];
+	u32                         umac_hash;
+	u32                         mmac_hash;
+	struct hlist_head  __rcu   *mactbl;
+
+	/* Lock used to protect updates to mac table */
+	struct mutex mactbl_lock;
+
+	/* Lock used to protect access to vnic counters */
+	struct mutex stats_lock;
+
+	u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+	unsigned long trap_timeout;
+	u8            trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+	u8  mac_addr[ETH_ALEN];
+	u8  mac_addr_mask[ETH_ALEN];
+	u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+	struct hlist_node                    hlist;
+	u16                                  index;
+	struct __opa_vnic_mactable_entry     entry;
+};
+
+#define v_dbg(format, arg...) \
+	netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+	netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+	netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+	netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+	dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+	dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+	dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES  2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT       256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX         5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS    8
+#define OPA_VNIC_MAC_TBL_SIZE  BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS - operate on tables of OPA_VNIC_MAC_TBL_SIZE buckets */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key)                                   \
+	hlist_add_head(node,                                                  \
+		&(hashtable)[hash_min((key), ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member)                  \
+	for ((bkt) = 0, (obj) = NULL;                                         \
+		    !(obj) && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++)         \
+		hlist_for_each_entry_safe(obj, tmp, &(name)[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key)                   \
+	hlist_for_each_entry(obj,                                             \
+		&(name)[hash_min((key), ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member)                            \
+	for ((bkt) = 0, (obj) = NULL;                                         \
+		    !(obj) && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++)         \
+		hlist_for_each_entry(obj, &(name)[bkt], member)
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+					     u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+			    struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+			       struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+			       struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+				   struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+				 struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+			    struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+			    struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+				    struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+				    struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+			     struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..905f39dda5aa
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
+#define OPA_VNIC_SKB_HEADROOM  \
+			ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
+
+/* ndo_get_stats64 override: fetch the counters through the rdma netdev ops */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+				 struct rtnl_link_stats64 *stats)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct opa_vnic_stats counters;
+
+	memset(&counters, 0, sizeof(counters));
+	mutex_lock(&adapter->stats_lock);
+	adapter->rn_ops->ndo_get_stats64(netdev, &counters.netstats);
+	mutex_unlock(&adapter->stats_lock);
+	*stats = counters.netstats;
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+					 struct net_device *netdev)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+	v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+	/*
+	 * Pad to the minimum ethernet packet length. skb_put_padto() frees
+	 * the skb on failure, so the packet is reported as consumed.
+	 */
+	if (unlikely(skb_put_padto(skb, ETH_ZLEN)))
+		return NETDEV_TX_OK;
+
+	opa_vnic_encap_skb(adapter, skb);
+	/* hand the encapsulated skb to the underlying rdma netdev */
+	return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+				 void *accel_priv,
+				 select_queue_fallback_t fallback)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct opa_vnic_skb_mdata *meta;
+	int queue;
+
+	/* prepend entropy and vl as temporary metadata for the rdma netdev */
+	meta = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*meta));
+	meta->entropy = opa_vnic_calc_entropy(adapter, skb);
+	meta->vl = opa_vnic_get_vl(adapter, skb);
+	queue = adapter->rn_ops->ndo_select_queue(netdev, skb, accel_priv,
+						  fallback);
+	skb_pull(skb, sizeof(*meta));
+	return queue;
+}
+
+/* opa_vnic_process_vema_config - apply the vema supplied info to the netdev */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+	struct __opa_veswport_info *info = &adapter->info;
+	struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+	u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+	struct net_device *netdev = adapter->netdev;
+	u8 i, port_count = 0;
+	u16 port_mask;
+
+	/* If the base_mac_addr is changed, update the interface mac address */
+	if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+		   ARRAY_SIZE(info->vport.base_mac_addr))) {
+		struct sockaddr saddr;
+
+		memcpy(saddr.sa_data, info->vport.base_mac_addr,
+		       ARRAY_SIZE(info->vport.base_mac_addr));
+		mutex_lock(&adapter->lock);
+		eth_mac_addr(netdev, &saddr); /* NOTE: result is not checked */
+		memcpy(adapter->vema_mac_addr,
+		       info->vport.base_mac_addr, ETH_ALEN);
+		mutex_unlock(&adapter->lock);
+	}
+
+	rn->set_id(netdev, info->vesw.vesw_id);
+
+	/* Handle MTU limit change: raise/lower max_mtu, shrink mtu if needed */
+	rtnl_lock();
+	netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+				netdev->min_mtu);
+	if (netdev->mtu > netdev->max_mtu)
+		dev_set_mtu(netdev, netdev->max_mtu);
+	rtnl_unlock();
+
+	/* Collect the numbers of the ports whose bit is set in def_port_mask */
+	port_mask = info->vesw.def_port_mask;
+	for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+		if (port_mask & 1)
+			port_num[port_count++] = i;
+		port_mask >>= 1;
+	}
+
+	/*
+	 * Build the flow table. Flow table is required when destination LID
+	 * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows supported.
+	 * Each flow needs a default port number to get its dlid from the
+	 * u_ucast_dlid array; without any default port the entry is invalid.
+	 */
+	for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+		adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+						    OPA_VNIC_INVALID_PORT;
+
+	/* Operational state can only be DROP_ALL or FORWARDING */
+	if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+		info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+		netif_dormant_off(netdev);
+	} else {
+		info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+		netif_dormant_on(netdev);
+	}
+}
+
+/* Load the power on default values into the adapter's vema interface info */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+
+	vport->eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+	vport->config_state = OPA_VNIC_STATE_DROP_ALL;
+	vport->max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+	vport->max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+}
+
+/* opa_vnic_set_mac_addr - change mac address and notify vema about it */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct sockaddr *requested = addr;
+	int err;
+
+	/* nothing to do when the requested address is already in use */
+	if (memcmp(netdev->dev_addr, requested->sa_data, ETH_ALEN) == 0)
+		return 0;
+
+	mutex_lock(&adapter->lock);
+	err = eth_mac_addr(netdev, addr);
+	mutex_unlock(&adapter->lock);
+	if (!err) {
+		adapter->info.vport.uc_macs_gen_count++;
+		opa_vnic_vema_report_event(adapter,
+				OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+	}
+	return err;
+}
+
+/*
+ * opa_vnic_mac_send_event - post event on possible mac list change
+ *  Send trap when digest from uc/mc mac list differs from previous run.
+ *  Digest is computed like cksum does: crc32 of the data, then of its length.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+	struct netdev_hw_addr_list *addrs;
+	struct netdev_hw_addr *ha;
+	u32 *prev_digest;
+	u32 nbytes, digest = 0;
+
+	if (event == OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE) {
+		addrs = &netdev->uc;
+		vport->uc_macs_gen_count++;
+		prev_digest = &adapter->umac_hash;
+	} else if (event == OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE) {
+		addrs = &netdev->mc;
+		vport->mc_macs_gen_count++;
+		prev_digest = &adapter->mmac_hash;
+	} else {
+		return;
+	}
+
+	/* crc32 over every address, followed by the total byte count */
+	netdev_hw_addr_list_for_each(ha, addrs)
+		digest = crc32_le(digest, ha->addr, ETH_ALEN);
+	nbytes = netdev_hw_addr_list_count(addrs) * ETH_ALEN;
+	digest = ~crc32_le(digest, (void *)&nbytes, sizeof(nbytes));
+
+	if (digest == *prev_digest)
+		return;
+
+	*prev_digest = digest;
+	opa_vnic_vema_report_event(adapter, event);
+}
+
+/* opa_vnic_set_rx_mode - check the uc list, then the mc list, for changes */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+	u8 event = OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE;
+
+	opa_vnic_mac_send_event(netdev, event);
+	event = OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE;
+	opa_vnic_mac_send_event(netdev, event);
+}
+
+/* opa_netdev_open - open the rdma netdev and report the link status change */
+static int opa_netdev_open(struct net_device *netdev)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+	int err = adapter->rn_ops->ndo_open(adapter->netdev);
+
+	if (err) {
+		v_dbg("open failed %d\n", err);
+	} else {
+		/* Update eth link status and send trap */
+		vport->eth_link_status = OPA_VNIC_ETH_LINK_UP;
+		opa_vnic_vema_report_event(adapter,
+				OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+	}
+
+	return err;
+}
+
+/* opa_netdev_close - stop the rdma netdev and report the link status change */
+static int opa_netdev_close(struct net_device *netdev)
+{
+	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+	int err = adapter->rn_ops->ndo_stop(adapter->netdev);
+
+	if (err) {
+		v_dbg("close failed %d\n", err);
+	} else {
+		/* Update eth link status and send trap */
+		vport->eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+		opa_vnic_vema_report_event(adapter,
+				OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+	}
+
+	return err;
+}
+
+/* netdev ops installed in place of the rdma netdev's own ops (see rn_ops) */
+static const struct net_device_ops opa_netdev_ops = {
+	.ndo_open = opa_netdev_open,
+	.ndo_stop = opa_netdev_close,
+	.ndo_start_xmit = opa_netdev_start_xmit,
+	.ndo_get_stats64 = opa_vnic_get_stats64,
+	.ndo_set_rx_mode = opa_vnic_set_rx_mode,
+	.ndo_select_queue = opa_vnic_select_queue,
+	.ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+					     u8 port_num, u8 vport_num)
+{
+	struct opa_vnic_adapter *adapter;
+	struct rdma_netdev *rn;
+	struct net_device *netdev;
+	int err;
+
+	netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+					  RDMA_NETDEV_OPA_VNIC,
+					  "veth%d", NET_NAME_UNKNOWN,
+					  ether_setup);
+	if (IS_ERR(netdev))
+		return ERR_CAST(netdev);
+	if (!netdev)
+		return ERR_PTR(-ENOMEM);
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter) {
+		err = -ENOMEM;
+		goto free_netdev;
+	}
+
+	mutex_init(&adapter->lock);
+	mutex_init(&adapter->mactbl_lock);
+	mutex_init(&adapter->stats_lock);
+	adapter->netdev = netdev;
+	adapter->ibdev = ibdev;
+	adapter->port_num = port_num;
+	adapter->vport_num = vport_num;
+
+	/* link the rdma netdev private data back to this adapter */
+	rn = netdev_priv(netdev);
+	rn->clnt_priv = adapter;
+	rn->hca = ibdev;
+	rn->port_num = port_num;
+
+	/* remember the rdma netdev's ops before installing the vnic ones */
+	adapter->rn_ops = netdev->netdev_ops;
+	netdev->netdev_ops = &opa_netdev_ops;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+	SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+	opa_vnic_set_ethtool_ops(netdev);
+	opa_vnic_set_pod_values(adapter);
+
+	err = register_netdev(netdev);
+	if (err)
+		goto free_adapter;
+
+	netif_carrier_off(netdev);
+	netif_dormant_on(netdev);
+	v_info("initialized\n");
+	return adapter;
+
+free_adapter:
+	mutex_destroy(&adapter->lock);
+	mutex_destroy(&adapter->mactbl_lock);
+	mutex_destroy(&adapter->stats_lock);
+	kfree(adapter);
+free_netdev:
+	ibdev->free_rdma_netdev(netdev);
+
+	return ERR_PTR(err);
+}
+
+/* opa_vnic_rem_netdev - unregister the vnic netdev and free its adapter */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+	struct ib_device *hca = adapter->ibdev;
+	struct net_device *ndev = adapter->netdev;
+
+	v_info("removing\n");
+	unregister_netdev(ndev);
+	opa_vnic_release_mac_tbl(adapter);
+	mutex_destroy(&adapter->stats_lock);
+	mutex_destroy(&adapter->mactbl_lock);
+	mutex_destroy(&adapter->lock);
+	kfree(adapter);
+	hca->free_rdma_netdev(ndev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..875694f9a7f9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x)  (((x) >> 3) & 0x1f)
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap limit timeout, in usecs (it is fed to usecs_to_jiffies()).
+ * Equals the cap2_mask response time out, 4.096 usec * 2^18 == 1.0737
+ * secs (its inverse is approx 0.9 per sec). See IB spec 13.4.6.2.1
+ * PortInfoSubnetTimeout and 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT  ((4096 * (1UL << 18)) / 1000)
+
+#define OPA_VNIC_UNSUP_ATTR  \
+		cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR  \
+		cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP   0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT    255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to the vnic control port this port belongs to
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: Class port info information.
+ * @tid: Transaction id, incremented for every trap MAD built
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr, maps vport number to vnic adapter
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+	struct opa_vnic_ctrl_port      *cport;
+	struct ib_mad_agent            *mad_agent;
+	struct opa_class_port_info      class_port_info;
+	u64                             tid;
+	u8                              port_num;
+	struct idr                      vport_idr;
+	struct ib_event_handler         event_handler;
+
+	/* Lock to query/update network adapter */
+	struct mutex                    lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+				  void *client_data);
+
+static struct ib_client opa_vnic_client = {
+	.name   = opa_vnic_driver_name,
+	.add    = opa_vnic_vema_add_one,	/* new ib device */
+	.remove = opa_vnic_vema_rem_one,	/* ib device removal */
+};
+
+/**
+ * vema_get_vport_num -- Extract the vnic port number from a MAD
+ * @recvd_mad:  Received mad
+ *
+ * Return: low byte of the attribute modifier in the MAD header
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+	return (u8)(be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff);
+}
+
+/**
+ * vema_get_vport_adapter -- Look up the vnic adapter a MAD is addressed to
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * The vport number taken from the MAD is used as the id into the port's idr.
+ *
+ * Return: vnic adapter stored for that vport number (callers check for NULL)
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+		       struct opa_vnic_vema_port *port)
+{
+	return idr_find(&port->vport_idr, vema_get_vport_num(recvd_mad));
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * The request is accepted when its entries fit in one MAD data area and
+ * the window [offset, offset + num_entries) stays inside the mac table.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+	u16 first = be16_to_cpu(mac_tbl->offset);
+	u16 count = be16_to_cpu(mac_tbl->num_entries);
+	u16 max_per_mad = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+			   sizeof(mac_tbl->tbl_entries[0]));
+
+	if (count > max_per_mad)
+		return false;
+
+	return first + count <= OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+}
+
+/*
+ * Fill the port info structure with the power on default (POD) values,
+ * stored in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+	memset(port_info, 0, sizeof(*port_info));
+	port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+	port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+	port_info->vport.max_smac_ent =
+		cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+	port_info->vport.max_mac_tbl_ent =
+		cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+}
+
+/**
+ * vema_add_vport -- Create a vnic netdev and track it by vport number
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added); also used as the idr id
+ *
+ * Return: the new vnic adapter, or an ERR_PTR() value on failure
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+					       u8 vport_num)
+{
+	struct opa_vnic_ctrl_port *cport = port->cport;
+	struct opa_vnic_adapter *adapter;
+	int rc;
+
+	adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+	if (IS_ERR(adapter))
+		return adapter;
+
+	adapter->cport = cport;
+	rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+		       vport_num + 1, GFP_NOWAIT);
+	if (rc < 0) {
+		opa_vnic_rem_netdev(adapter);
+		return ERR_PTR(rc);
+	}
+
+	return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port:  Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function copies the latest class port info value set for the
+ * port into the response and then overrides the fields owned by this agent
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+				     struct opa_vnic_vema_mad *recvd_mad,
+				     struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_class_port_info *port_info;
+
+	port_info = (struct opa_class_port_info *)rsp_mad->data;
+	memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+	port_info->base_version = OPA_MGMT_BASE_VERSION;
+	port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+	/*
+	 * Set capability mask bit indicating agent generates traps,
+	 * and set the maximum number of VNIC ports supported.
+	 */
+	port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+					   (OPA_VNIC_MAX_NUM_VPORT << 8)));
+
+	/*
+	 * Since a get routine is always sent by the EM first we
+	 * set the expected response time to
+	 * 4.096 usec * 2^18 == 1.0737 sec here.
+	 */
+	port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Set class info for port
+ * @port:  Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function stores the class port info received in the MAD for the
+ * port (it is used when generating traps) and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+				     struct opa_vnic_vema_mad *recvd_mad,
+				     struct opa_vnic_vema_mad *rsp_mad)
+{
+	memcpy(&port->class_port_info, recvd_mad->data,
+	       sizeof(port->class_port_info));
+
+	vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info (POD values if vport not found)
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+				   struct opa_vnic_vema_mad *recvd_mad,
+				   struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_veswport_info *port_info =
+				  (struct opa_veswport_info *)rsp_mad->data;
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		vema_get_pod_values(port_info);
+		return;
+	}
+
+	memset(port_info, 0, sizeof(*port_info));
+	opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+	opa_vnic_get_per_veswport_info(adapter, &port_info->vport);
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function applies the veswport info carried in the received MAD to
+ * the addressed vnic port, adding the vnic port first if it does not exist
+ * yet, and then fills the response with the resulting veswport info.
+ *
+ * If the vnic port cannot be added, only the response status is set.
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+				   struct opa_vnic_vema_mad *recvd_mad,
+				   struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_veswport_info *port_info =
+				  (struct opa_veswport_info *)recvd_mad->data;
+	struct opa_vnic_ctrl_port *cport = port->cport;
+	u8 vport_num = vema_get_vport_num(recvd_mad);
+	struct opa_vnic_adapter *adapter;
+
+	/* The first Set for a vport number creates the vnic port */
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter)
+		adapter = vema_add_vport(port, vport_num);
+
+	if (IS_ERR(adapter)) {
+		c_err("failed to add vport %d: %ld\n",
+		      vport_num, PTR_ERR(adapter));
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	/* Store the requested settings, then process the new config */
+	opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+	opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+	opa_vnic_process_vema_config(adapter);
+
+	/* The response is built through the get path */
+	vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ *
+ * The response status is set to invalid attribute value when the vnic
+ * port does not exist or the requested range is not valid.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+				 struct opa_vnic_vema_mad *recvd_mad,
+				 struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_veswport_mactable *req, *rsp;
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	req = (struct opa_veswport_mactable *)recvd_mad->data;
+
+	/* Unknown vport or bad offset/count: reject the request */
+	if (!adapter || !vema_mac_tbl_req_ok(req)) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	rsp = (struct opa_veswport_mactable *)rsp_mad->data;
+	rsp->offset = req->offset;
+	rsp->num_entries = req->num_entries;
+	opa_vnic_query_mac_tbl(adapter, rsp);
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed. The response data is then filled in
+ * by vema_get_mac_entries().
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+				 struct opa_vnic_vema_mad *recvd_mad,
+				 struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_veswport_mactable *mac_tbl;
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+	if (!vema_mac_tbl_req_ok(mac_tbl) ||
+	    opa_vnic_update_mac_tbl(adapter, mac_tbl))
+		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+
+	/* Runs even after a failure; it may overwrite the status set above */
+	vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to response mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+				 struct opa_vnic_vema_mad *recvd_mad,
+				 struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_veswport_info *port_info =
+				  (struct opa_veswport_info *)rsp_mad->data;
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	vema_get_pod_values(port_info);	/* port_info is rsp_mad->data */
+	opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+	opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+	/* Process the new config settings */
+	opa_vnic_process_vema_config(adapter);
+
+	opa_vnic_release_mac_tbl(adapter);
+
+	vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad holding the start index and requested count
+ * @rsp_mad:   Response mad to be built
+ * @attr_id:   Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+			      struct opa_vnic_vema_mad *recvd_mad,
+			      struct opa_vnic_vema_mad *rsp_mad,
+			      u16 attr_id)
+{
+	struct opa_veswport_iface_macs *macs_in, *macs_out;
+	int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+	macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+	/* A zero count in the request means as many as fit in one MAD */
+	macs_out->start_idx = macs_in->start_idx;
+	macs_out->num_macs_in_msg = macs_in->num_macs_in_msg ?
+				    macs_in->num_macs_in_msg :
+				    cpu_to_be16(max_entries);
+
+	if (attr_id != OPA_EM_ATTR_IFACE_MCAST_MACS)
+		opa_vnic_query_ucast_macs(adapter, macs_out);
+	else
+		opa_vnic_query_mcast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad identifying the vnic port
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+				      struct opa_vnic_vema_mad *recvd_mad,
+				      struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	opa_vnic_get_summary_counters(adapter,
+		(struct opa_veswport_summary_counters *)rsp_mad->data);
+}
+
+/**
+ * vema_get_error_counters -- Gets error counters.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad identifying the vnic port
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+				    struct opa_vnic_vema_mad *recvd_mad,
+				    struct opa_vnic_vema_mad *rsp_mad)
+{
+	struct opa_vnic_adapter *adapter;
+
+	adapter = vema_get_vport_adapter(recvd_mad, port);
+	if (!adapter) {
+		rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	opa_vnic_get_error_counters(adapter,
+		(struct opa_veswport_error_counters *)rsp_mad->data);
+}
+
+/**
+ * vema_get -- Process received get MAD, dispatching on its attribute id
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+		     struct opa_vnic_vema_mad *recvd_mad,
+		     struct opa_vnic_vema_mad *rsp_mad)
+{
+	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+	switch (attr_id) {
+	case OPA_EM_ATTR_CLASS_PORT_INFO:
+		vema_get_class_port_info(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_VESWPORT_INFO:
+		vema_get_veswport_info(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+		vema_get_mac_entries(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_IFACE_UCAST_MACS:
+		/* fall through */
+	case OPA_EM_ATTR_IFACE_MCAST_MACS:
+		vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+		break;
+	case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+		vema_get_summary_counters(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+		vema_get_error_counters(port, recvd_mad, rsp_mad);
+		break;
+	default:	/* attribute id not handled for get */
+		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+		break;
+	}
+}
+
+/**
+ * vema_set -- Process received set MAD, dispatching on its attribute id
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+		     struct opa_vnic_vema_mad *recvd_mad,
+		     struct opa_vnic_vema_mad *rsp_mad)
+{
+	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+	switch (attr_id) {
+	case OPA_EM_ATTR_CLASS_PORT_INFO:
+		vema_set_class_port_info(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_VESWPORT_INFO:
+		vema_set_veswport_info(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+		vema_set_mac_entries(port, recvd_mad, rsp_mad);
+		break;
+	case OPA_EM_ATTR_DELETE_VESW:
+		vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+		break;
+	default:	/* attribute id not handled for set */
+		rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+		break;
+	}
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc:    pointer to mad send work completion information
+ *
+ * Free the address handle and the send buffer of the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+		      struct ib_mad_send_wc *mad_wc)
+{
+	rdma_destroy_ah(mad_wc->send_buf->ah);
+	ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc:    pointer to mad recv work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+		      struct ib_mad_send_buf *send_buf,
+		      struct ib_mad_recv_wc *mad_wc)
+{
+	struct opa_vnic_vema_port *port;
+	struct ib_ah              *ah;
+	struct ib_mad_send_buf    *rsp;
+	struct opa_vnic_vema_mad  *vema_mad;
+
+	if (!mad_wc || !mad_wc->recv_buf.mad)
+		return;
+
+	port = mad_agent->context;
+	ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+				  mad_wc->recv_buf.grh, mad_agent->port_num);
+	if (IS_ERR(ah))
+		goto free_recv_mad;
+
+	rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+				 mad_wc->wc->pkey_index, 0,
+				 IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+				 GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+	if (IS_ERR(rsp))
+		goto err_rsp;
+
+	rsp->ah = ah;
+	vema_mad = rsp->mad;
+	memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+	vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+	vema_mad->mad_hdr.status = 0;
+
+	/* Lock ensures network adapter is not removed */
+	mutex_lock(&port->lock);
+
+	switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+		vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+			 vema_mad);
+		break;
+	case IB_MGMT_METHOD_SET:
+		vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+			 vema_mad);
+		break;
+	default:
+		vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+		break;
+	}
+	mutex_unlock(&port->lock);
+
+	if (!ib_post_send_mad(rsp, NULL)) {
+		/*
+		 * with post send successful ah and send mad
+		 * will be destroyed in send handler
+		 */
+		goto free_recv_mad;
+	}
+
+	ib_free_send_mad(rsp);
+
+err_rsp:
+	rdma_destroy_ah(ah);
+free_recv_mad:
+	ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number (valid values are 1 to cport->num_ports)
+ *
+ * The per port structures are laid out as an array right after the
+ * control port structure; this function returns the array element
+ * that is associated with the OPA port number
+ *
+ * Return: ptr to requested opa_vnic_vema_port structure
+ *         if success, NULL if port_num is out of range
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
+	struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
+
+	if (!port_num || port_num > cport->num_ports)
+		return NULL;
+
+	return port + (port_num - 1);
+}
+
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @adapter: pointer to vnic adapter
+ * @data: pointer to trap data filled by calling function
+ * @lid:  issuers lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is something the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send mad the remote site qpn used is 1
+ * as this is the well known QP. Traps are rate limited per adapter,
+ * see OPA_VNIC_TRAP_BURST_LIMIT and OPA_VNIC_TRAP_TIMEOUT.
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+			     struct __opa_veswport_trap *data, u32 lid)
+{
+	struct opa_vnic_ctrl_port *cport = adapter->cport;
+	struct ib_mad_send_buf *send_buf;
+	struct opa_vnic_vema_port *port;
+	struct ib_device *ibp;
+	struct opa_vnic_vema_mad_trap *trap_mad;
+	struct opa_class_port_info *class;
+	struct rdma_ah_attr ah_attr;
+	struct ib_ah *ah;
+	struct opa_veswport_trap *trap;
+	u32 trap_lid;
+	u16 pkey_idx;
+
+	if (!cport)
+		goto err_exit;
+	ibp = cport->ibdev;
+	port = vema_get_port(cport, data->opaportnum);
+	if (!port || !port->mad_agent)
+		goto err_exit;
+
+	if (time_before(jiffies, adapter->trap_timeout)) {
+		if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+			v_warn("Trap rate exceeded\n");
+			goto err_exit;
+		} else {
+			adapter->trap_count++;
+		}
+	} else {
+		adapter->trap_count = 0;
+	}
+
+	class = &port->class_port_info;
+	/* Set up address handle */
+	memset(&ah_attr, 0, sizeof(ah_attr));
+	ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
+	rdma_ah_set_sl(&ah_attr,
+		       GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
+	rdma_ah_set_port_num(&ah_attr, port->port_num);
+	trap_lid = be32_to_cpu(class->trap_lid);
+	/*
+	 * check for trap lid validity, must not be zero
+	 * The trap sink could change after we fashion the MAD but since traps
+	 * are not guaranteed we won't use a lock as anyway the change will take
+	 * place even with locking.
+	 */
+	if (!trap_lid) {
+		c_err("%s: Invalid dlid\n", __func__);
+		goto err_exit;
+	}
+
+	rdma_ah_set_dlid(&ah_attr, trap_lid);
+	ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+	if (IS_ERR(ah)) {
+		c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+		c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+		      rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
+		      rdma_ah_get_port_num(&ah_attr));
+		goto err_exit;
+	}
+
+	if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+			 &pkey_idx) < 0) {
+		c_err("%s:full key not found, defaulting to partial\n",
+		      __func__);
+		if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+				 &pkey_idx) < 0)
+			pkey_idx = 1;
+	}
+
+	send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+				      IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+				      GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+	if (IS_ERR(send_buf)) {
+		c_err("%s:Couldn't allocate send buf\n", __func__);
+		goto err_sndbuf;
+	}
+
+	send_buf->ah = ah;
+
+	/* Set up common MAD hdr */
+	trap_mad = send_buf->mad;
+	trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+	trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+	trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+	trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+	port->tid++;
+	trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+	trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+	/* Set up vendor OUI */
+	trap_mad->oui[0] = INTEL_OUI_1;
+	trap_mad->oui[1] = INTEL_OUI_2;
+	trap_mad->oui[2] = INTEL_OUI_3;
+
+	/* Setup notice attribute portion */
+	trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+	trap_mad->notice.oui_1 = INTEL_OUI_1;
+	trap_mad->notice.oui_2 = INTEL_OUI_2;
+	trap_mad->notice.oui_3 = INTEL_OUI_3;
+	trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+	/* copy the actual trap data */
+	trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+	trap->fabric_id = cpu_to_be16(data->fabric_id);
+	trap->veswid = cpu_to_be16(data->veswid);
+	trap->veswportnum = cpu_to_be32(data->veswportnum);
+	trap->opaportnum = cpu_to_be16(data->opaportnum);
+	trap->veswportindex = data->veswportindex;
+	trap->opcode = data->opcode;
+
+	/* If successful send set up rate limit timeout else bail */
+	if (ib_post_send_mad(send_buf, NULL)) {
+		ib_free_send_mad(send_buf);
+	} else {
+		if (adapter->trap_count)
+			return;
+		adapter->trap_timeout = jiffies +
+					usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+		return;
+	}
+
+err_sndbuf:
+	rdma_destroy_ah(ah);
+err_exit:
+	v_err("Aborting trap\n");
+}
+
+/* idr_for_each() callback: remove the vnic netdev stored under this id */
+static int vema_rem_vport(int id, void *p, void *data)
+{
+	opa_vnic_rem_netdev((struct opa_vnic_adapter *)p);
+
+	return 0;
+}
+
+/* idr_for_each() callback: turn the carrier on for one vnic netdev */
+static int vema_enable_vport(int id, void *p, void *data)
+{
+	netif_carrier_on(((struct opa_vnic_adapter *)p)->netdev);
+
+	return 0;
+}
+
+/* idr_for_each() callback: turn the carrier off for one vnic netdev */
+static int vema_disable_vport(int id, void *p, void *data)
+{
+	netif_carrier_off(((struct opa_vnic_adapter *)p)->netdev);
+
+	return 0;
+}
+
+/* Per-port IB event handler: port error/active drives the vnic carriers */
+static void opa_vnic_event(struct ib_event_handler *handler,
+			   struct ib_event *record)
+{
+	struct opa_vnic_vema_port *port =
+		container_of(handler, struct opa_vnic_vema_port, event_handler);
+	struct opa_vnic_ctrl_port *cport = port->cport;
+
+	if (record->element.port_num != port->port_num)
+		return;
+
+	c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+	      record->event, record->device->name, record->element.port_num);
+	if (record->event == IB_EVENT_PORT_ACTIVE)
+		idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+	else if (record->event == IB_EVENT_PORT_ERR)
+		idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This deletes the per port registrations by VEMA (events, then MADs)
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+	int i;
+
+	for (i = 1; i <= cport->num_ports; i++) {
+		struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+		if (!port->mad_agent)
+			continue;
+
+		/* Quiesce events; lock ensures no MAD is processed */
+		ib_unregister_event_handler(&port->event_handler);
+		mutex_lock(&port->lock);
+		idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+		mutex_unlock(&port->lock);
+
+		ib_unregister_mad_agent(port->mad_agent);
+		port->mad_agent = NULL;
+		mutex_destroy(&port->lock);
+		idr_destroy(&port->vport_idr);
+	}
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * Registers an ib event handler and a VEMA MAD agent for each port on dev
+ *
+ * Return: returns 0 on success. non zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+	struct ib_mad_reg_req reg_req = {
+		.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+		.mgmt_class_version = OPA_MGMT_BASE_VERSION,
+		.oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+	};
+	int i, ret;
+
+	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+	for (i = 1; i <= cport->num_ports; i++) {
+		struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+		port->cport = cport;
+		port->port_num = i;
+
+		INIT_IB_EVENT_HANDLER(&port->event_handler,
+				      cport->ibdev, opa_vnic_event);
+		ret = ib_register_event_handler(&port->event_handler);
+		if (ret) {
+			c_err("port %d: event handler register failed\n", i);
+			vema_unregister(cport);
+			return ret;
+		}
+
+		idr_init(&port->vport_idr);
+		mutex_init(&port->lock);
+		port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+							IB_QPT_GSI, &reg_req,
+							IB_MGMT_RMPP_VERSION,
+							vema_send, vema_recv,
+							port, 0);
+		if (IS_ERR(port->mad_agent)) {
+			ret = PTR_ERR(port->mad_agent);
+			port->mad_agent = NULL;
+			mutex_destroy(&port->lock);
+			idr_destroy(&port->vport_idr);
+			/* vema_unregister() skips ports without a mad agent */
+			ib_unregister_event_handler(&port->event_handler);
+			vema_unregister(cport);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port with its per port data and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+	struct opa_vnic_ctrl_port *cport;
+	size_t alloc_sz;
+
+	if (!rdma_cap_opa_vnic(device))
+		return;
+
+	/* One opa_vnic_vema_port per physical port follows the control port */
+	alloc_sz = sizeof(*cport) +
+		   device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+	cport = kzalloc(alloc_sz, GFP_KERNEL);
+	if (!cport)
+		return;
+
+	cport->ibdev = device;
+	cport->num_ports = device->phys_port_cnt;
+
+	if (!vema_register(cport))
+		c_info("VNIC client initialized\n");
+
+	ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ * Nothing is done when no control port is attached to the device.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+				  void *client_data)
+{
+	struct opa_vnic_ctrl_port *cport = client_data;
+
+	if (cport) {
+		c_info("removing VNIC client\n");
+		vema_unregister(cport);
+		kfree(cport);
+	}
+}
+
+/* Module load: announce the driver and register as an IB client */
+static int __init opa_vnic_init(void)
+{
+	int rc;
+
+	pr_info("OPA Virtual Network Driver - v%s\n",
+		opa_vnic_driver_version);
+	rc = ib_register_client(&opa_vnic_client);
+	if (rc)
+		pr_err("VNIC driver register failed %d\n", rc);
+	return rc;
+}
+module_init(opa_vnic_init);
+
+/* Module unload only, hence __exit: unregister the IB client */
+static void __exit opa_vnic_deinit(void)
+{
+	ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..a51bf977f4d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - send trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * Fill trap data from the adapter's vesw/vport info and send it via vema.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+	struct __opa_vesw_info *vesw = &adapter->info.vesw;
+	struct __opa_veswport_trap trap_data;
+
+	trap_data.opcode = event;
+	trap_data.fabric_id = vesw->fabric_id;
+	trap_data.veswid = vesw->vesw_id;
+	trap_data.veswportnum = vport->port_num;
+	trap_data.veswportindex = adapter->vport_num;
+	trap_data.opaportnum = adapter->port_num;
+	opa_vnic_vema_send_trap(adapter, &trap_data, vport->encap_slid);
+}
+
+/**
+ * opa_vnic_get_summary_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function stores the summary counters that are maintained by the
+ * given adapter, converted to big endian, at the destination provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+				   struct opa_veswport_summary_counters *cntrs)
+{
+	struct opa_vnic_stats vstats;
+	__be64 *dst;
+	u64 *src;
+
+	memset(&vstats, 0, sizeof(vstats));
+	mutex_lock(&adapter->stats_lock);
+	adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+	mutex_unlock(&adapter->stats_lock);
+
+	cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+	cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+	cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+	cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+	cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+	cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+	cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+	cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+	cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+	/*
+	 * Walk both structures in step from tx_unicast up to reserved[]; this
+	 * depends on opa_veswport_summary_counters and opa_vnic_stats layout.
+	 */
+	for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+	     dst < &cntrs->reserved[0]; dst++, src++) {
+		*dst = cpu_to_be64(*src);
+	}
+}
+
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * Fetch the adapter's statistics under its stats lock and store the error
+ * counters, converted to big endian, at the destination address provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+				 struct opa_veswport_error_counters *cntrs)
+{
+	struct opa_vnic_stats stats;
+
+	memset(&stats, 0, sizeof(stats));
+	mutex_lock(&adapter->stats_lock);
+	adapter->rn_ops->ndo_get_stats64(adapter->netdev, &stats.netstats);
+	mutex_unlock(&adapter->stats_lock);
+
+	cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+	cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+	cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+
+	/* receive side */
+	cntrs->rx_errors = cpu_to_be64(stats.netstats.rx_errors);
+	cntrs->rx_bad_veswid = cpu_to_be64(stats.netstats.rx_nohandler);
+	cntrs->rx_runt = cpu_to_be64(stats.rx_runt);
+	cntrs->rx_oversize = cpu_to_be64(stats.rx_oversize);
+	cntrs->rx_drop_state = cpu_to_be64(stats.rx_drop_state);
+	cntrs->rx_logic = cpu_to_be64(stats.netstats.rx_fifo_errors);
+	/* transmit side */
+	cntrs->tx_errors = cpu_to_be64(stats.netstats.tx_errors);
+	cntrs->tx_dlid_zero = cpu_to_be64(stats.tx_dlid_zero);
+	cntrs->tx_drop_state = cpu_to_be64(stats.tx_drop_state);
+	cntrs->tx_logic = cpu_to_be64(stats.netstats.tx_fifo_errors +
+				      stats.netstats.tx_carrier_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * Copy the vesw info maintained by the given adapter into @info, converting
+ * the integer fields to big endian and copying reserved fields unchanged.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+			    struct opa_vesw_info *info)
+{
+	struct __opa_vesw_info *vesw = &adapter->info.vesw;
+	int n;
+
+	info->fabric_id = cpu_to_be16(vesw->fabric_id);
+	info->vesw_id = cpu_to_be16(vesw->vesw_id);
+	info->def_port_mask = cpu_to_be16(vesw->def_port_mask);
+	info->pkey = cpu_to_be16(vesw->pkey);
+	info->u_mcast_dlid = cpu_to_be32(vesw->u_mcast_dlid);
+	info->eth_mtu_non_vlan = cpu_to_be16(vesw->eth_mtu_non_vlan);
+
+	for (n = 0; n < OPA_VESW_MAX_NUM_DEF_PORT; n++)
+		info->u_ucast_dlid[n] = cpu_to_be32(vesw->u_ucast_dlid[n]);
+	for (n = 0; n < OPA_VNIC_MAX_NUM_PCP; n++)
+		info->eth_mtu[n] = cpu_to_be16(vesw->eth_mtu[n]);
+
+	/* reserved fields are passed through untouched */
+	memcpy(info->rsvd0, vesw->rsvd0, ARRAY_SIZE(vesw->rsvd0));
+	memcpy(info->rsvd1, vesw->rsvd1, ARRAY_SIZE(vesw->rsvd1));
+	memcpy(info->rsvd2, vesw->rsvd2, ARRAY_SIZE(vesw->rsvd2));
+	memcpy(info->rsvd3, vesw->rsvd3, ARRAY_SIZE(vesw->rsvd3));
+	memcpy(info->rsvd4, vesw->rsvd4, ARRAY_SIZE(vesw->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * Update the vesw info maintained by the given adapter from @info,
+ * converting the integer fields from big endian. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+			    struct opa_vesw_info *info)
+{
+	struct __opa_vesw_info *vesw = &adapter->info.vesw;
+	int n;
+
+	vesw->fabric_id = be16_to_cpu(info->fabric_id);
+	vesw->vesw_id = be16_to_cpu(info->vesw_id);
+	vesw->def_port_mask = be16_to_cpu(info->def_port_mask);
+	vesw->pkey = be16_to_cpu(info->pkey);
+	vesw->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+	vesw->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+
+	for (n = 0; n < OPA_VESW_MAX_NUM_DEF_PORT; n++)
+		vesw->u_ucast_dlid[n] = be32_to_cpu(info->u_ucast_dlid[n]);
+	for (n = 0; n < OPA_VNIC_MAX_NUM_PCP; n++)
+		vesw->eth_mtu[n] = be16_to_cpu(info->eth_mtu[n]);
+
+	/* reserved fields are kept so they can be handed back unchanged */
+	memcpy(vesw->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+	memcpy(vesw->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+	memcpy(vesw->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+	memcpy(vesw->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+	memcpy(vesw->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * Copy the vesw per port info maintained by the given adapter to the
+ * destination provided, including the fields (link status, oper state, MAC
+ * table limits/digest, MAC gen counts) that the set routine never modifies.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+				    struct opa_per_veswport_info *info)
+{
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+
+	/* fields that need byte order conversion */
+	info->port_num = cpu_to_be32(vport->port_num);
+	info->encap_slid = cpu_to_be32(vport->encap_slid);
+	info->mac_tbl_digest = cpu_to_be32(vport->mac_tbl_digest);
+	info->max_mac_tbl_ent = cpu_to_be16(vport->max_mac_tbl_ent);
+	info->max_smac_ent = cpu_to_be16(vport->max_smac_ent);
+	info->uc_macs_gen_count = cpu_to_be16(vport->uc_macs_gen_count);
+	info->mc_macs_gen_count = cpu_to_be16(vport->mc_macs_gen_count);
+
+	info->eth_link_status = vport->eth_link_status;
+	info->config_state = vport->config_state;
+	info->oper_state = vport->oper_state;
+	info->non_vlan_sc_uc = vport->non_vlan_sc_uc;
+	info->non_vlan_vl_uc = vport->non_vlan_vl_uc;
+	info->non_vlan_sc_mc = vport->non_vlan_sc_mc;
+	info->non_vlan_vl_mc = vport->non_vlan_vl_mc;
+
+	memcpy(info->base_mac_addr, vport->base_mac_addr,
+	       ARRAY_SIZE(info->base_mac_addr));
+	memcpy(info->pcp_to_sc_uc, vport->pcp_to_sc_uc,
+	       ARRAY_SIZE(info->pcp_to_sc_uc));
+	memcpy(info->pcp_to_vl_uc, vport->pcp_to_vl_uc,
+	       ARRAY_SIZE(info->pcp_to_vl_uc));
+	memcpy(info->pcp_to_sc_mc, vport->pcp_to_sc_mc,
+	       ARRAY_SIZE(info->pcp_to_sc_mc));
+	memcpy(info->pcp_to_vl_mc, vport->pcp_to_vl_mc,
+	       ARRAY_SIZE(info->pcp_to_vl_mc));
+	memcpy(info->rsvd0, vport->rsvd0, ARRAY_SIZE(vport->rsvd0));
+	memcpy(info->rsvd1, vport->rsvd1, ARRAY_SIZE(vport->rsvd1));
+	memcpy(info->rsvd2, vport->rsvd2, ARRAY_SIZE(vport->rsvd2));
+	memcpy(info->rsvd3, vport->rsvd3, ARRAY_SIZE(vport->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * Update the adapter's per port info from @info. eth_link_status, oper_state,
+ * the MAC table limits/digest and the MAC gen counts are not taken from @info.
+ * Reserved fields are stored and returned back to EM as is.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+				    struct opa_per_veswport_info *info)
+{
+	struct __opa_per_veswport_info *vport = &adapter->info.vport;
+
+	vport->port_num = be32_to_cpu(info->port_num);
+	vport->encap_slid = be32_to_cpu(info->encap_slid);
+	vport->config_state = info->config_state;
+	vport->non_vlan_sc_uc = info->non_vlan_sc_uc;
+	vport->non_vlan_vl_uc = info->non_vlan_vl_uc;
+	vport->non_vlan_sc_mc = info->non_vlan_sc_mc;
+	vport->non_vlan_vl_mc = info->non_vlan_vl_mc;
+
+	memcpy(vport->base_mac_addr, info->base_mac_addr,
+	       ARRAY_SIZE(vport->base_mac_addr));
+	memcpy(vport->pcp_to_sc_uc, info->pcp_to_sc_uc,
+	       ARRAY_SIZE(vport->pcp_to_sc_uc));
+	memcpy(vport->pcp_to_vl_uc, info->pcp_to_vl_uc,
+	       ARRAY_SIZE(vport->pcp_to_vl_uc));
+	memcpy(vport->pcp_to_sc_mc, info->pcp_to_sc_mc,
+	       ARRAY_SIZE(vport->pcp_to_sc_mc));
+	memcpy(vport->pcp_to_vl_mc, info->pcp_to_vl_mc,
+	       ARRAY_SIZE(vport->pcp_to_vl_mc));
+	/* reserved fields are kept so they can be handed back unchanged */
+	memcpy(vport->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+	memcpy(vport->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+	memcpy(vport->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+	memcpy(vport->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * Fill @macs with the adapter's multicast addresses, beginning at the start
+ * index and bounded by the entry count that @macs carries on entry.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+			       struct opa_veswport_iface_macs *macs)
+{
+	u16 first = be16_to_cpu(macs->start_idx);
+	u16 limit = be16_to_cpu(macs->num_macs_in_msg);
+	u16 seen = 0, filled = 0;
+	struct netdev_hw_addr *ha;
+
+	netdev_for_each_mc_addr(ha, adapter->netdev) {
+		/* skip entries ahead of the requested start index */
+		if (seen++ < first)
+			continue;
+		/* stop once the requested number of entries is filled */
+		if (filled == limit)
+			break;
+		memcpy(&macs->entry[filled], ha->addr, sizeof(macs->entry[0]));
+		filled++;
+	}
+
+	macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+	macs->num_macs_in_msg = cpu_to_be16(filled);
+	macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * Fill @macs with the adapter's unicast addresses (device addresses except
+ * the base MAC, then the uc list), windowed by the start index and count.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+			       struct opa_veswport_iface_macs *macs)
+{
+	u16 first = be16_to_cpu(macs->start_idx);
+	u16 limit = be16_to_cpu(macs->num_macs_in_msg);
+	u16 seen = 0, filled = 0, total;
+	struct netdev_hw_addr *ha;
+
+	/* device addresses come first */
+	for_each_dev_addr(adapter->netdev, ha) {
+		/* Do not include EM specified MAC address */
+		if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+			    ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+			continue;
+
+		/* honour the requested start index and entry limit */
+		if (seen++ < first)
+			continue;
+		if (filled == limit)
+			break;
+		memcpy(&macs->entry[filled], ha->addr,
+		       sizeof(macs->entry[filled]));
+		filled++;
+	}
+
+	/* then the netdev uc list, continuing the same index and fill count */
+	netdev_for_each_uc_addr(ha, adapter->netdev) {
+		if (seen++ < first)
+			continue;
+		if (filled == limit)
+			break;
+		memcpy(&macs->entry[filled], ha->addr,
+		       sizeof(macs->entry[filled]));
+		filled++;
+	}
+
+	/* total is both list sizes; not reduced for the skipped base MAC */
+	total = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+		netdev_uc_count(adapter->netdev);
+	macs->tot_macs_in_lst = cpu_to_be16(total);
+	macs->num_macs_in_msg = cpu_to_be16(filled);
+	macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index cee46266f434..def723a5df29 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -312,10 +312,15 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
 	if (ch->cm_id)
 		ib_destroy_cm_id(ch->cm_id);
 	ch->cm_id = new_cm_id;
+	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
+			    target->srp_host->port))
+		ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+	else
+		ch->path.rec_type = SA_PATH_REC_TYPE_IB;
 	ch->path.sgid = target->sgid;
 	ch->path.dgid = target->orig_dgid;
 	ch->path.pkey = target->pkey;
-	ch->path.service_id = target->service_id;
+	sa_path_set_service_id(&ch->path, target->service_id);
 
 	return 0;
 }
@@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
 }
 
 static void srp_path_rec_completion(int status,
-				    struct ib_sa_path_rec *pathrec,
+				    struct sa_path_rec *pathrec,
 				    void *ch_ptr)
 {
 	struct srp_rdma_ch *ch = ch_ptr;
@@ -2399,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
 	switch (event->param.rej_rcvd.reason) {
 	case IB_CM_REJ_PORT_CM_REDIRECT:
 		cpi = event->param.rej_rcvd.ari;
-		ch->path.dlid = cpi->redirect_lid;
+		sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
 		ch->path.pkey = cpi->redirect_pkey;
 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
 
-		ch->status = ch->path.dlid ?
+		ch->status = sa_path_get_dlid(&ch->path) ?
 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
 		break;
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 32ed40db3ca2..ab9077b81d5a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -152,7 +152,7 @@ struct srp_rdma_ch {
 	struct completion	done;
 	int			status;
 
-	struct ib_sa_path_rec	path;
+	struct sa_path_rec	path;
 	struct ib_sa_query     *path_query;
 	int			path_query_id;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index ee026b6b4f0d..1ced0731c140 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
 				  struct ib_mad_send_wc *mad_wc)
 {
-	ib_destroy_ah(mad_wc->send_buf->ah);
+	rdma_destroy_ah(mad_wc->send_buf->ah);
 	ib_free_send_mad(mad_wc->send_buf);
 }
 
@@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
 	ib_free_send_mad(rsp);
 
 err_rsp:
-	ib_destroy_ah(ah);
+	rdma_destroy_ah(ah);
 err:
 	ib_free_recv_mad(mad_wc);
 }