From cdc5a7f363be34287ac6c2345e5d1d3b37cf4a23 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 20:45:06 +0300 Subject: net/mlx5e: Use the correct delete call on offloaded TC encap entry detach We wrongly direcly invoke hlist_del_rcu() and not hash_del_rcu() which does a slightly different call now and may change later, fix that. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 11c27e4fadf6..a90dd26ea51c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -384,7 +384,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - hlist_del_rcu(&e->encap_hlist); + hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); kfree(e); } -- cgit v1.2.3 From 26c02749936f064abf771a0f5f49b280fcfd8b66 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 10 May 2017 13:59:54 +0300 Subject: net/mlx5e: Allow TC csum offload if applied together with pedit action When offloading header re-writes, the HW re-calculates the relevant L3/L4 checksums. Hence, when upper layers (as done by OVS) ask for TC checksum action offload together with pedit offload, don't err. This command now works: tc filter add dev ens1f0 protocol ip parent ffff: prio 20 flower skip_sw ip_proto tcp dst_port 9001 action pedit ex munge tcp dport set 0x1234 pipe action csum tcp Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 39 +++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a90dd26ea51c..9dd83c7e4c51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include "en.h" @@ -1109,6 +1110,28 @@ out_err: return err; } +static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1149,6 +1172,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_skbedit_mark(a)) { u32 mark = tcf_skbedit_mark(a); @@ -1651,6 +1682,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev, *encap_dev = NULL; -- cgit v1.2.3 From d824bf3fe2d352fc2c52b7ede05b1a0e95d946be Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 19:02:42 +0300 Subject: net/mlx5e: Properly enforce disallowing of partial field re-write offload Currently we don't support partial header re-writes through TC pedit action offloading. However, the code that enforces that wasn't err-ing on cases where the first and last bits of the mask are set but there is some zero bit between them, such as in the below example, fix that! tc filter add dev enp1s0 protocol ip parent ffff: prio 10 flower ip_proto udp dst_port 2001 skip_sw action pedit munge ip src set 1.0.0.1 retain 0xff0000ff Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9dd83c7e4c51..0387c321f0a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -926,7 +926,7 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct mlx5e_tc_flow_parse_attr *parse_attr) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last; + int i, action_size, nactions, max_actions, first, last, first_z; void *s_masks_p, *a_masks_p, *vals_p; u32 s_mask, a_mask, val; struct mlx5_fields *f; @@ -985,9 +985,10 @@ static int offload_pedit_fields(struct pedit_headers *masks, memcpy(&val, vals_p, f->size); field_bsize = f->size * BITS_PER_BYTE; + first_z = find_first_zero_bit(&mask, field_bsize); first = find_first_bit(&mask, field_bsize); last = find_last_bit(&mask, field_bsize); - if (first > 0 || last != (field_bsize - 1)) { + if (first > 0 || last != (field_bsize - 1) || first_z < last) { printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", mask); return -EOPNOTSUPP; -- cgit v1.2.3 From e3ca4e0583a02a04503d9c827fb5c5d50abc4ff5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 13:37:26 +0300 Subject: net/mlx5e: Fix warnings around parsing of TC pedit actions The sparse tool emits these correct complaints: drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1005:25: warning: cast to restricted __be32 drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1007:25: warning: cast to restricted __be16 The value is provided from user-space in network order, but there's no way for them to realize that, avoid the warnings by casting to the appropriate type. Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format') Signed-off-by: Or Gerlitz Reported-by: Leon Romanovsky Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0387c321f0a2..ec63158ab643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -928,9 +928,9 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; int i, action_size, nactions, max_actions, first, last, first_z; void *s_masks_p, *a_masks_p, *vals_p; - u32 s_mask, a_mask, val; struct mlx5_fields *f; u8 cmd, field_bsize; + u32 s_mask, a_mask; unsigned long mask; void *action; @@ -947,7 +947,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; /* avoid seeing bits set from previous iterations */ - s_mask = a_mask = mask = val = 0; + s_mask = 0; + a_mask = 0; s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; @@ -982,9 +983,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, memset(a_masks_p, 0, f->size); } - memcpy(&val, vals_p, f->size); - field_bsize = f->size * BITS_PER_BYTE; + first_z = find_first_zero_bit(&mask, field_bsize); first = find_first_bit(&mask, field_bsize); last = find_last_bit(&mask, field_bsize); @@ -1004,11 +1004,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, } if (field_bsize == 32) - MLX5_SET(set_action_in, action, data, ntohl(val)); + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p)); else if (field_bsize == 16) - MLX5_SET(set_action_in, action, data, ntohs(val)); + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p)); else if (field_bsize == 8) - MLX5_SET(set_action_in, action, data, val); + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p); action += action_size; nactions++; -- cgit v1.2.3 From b57fe691961cc8f00541f9a435c70df45d41e514 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Apr 2017 17:59:00 +0300 Subject: net/mlx5e: IPoIB, handle RX packet correctly IPoIB packet contains the pseudo header area, we need to pull it prior to reset_mac_header in order to let the GRO work well. In more details: GRO checks the mac address of the new coming packet, it does that by comparing the hard_header_len size of the current packet to the previous one in that session, the comparison is over hard_header_len size. Now, the driver prepares that area in the skb by allocating area from the reserved part and resetting the correct mac header to it. Fixes: 9d6bd752c63c ("net/mlx5e: IPoIB, RX handler") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7b1566f0ae58..66b5fec15313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 #define MLX5_GID_SIZE 16 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; + char *pseudo_header; u8 *dgid; u8 g; @@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); skb_reset_mac_header(skb); - skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); skb->dev = netdev; -- cgit v1.2.3 From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 23 Feb 2017 11:19:36 +0200 Subject: net/mlx5: Avoid using pending command interface slots Currently when firmware command gets stuck or it takes long time to complete, the driver command will get timeout and the command slot is freed and can be used for new commands, and if the firmware receive new command on the old busy slot its behavior is unexpected and this could be harmful. To fix this when the driver command gets timeout we return failure, but we don't free the command slot and we wait for the firmware to explicitly respond to that command. Once all the entries are busy we will stop processing new firmware commands. Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 +++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 3 files changed, 38 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5bdaf3d545b2..10d282841f5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } static void cmd_work_handler(struct work_struct *work) @@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work) } cmd->ent_arr[ent->idx] = ent; + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work) if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + return; + } + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } err = ent->ret; @@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + } + continue; + } + if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) @@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } - free_ent(cmd, ent->idx); + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); if (ent->callback) { ds = ent->ts2 - ent->ts1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea5d8d37a75c..33eae5ad2fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); break; case MLX5_EVENT_TYPE_PORT_CHANGE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d0515391d33b..44f59b1d6f0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector); + mlx5_cmd_comp_handler(dev, vector, true); return; no_trig: -- cgit v1.2.3 From b665d98edc9ab295169be2fc5bb4e89a46de0a1a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 18 May 2017 13:34:43 +0300 Subject: net/mlx5: Tolerate irq_set_affinity_hint() failures Add tolerance to failures of irq_set_affinity_hint(). Its role is to give hints that optimizes performance, and should not block the driver load. In non-SMP systems, functionality is not available as there is a single core, and all these calls definitely fail. Hence, do not call the function and avoid the warning prints. Fixes: db058a186f98 ("net/mlx5_core: Set irq affinity hints") Signed-off-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0c123d571b4c..fe5546bb4153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,7 +612,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -622,18 +621,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); - err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); - if (err) { - mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", - irq); - goto err_clear_mask; - } +#ifdef CONFIG_SMP + if (irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); +#endif return 0; - -err_clear_mask: - free_cpumask_var(priv->irq_info[i].mask); - return err; } static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -- cgit v1.2.3