diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
21 files changed, 448 insertions, 229 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index c7e939945259..53daa6ca5d83 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -158,7 +158,7 @@ static int mlx4_reset_slave(struct mlx4_dev *dev) return -ETIMEDOUT; } -static int mlx4_comm_internal_err(u32 slave_read) +int mlx4_comm_internal_err(u32 slave_read) { return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a49072b4fa52..e8c105164931 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -43,6 +43,7 @@ #include <linux/semaphore.h> #include <rdma/ib_smi.h> #include <linux/delay.h> +#include <linux/etherdevice.h> #include <asm/io.h> @@ -2955,7 +2956,7 @@ static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port, return false; } -int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; @@ -2964,13 +2965,22 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; + if (is_multicast_ether_addr(mac)) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; port = mlx4_slaves_closest_port(dev, slave, port); s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; - s_info->mac = mac; + + if (s_info->spoofchk && is_zero_ether_addr(mac)) { + mlx4_info(dev, "MAC invalidation is not allowed when spoofchk is on\n"); + return -EPERM; + } + + s_info->mac = mlx4_mac_to_u64(mac); mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n", vf, port, s_info->mac); return 0; @@ -3143,6 +3153,7 @@ int 
mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; int slave; + u8 mac[ETH_ALEN]; if ((!mlx4_is_master(dev)) || !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) @@ -3154,6 +3165,13 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) port = mlx4_slaves_closest_port(dev, slave, port); s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + + mlx4_u64_to_mac(mac, s_info->mac); + if (setting && !is_valid_ether_addr(mac)) { + mlx4_info(dev, "Illegal MAC with spoofchk\n"); + return -EPERM; + } + s_info->spoofchk = setting; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index a849da92f857..fa6d2354a0e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -81,8 +81,9 @@ void mlx4_cq_tasklet_cb(unsigned long data) static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) { - unsigned long flags; struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + unsigned long flags; + bool kick; spin_lock_irqsave(&tasklet_ctx->lock, flags); /* When migrating CQs between EQs will be implemented, please note @@ -92,7 +93,10 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) */ if (list_empty_careful(&cq->tasklet_ctx.list)) { atomic_inc(&cq->refcount); + kick = list_empty(&tasklet_ctx->list); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + if (kick) + tasklet_schedule(&tasklet_ctx->task); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } @@ -101,13 +105,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; + rcu_read_lock(); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } + /* Accessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only 
after interrupt handling is completed. + */ ++cq->arm_sn; cq->comp(cq); @@ -118,23 +128,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); - + rcu_read_lock(); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - - spin_unlock(&cq_table->lock); + rcu_read_unlock(); if (!cq) { - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); return; } + /* Accessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ cq->event(cq, event_type); - - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, @@ -301,9 +307,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) return err; - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); if (err) goto err_icm; @@ -349,9 +355,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, return 0; err_radix: - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); @@ -370,15 +376,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != priv->eq_table.eq[MLX4_EQ_ASYNC].irq) 
synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); - if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 015198c14fa8..024788549c25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -62,12 +62,13 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, struct skb_shared_hwtstamps *hwts, u64 timestamp) { - unsigned long flags; + unsigned int seq; u64 nsec; - read_lock_irqsave(&mdev->clock_lock, flags); - nsec = timecounter_cyc2time(&mdev->clock, timestamp); - read_unlock_irqrestore(&mdev->clock_lock, flags); + do { + seq = read_seqbegin(&mdev->clock_lock); + nsec = timecounter_cyc2time(&mdev->clock, timestamp); + } while (read_seqretry(&mdev->clock_lock, seq)); memset(hwts, 0, sizeof(struct skb_shared_hwtstamps)); hwts->hwtstamp = ns_to_ktime(nsec); @@ -88,16 +89,23 @@ void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev) } } +#define MLX4_EN_WRAP_AROUND_SEC 10UL +/* By scheduling the overflow check every 5 seconds, we have a reasonably + * good chance we won't miss a wrap around. + * TODO: Use a timer instead of a work queue to increase the guarantee. 
+ */ +#define MLX4_EN_OVERFLOW_PERIOD (MLX4_EN_WRAP_AROUND_SEC * HZ / 2) + void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev) { bool timeout = time_is_before_jiffies(mdev->last_overflow_check + - mdev->overflow_period); + MLX4_EN_OVERFLOW_PERIOD); unsigned long flags; if (timeout) { - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); mdev->last_overflow_check = jiffies; } } @@ -128,10 +136,10 @@ static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); mdev->cycles.mult = neg_adj ? mult - diff : mult + diff; - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -149,9 +157,9 @@ static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) ptp_clock_info); unsigned long flags; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_adjtime(&mdev->clock, delta); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -172,9 +180,9 @@ static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, unsigned long flags; u64 ns; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); ns = timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); *ts = ns_to_timespec64(ns); @@ -198,9 +206,9 @@ static int mlx4_en_phc_settime(struct ptp_clock_info *ptp, unsigned long flags; /* reset the timecounter */ - write_lock_irqsave(&mdev->clock_lock, flags); + 
write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ns); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -236,7 +244,6 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; -#define MLX4_EN_WRAP_AROUND_SEC 10ULL /* This function calculates the max shift that enables the user range * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. @@ -245,13 +252,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); @@ -261,7 +264,6 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; unsigned long flags; - u64 ns, zero = 0; /* mlx4_en_init_timestamp is called for each netdev. 
* mdev->ptp_clock is common for all ports, skip initialization if @@ -270,7 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (mdev->ptp_clock) return; - rwlock_init(&mdev->clock_lock); + seqlock_init(&mdev->clock_lock); memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; @@ -280,17 +282,10 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ktime_to_ns(ktime_get_real())); - write_unlock_irqrestore(&mdev->clock_lock, flags); - - /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. - */ - ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask, zero, &zero); - do_div(ns, NSEC_PER_SEC / 2 / HZ); - mdev->overflow_period = ns; + write_sequnlock_irqrestore(&mdev->clock_lock, flags); /* Configure the PHC */ mdev->ptp_clock_info = mlx4_en_ptp_clock_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index b04760a5034b..1dae8e40fb25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -319,7 +319,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) default: en_err(priv, "TC[%d]: Not supported TSA: %d\n", i, ets->tc_tsa[i]); - return -ENOTSUPP; + return -EOPNOTSUPP; } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d9c9f86a30df..c4d714fcc7da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -902,6 +902,7 @@ mlx4_en_set_link_ksettings(struct net_device *dev, struct mlx4_en_priv *priv = 
netdev_priv(dev); struct mlx4_ptys_reg ptys_reg; __be32 proto_admin; + u8 cur_autoneg; int ret; u32 ptys_adv = ethtool2ptys_link_modes( @@ -931,10 +932,21 @@ mlx4_en_set_link_ksettings(struct net_device *dev, return 0; } - proto_admin = link_ksettings->base.autoneg == AUTONEG_ENABLE ? - cpu_to_be32(ptys_adv) : - speed_set_ptys_admin(priv, speed, - ptys_reg.eth_proto_cap); + cur_autoneg = ptys_reg.flags & MLX4_PTYS_AN_DISABLE_ADMIN ? + AUTONEG_DISABLE : AUTONEG_ENABLE; + + if (link_ksettings->base.autoneg == AUTONEG_DISABLE) { + proto_admin = speed_set_ptys_admin(priv, speed, + ptys_reg.eth_proto_cap); + if ((be32_to_cpu(proto_admin) & + (MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII) | + MLX4_PROT_MASK(MLX4_1000BASE_KX))) && + (ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP)) + ptys_reg.flags |= MLX4_PTYS_AN_DISABLE_ADMIN; + } else { + proto_admin = cpu_to_be32(ptys_adv); + ptys_reg.flags &= ~MLX4_PTYS_AN_DISABLE_ADMIN; + } proto_admin &= ptys_reg.eth_proto_cap; if (!proto_admin) { @@ -942,7 +954,9 @@ mlx4_en_set_link_ksettings(struct net_device *dev, return -EINVAL; /* nothing to change due to bad input */ } - if (proto_admin == ptys_reg.eth_proto_admin) + if ((proto_admin == ptys_reg.eth_proto_admin) && + ((ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP) && + (link_ksettings->base.autoneg == cur_autoneg))) return 0; /* Nothing to change */ en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n", @@ -1099,7 +1113,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev, memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); new_prof.tx_ring_size = tx_size; new_prof.rx_ring_size = rx_size; - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; @@ -1732,8 +1746,6 @@ static void mlx4_en_get_channels(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); - memset(channel, 0, sizeof(*channel)); - channel->max_rx = MAX_RX_RINGS; channel->max_tx 
= MLX4_EN_MAX_TX_RING_P_UP; @@ -1752,10 +1764,7 @@ static int mlx4_en_set_channels(struct net_device *dev, int xdp_count; int err = 0; - if (channel->other_count || channel->combined_count || - channel->tx_count > MLX4_EN_MAX_TX_RING_P_UP || - channel->rx_count > MAX_RX_RINGS || - !channel->tx_count || !channel->rx_count) + if (!channel->tx_count || !channel->rx_count) return -EINVAL; tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); @@ -1779,7 +1788,7 @@ static int mlx4_en_set_channels(struct net_device *dev, new_prof.tx_ring_num[TX_XDP] = xdp_count; new_prof.rx_ring_num = channel->rx_count; - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; @@ -1793,7 +1802,7 @@ static int mlx4_en_set_channels(struct net_device *dev, netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - if (dev->num_tc) + if (netdev_get_num_tc(dev)) mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); @@ -1985,7 +1994,7 @@ static int mlx4_en_get_module_info(struct net_device *dev, modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: - return -ENOSYS; + return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bcd955339058..61420473fe5f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1321,7 +1321,7 @@ static void mlx4_en_tx_timeout(struct net_device *dev) } -static struct rtnl_link_stats64 * +static void mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1330,8 +1330,6 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) mlx4_en_fold_software_stats(dev); netdev_stats_to_stats64(stats, &dev->stats); spin_unlock_bh(&priv->stats_lock); - - return 
stats; } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) @@ -1384,6 +1382,7 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); + u32 pkt_rate_high, pkt_rate_low; struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; @@ -1397,37 +1396,40 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; + pkt_rate_low = READ_ONCE(priv->pkt_rate_low); + pkt_rate_high = READ_ONCE(priv->pkt_rate_high); + for (ring = 0; ring < priv->rx_ring_num; ring++) { rx_packets = READ_ONCE(priv->rx_ring[ring]->packets); rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes); - rx_pkt_diff = ((unsigned long) (rx_packets - - priv->last_moder_packets[ring])); + rx_pkt_diff = rx_packets - priv->last_moder_packets[ring]; packets = rx_pkt_diff; rate = packets * HZ / period; - avg_pkt_size = packets ? ((unsigned long) (rx_bytes - - priv->last_moder_bytes[ring])) / packets : 0; + avg_pkt_size = packets ? 
(rx_bytes - + priv->last_moder_bytes[ring]) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { - if (rate < priv->pkt_rate_low) + if (rate <= pkt_rate_low) moder_time = priv->rx_usecs_low; - else if (rate > priv->pkt_rate_high) + else if (rate >= pkt_rate_high) moder_time = priv->rx_usecs_high; else - moder_time = (rate - priv->pkt_rate_low) * + moder_time = (rate - pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / - (priv->pkt_rate_high - priv->pkt_rate_low) + + (pkt_rate_high - pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } - if (moder_time != priv->last_moder_time[ring]) { + cq = priv->rx_cq[ring]; + if (moder_time != priv->last_moder_time[ring] || + cq->moder_cnt != priv->rx_frames) { priv->last_moder_time[ring] = moder_time; - cq = priv->rx_cq[ring]; cq->moder_time = moder_time; cq->moder_cnt = priv->rx_frames; err = mlx4_en_set_cq_moder(priv, cq); @@ -1638,7 +1640,8 @@ int mlx4_en_start_port(struct net_device *dev) /* Configure tx cq's and rings */ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { - u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1; + u8 num_tx_rings_p_up = t == TX ? 
+ priv->num_tx_rings_p_up : priv->tx_ring_num[t]; for (i = 0; i < priv->tx_ring_num[t]; i++) { /* Configure cq */ @@ -1696,6 +1699,14 @@ int mlx4_en_start_port(struct net_device *dev) priv->port, err); goto tx_err; } + + err = mlx4_SET_PORT_user_mtu(mdev->dev, priv->port, dev->mtu); + if (err) { + en_err(priv, "Failed to pass user MTU(%d) to Firmware for port %d, with error %d\n", + dev->mtu, priv->port, err); + goto tx_err; + } + /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { @@ -1747,8 +1758,11 @@ int mlx4_en_start_port(struct net_device *dev) /* Process all completions if exist to prevent * the queues freezing if they are full */ - for (i = 0; i < priv->rx_ring_num; i++) + for (i = 0; i < priv->rx_ring_num; i++) { + local_bh_disable(); napi_schedule(&priv->rx_cq[i]->napi); + local_bh_enable(); + } netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -2038,6 +2052,8 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv) if (priv->tx_cq[t] && priv->tx_cq[t][i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); } + kfree(priv->tx_ring[t]); + kfree(priv->tx_cq[t]); } for (i = 0; i < priv->rx_ring_num; i++) { @@ -2180,9 +2196,11 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst, int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp, - struct mlx4_en_port_profile *prof) + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog) { - int t; + struct bpf_prog *xdp_prog; + int i, t; mlx4_en_copy_priv(tmp, priv, prof); @@ -2196,6 +2214,23 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, } return -ENOMEM; } + + /* All rx_rings have the same xdp_prog. Pick the first one. 
*/ + xdp_prog = rcu_dereference_protected( + priv->rx_ring[0]->xdp_prog, + lockdep_is_held(&priv->mdev->state_lock)); + + if (xdp_prog && carry_xdp_prog) { + xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num); + if (IS_ERR(xdp_prog)) { + mlx4_en_free_resources(tmp); + return PTR_ERR(xdp_prog); + } + for (i = 0; i < tmp->rx_ring_num; i++) + rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog, + xdp_prog); + } + return 0; } @@ -2210,7 +2245,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int t; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2244,11 +2278,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mlx4_en_free_resources(priv); mutex_unlock(&mdev->state_lock); - for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { - kfree(priv->tx_ring[t]); - kfree(priv->tx_cq[t]); - } - free_netdev(dev); } @@ -2276,7 +2305,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (priv->tx_ring_num[TX_XDP] && !mlx4_en_check_xdp_mtu(dev, new_mtu)) - return -ENOTSUPP; + return -EOPNOTSUPP; dev->mtu = new_mtu; @@ -2456,12 +2485,8 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - u64 mac_u64 = mlx4_mac_to_u64(mac); - - if (is_multicast_ether_addr(mac)) - return -EINVAL; - return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); + return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac); } static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, @@ -2751,7 +2776,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); } - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false); if (err) { if (prog) bpf_prog_sub(prog, 
priv->rx_ring_num - 1); @@ -3495,7 +3520,7 @@ int mlx4_en_reset_config(struct net_device *dev, memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h index 040da4b16b1c..930f961fee42 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.h +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -35,7 +35,6 @@ #define _MLX4_EN_PORT_H_ -#define SET_PORT_GEN_ALL_VALID 0x7 #define SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 3c37e216bbf3..867292880c07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -33,6 +33,7 @@ #include <net/busy_poll.h> #include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -445,8 +446,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; @@ -508,8 +515,11 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { - if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) + if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { + local_bh_disable(); napi_reschedule(&priv->rx_cq[ring]->napi); + 
local_bh_enable(); + } } } @@ -594,10 +604,10 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, DMA_FROM_DEVICE); - /* Save page reference in skb */ - __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); - skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); - skb_frags_rx[nr].page_offset = frags[nr].page_offset; + __skb_fill_page_desc(skb, nr, frags[nr].page, + frags[nr].page_offset, + frag_info->frag_size); + skb->truesize += frag_info->frag_stride; frags[nr].page = NULL; } @@ -700,7 +710,8 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, do { if (mlx4_en_prepare_rx_desc(priv, ring, ring->prod & ring->size_mask, - GFP_ATOMIC | __GFP_COLD)) + GFP_ATOMIC | __GFP_COLD | + __GFP_MEMALLOC)) break; ring->prod++; } while (--missing); @@ -919,10 +930,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, cq->ring, &doorbell_pending))) goto consumed; + trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 5886ad78058f..3ed42199d3f1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -710,7 +710,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, u16 rings_p_up = priv->num_tx_rings_p_up; u8 up = 0; - if (dev->num_tc) + if (netdev_get_num_tc(dev)) return skb_tx_hash(dev, skb); if (skb_vlan_tag_present(skb)) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index cd3638e6fe25..07406cf2eacd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -494,7 +494,7 @@ static int 
mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_eqe *eqe; - int cqn = -1; + int cqn; int eqes_found = 0; int set_ci = 0; int port; @@ -554,8 +554,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_SRQ_LIMIT: - mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", - __func__); + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", + __func__, be32_to_cpu(eqe->event.srq.srqn), + eq->eqn); case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ @@ -570,15 +571,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq->eqn, eq->cons_index, ret); break; } - mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", - __func__, slave, - be32_to_cpu(eqe->event.srq.srqn), - eqe->type, eqe->subtype); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", + __func__, slave, + be32_to_cpu(eqe->event.srq.srqn), + eqe->type, eqe->subtype); if (!ret && slave != dev->caps.function) { - mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", - __func__, eqe->type, - eqe->subtype, slave); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", + __func__, eqe->type, + eqe->subtype, slave); mlx4_slave_event(dev, slave, eqe); break; } @@ -835,13 +840,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq_set_ci(eq, 1); - /* cqn is 24bit wide but is initialized such that its higher bits - * are ones too. Thus, if we got any event, cqn's high bits should be off - * and we need to schedule the tasklet. 
- */ - if (!(cqn & ~0xffffff)) - tasklet_schedule(&eq->tasklet_ctx.task); - return eqes_found; } @@ -1251,9 +1249,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "Failed adding irq rmap\n"); } #endif - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, + err = mlx4_create_eq(dev, dev->quotas.cq + + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, eq); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 84bab9f0732e..37e84a59e751 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -672,7 +672,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); func_cap->physical_port = field; if (func_cap->physical_port != gen_or_port) { - err = -ENOSYS; + err = -EINVAL; goto out; } @@ -1875,7 +1875,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = - (ilog2(cache_line_size()) - 4) << 5; + ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4); #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); @@ -2436,7 +2436,7 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev, #define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV)) - return -ENOTSUPP; + return -EOPNOTSUPP; err = mlx4_CONFIG_DEV_get(dev, &config_dev); if (err) @@ -2983,7 +2983,7 @@ static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) return PTR_ERR(mailbox); context = mailbox->buf; - context->v_ignore_fcs |= SET_PORT_GEN_PHV_VALID; + context->flags2 |= SET_PORT_GEN_PHV_VALID; if (phv_bit) context->phv_en |= SET_PORT_GEN_PHV_EN; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c 
b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 0e8b7c44931f..e00f627331cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -136,7 +136,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) LIST_HEAD(bond_list); if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = mlx4_disable_rx_port_check(dev, enable); if (ret) { @@ -222,6 +222,18 @@ void mlx4_unregister_device(struct mlx4_dev *dev) return; mlx4_stop_catas_poll(dev); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION && + mlx4_is_slave(dev)) { + /* In mlx4_remove_one on a VF */ + u32 slave_read = + swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read)); + + if (mlx4_comm_internal_err(slave_read)) { + mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n", + __func__); + mlx4_enter_error_state(dev->persist); + } + } mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..21377c315083 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/kmod.h> +#include <linux/etherdevice.h> #include <net/devlink.h> #include 
<linux/mlx4/device.h> @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) @@ -820,11 +838,9 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) */ if (hca_param.global_caps) { mlx4_err(dev, "Unknown hca global capabilities\n"); - return -ENOSYS; + return -EINVAL; } - mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; - dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); @@ -878,7 +894,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); - return -ENOSYS; + return -EINVAL; } dev->caps.num_ports = func_cap.num_ports; @@ -1429,7 +1445,7 @@ int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) int err; if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&priv->bond_mutex); @@ -1866,7 +1882,7 @@ int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_priv *priv = mlx4_priv(dev); if (mlx4_is_slave(dev)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!params) return -EINVAL; @@ -2366,7 +2382,7 @@ static int 
mlx4_init_hca(struct mlx4_dev *dev) /* Query CONFIG_DEV parameters */ err = mlx4_config_dev_retrieval(dev, ¶ms); - if (err && err != -ENOTSUPP) { + if (err && err != -EOPNOTSUPP) { mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); } else if (!err) { dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; @@ -3474,7 +3490,7 @@ slave_start: mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { - err = -ENOSYS; + err = -EOPNOTSUPP; mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n"); goto err_free_eq; } @@ -3485,6 +3501,8 @@ slave_start: goto err_disable_msix; } + mlx4_init_quotas(dev); + err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { @@ -3497,7 +3515,6 @@ slave_start: if (err) goto err_steer; - mlx4_init_quotas(dev); /* When PF resources are ready arm its comm channel to enable * getting commands */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 88ee7d8a5923..b4f1bc56cc68 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -487,6 +487,7 @@ struct mlx4_slave_state { bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; + u16 user_mtu[MLX4_MAX_PORTS + 1]; u16 mtu[MLX4_MAX_PORTS + 1]; __be32 ib_cap_mask[MLX4_MAX_PORTS + 1]; struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES]; @@ -590,6 +591,7 @@ struct mlx4_mfunc_master_ctx { struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1]; int init_port_ref[MLX4_MAX_PORTS + 1]; u16 max_mtu[MLX4_MAX_PORTS + 1]; + u16 max_user_mtu[MLX4_MAX_PORTS + 1]; u8 pptx; u8 pprx; int disable_mcast_ref[MLX4_MAX_PORTS + 1]; @@ -774,7 +776,9 @@ struct mlx4_vlan_table { int max; }; -#define SET_PORT_GEN_ALL_VALID 0x7 +#define SET_PORT_GEN_ALL_VALID (MLX4_FLAG_V_MTU_MASK | \ + MLX4_FLAG_V_PPRX_MASK | \ + MLX4_FLAG_V_PPTX_MASK) #define SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 @@ 
-787,7 +791,7 @@ enum { struct mlx4_set_port_general_context { u16 reserved1; - u8 v_ignore_fcs; + u8 flags2; u8 flags; union { u8 ignore_fcs; @@ -803,7 +807,8 @@ struct mlx4_set_port_general_context { u16 reserved4; u32 reserved5; u8 phv_en; - u8 reserved6[3]; + u8 reserved6[5]; + __be16 user_mtu; }; struct mlx4_set_port_rqp_calc_context { @@ -1220,6 +1225,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); +int mlx4_comm_internal_err(u32 slave_read); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index ba1c6cd0cc79..3629ce11a68b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -102,7 +102,8 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_PREFER_ORDER PAGE_ALLOC_COSTLY_ORDER +#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ + PAGE_ALLOC_COSTLY_ORDER) /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ @@ -424,12 +425,11 @@ struct mlx4_en_dev { u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; - rwlock_t clock_lock; u32 nominal_c_mult; struct cyclecounter cycles; + seqlock_t clock_lock; struct timecounter clock; unsigned long last_overflow_check; - unsigned long overflow_period; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; struct notifier_block nb; @@ -679,7 +679,8 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp, - struct mlx4_en_port_profile *prof); + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog); void 
mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 395b5463cfd9..db65f72879e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -823,7 +823,7 @@ int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || (type == MLX4_MW_TYPE_2 && !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) - return -ENOTSUPP; + return -EOPNOTSUPP; index = mlx4_mpt_reserve(dev); if (index == -1) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index b656dd5772e5..4e36e287d605 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -50,7 +50,11 @@ #define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL #define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL -#define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 +#define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) +#define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG_V_MTU_MASK BIT(0) +#define MLX4_FLAG_V_PPRX_MASK BIT(1) +#define MLX4_FLAG_V_PPTX_MASK BIT(2) #define MLX4_IGNORE_FCS_MASK 0x1 #define MLX4_TC_MAX_NUMBER 8 @@ -1239,13 +1243,96 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) return; } +static void +mlx4_en_set_port_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 mtu, prev_mtu; + + /* Mtu is configured as the max USER_MTU among all + * the functions on the port. 
+ */ + mtu = be16_to_cpu(gen_context->mtu); + mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + prev_mtu = slave_st->mtu[port]; + slave_st->mtu[port] = mtu; + if (mtu > master->max_mtu[port]) + master->max_mtu[port] = mtu; + if (mtu < prev_mtu && prev_mtu == master->max_mtu[port]) { + int i; + + slave_st->mtu[port] = mtu; + master->max_mtu[port] = mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_mtu[port] = + max_t(u16, master->max_mtu[port], + master->slave_state[i].mtu[port]); + } + gen_context->mtu = cpu_to_be16(master->max_mtu[port]); +} + +static void +mlx4_en_set_port_user_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 user_mtu, prev_user_mtu; + + /* User Mtu is configured as the max USER_MTU among all + * the functions on the port. 
+ */ + user_mtu = be16_to_cpu(gen_context->user_mtu); + user_mtu = min_t(int, user_mtu, dev->caps.eth_mtu_cap[port]); + prev_user_mtu = slave_st->user_mtu[port]; + slave_st->user_mtu[port] = user_mtu; + if (user_mtu > master->max_user_mtu[port]) + master->max_user_mtu[port] = user_mtu; + if (user_mtu < prev_user_mtu && + prev_user_mtu == master->max_user_mtu[port]) { + int i; + + slave_st->user_mtu[port] = user_mtu; + master->max_user_mtu[port] = user_mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_user_mtu[port] = + max_t(u16, master->max_user_mtu[port], + master->slave_state[i].user_mtu[port]); + } + gen_context->user_mtu = cpu_to_be16(master->max_user_mtu[port]); +} + +static void +mlx4_en_set_port_global_pause(struct mlx4_dev *dev, int slave, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + + /* Slave cannot change Global Pause configuration */ + if (slave != mlx4_master_func_num(dev) && + (gen_context->pptx != master->pptx || + gen_context->pprx != master->pprx)) { + gen_context->pptx = master->pptx; + gen_context->pprx = master->pprx; + mlx4_warn(dev, "denying Global Pause change for slave:%d\n", + slave); + } else { + master->pptx = gen_context->pptx; + master->pprx = gen_context->pprx; + } +} + static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_port_info *port_info; - struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; - struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; @@ -1256,7 +1343,6 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, int base; u32 in_modifier; u32 promisc; - u16 
mtu, prev_mtu; int err; int i, j; int offset; @@ -1269,7 +1355,9 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, is_eth = op_mod; port_info = &priv->port[port]; - /* Slaves cannot perform SET_PORT operations except changing MTU */ + /* Slaves cannot perform SET_PORT operations, + * except for changing MTU and USER_MTU. + */ if (is_eth) { if (slave != dev->caps.function && in_modifier != MLX4_SET_PORT_GENERAL && @@ -1297,40 +1385,20 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, break; case MLX4_SET_PORT_GENERAL: gen_context = inbox->buf; - /* Mtu is configured as the max MTU among all the - * the functions on the port. */ - mtu = be16_to_cpu(gen_context->mtu); - mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + - ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); - prev_mtu = slave_st->mtu[port]; - slave_st->mtu[port] = mtu; - if (mtu > master->max_mtu[port]) - master->max_mtu[port] = mtu; - if (mtu < prev_mtu && prev_mtu == - master->max_mtu[port]) { - slave_st->mtu[port] = mtu; - master->max_mtu[port] = mtu; - for (i = 0; i < dev->num_slaves; i++) { - master->max_mtu[port] = - max(master->max_mtu[port], - master->slave_state[i].mtu[port]); - } - } - gen_context->mtu = cpu_to_be16(master->max_mtu[port]); - /* Slave cannot change Global Pause configuration */ - if (slave != mlx4_master_func_num(dev) && - ((gen_context->pptx != master->pptx) || - (gen_context->pprx != master->pprx))) { - gen_context->pptx = master->pptx; - gen_context->pprx = master->pprx; - mlx4_warn(dev, - "denying Global Pause change for slave:%d\n", - slave); - } else { - master->pptx = gen_context->pptx; - master->pprx = gen_context->pprx; - } + if (gen_context->flags & MLX4_FLAG_V_MTU_MASK) + mlx4_en_set_port_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags2 & MLX4_FLAG2_V_USER_MTU_MASK) + mlx4_en_set_port_user_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags & + (MLX4_FLAG_V_PPRX_MASK | 
MLX4_FLAG_V_PPTX_MASK)) + mlx4_en_set_port_global_pause(dev, slave, + gen_context); + break; case MLX4_SET_PORT_GID_TABLE: /* change to MULTIPLE entries: number of guest's gids @@ -1608,6 +1676,30 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, } EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); +int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MTU_MASK; + context->user_mtu = cpu_to_be16(user_mtu); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); + int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) { struct mlx4_cmd_mailbox *mailbox; @@ -1619,7 +1711,7 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) if (IS_ERR(mailbox)) return PTR_ERR(mailbox); context = mailbox->buf; - context->v_ignore_fcs |= MLX4_FLAG_V_IGNORE_FCS_MASK; + context->flags2 |= MLX4_FLAG2_V_IGNORE_FCS_MASK; if (ignore_fcs_value) context->ignore_fcs |= MLX4_IGNORE_FCS_MASK; else diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index d1cd9c32a9ae..2d6abd4662b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -447,7 +447,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { mlx4_warn(dev, "Trying to set src check LB, but it isn't supported\n"); - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto out; } pri_addr_path_mask |= diff --git 
a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c548beaaf910..d8d5d161b8c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -77,6 +77,7 @@ struct res_common { int from_state; int to_state; int removing; + const char *func_name; }; enum { @@ -236,8 +237,8 @@ static void *res_tracker_lookup(struct rb_root *root, u64 res_id) struct rb_node *node = root->rb_node; while (node) { - struct res_common *res = container_of(node, struct res_common, - node); + struct res_common *res = rb_entry(node, struct res_common, + node); if (res_id < res->res_id) node = node->rb_left; @@ -255,8 +256,8 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) /* Figure out where to put new node */ while (*new) { - struct res_common *this = container_of(*new, struct res_common, - node); + struct res_common *this = rb_entry(*new, struct res_common, + node); parent = *new; if (res->res_id < this->res_id) @@ -837,6 +838,36 @@ static int mpt_mask(struct mlx4_dev *dev) return dev->caps.num_mpts - 1; } +static const char *mlx4_resource_type_to_str(enum mlx4_resource t) +{ + switch (t) { + case RES_QP: + return "QP"; + case RES_CQ: + return "CQ"; + case RES_SRQ: + return "SRQ"; + case RES_XRCD: + return "XRCD"; + case RES_MPT: + return "MPT"; + case RES_MTT: + return "MTT"; + case RES_MAC: + return "MAC"; + case RES_VLAN: + return "VLAN"; + case RES_COUNTER: + return "COUNTER"; + case RES_FS_RULE: + return "FS_RULE"; + case RES_EQ: + return "EQ"; + default: + return "INVALID RESOURCE"; + } +} + static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { @@ -846,9 +877,9 @@ static void *find_res(struct mlx4_dev *dev, u64 res_id, res_id); } -static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, - enum mlx4_resource type, - void *res) +static int _get_res(struct mlx4_dev *dev, int slave, u64 
res_id, + enum mlx4_resource type, + void *res, const char *func_name) { struct res_common *r; int err = 0; @@ -861,6 +892,10 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, } if (r->state == RES_ANY_BUSY) { + mlx4_warn(dev, + "%s(%d) trying to get resource %llx of type %s, but it's already taken by %s\n", + func_name, slave, res_id, mlx4_resource_type_to_str(type), + r->func_name); err = -EBUSY; goto exit; } @@ -872,6 +907,7 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; + r->func_name = func_name; if (res) *((struct res_common **)res) = r; @@ -881,6 +917,9 @@ exit: return err; } +#define get_res(dev, slave, res_id, type, res) \ + _get_res((dev), (slave), (res_id), (type), (res), __func__) + int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, u64 res_id, int *slave) @@ -911,8 +950,10 @@ static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); - if (r) + if (r) { r->state = r->from_state; + r->func_name = ""; + } spin_unlock_irq(mlx4_tlock(dev)); } @@ -1396,7 +1437,7 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) case RES_MTT: return remove_mtt_ok((struct res_mtt *)res, extra); case RES_MAC: - return -ENOSYS; + return -EOPNOTSUPP; case RES_EQ: return remove_eq_ok((struct res_eq *)res); case RES_COUNTER: @@ -2980,6 +3021,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } + + /* Save param3 for dynamic changes from VST back to VGT */ + qp->param3 = qpc->param3; put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); @@ -3772,7 +3816,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; u8 orig_sched_queue; - __be32 orig_param3 = qpc->param3; u8 
orig_vlan_control = qpc->pri_path.vlan_control; u8 orig_fvl_rx = qpc->pri_path.fvl_rx; u8 orig_pri_path_fl = qpc->pri_path.fl; @@ -3814,7 +3857,6 @@ out: */ if (!err) { qp->sched_queue = orig_sched_queue; - qp->param3 = orig_param3; qp->vlan_control = orig_vlan_control; qp->fvl_rx = orig_fvl_rx; qp->pri_path_fl = orig_pri_path_fl; @@ -4164,22 +4206,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. 
@@ -4271,7 +4297,7 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", slave); - return -ENOTSUPP; + return -EOPNOTSUPP; } /* Just change the smac for the QP */ @@ -4363,10 +4389,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4417,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, @@ -4473,6 +4495,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4489,10 +4512,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4517,7 +4541,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } |