summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-09-06 15:42:04 -0700
committerDavid S. Miller <davem@davemloft.net>2018-09-06 15:42:04 -0700
commitddc9cc0131619382678771b0b85632f28bcf2521 (patch)
tree4116a4ee819ecf54ac8c6c029c5353c7efcfc143
parent2002bc328ca3e23b7b31849d823b67d4d7c1fd41 (diff)
parentfe1dc069990c1f290ef6b99adb46332c03258f38 (diff)
Merge tag 'mlx5e-updates-2018-09-05' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5e-updates-2018-09-05 This series provides updates to mlx5 ethernet driver. 1) Starting with a four patches series to optimize flow counters updates, From Vlad Buslov: ============================================== By default mlx5 driver updates cached counters each second. Update function consumes noticeable amount of CPU resources. The goal of this patch series is to optimize update function. Investigation revealed following bottlenecks in fs counters implementation: 1) Update code(scheduled each second) iterates over all counters twice. (first for finding and deleting counters that are marked for deletion, second iteration is for actually updating the counters) 2) Counters are stored in rb tree. Linear iteration over all rb tree elements(rb_next in profiling data) consumed ~65% of time spent in update function. Following optimizations were implemented: 1) Instead of just marking counters for deletion, store them in standalone list. This removes first iteration over whole counters tree. 2) Store counters in sorted list to optimize traversing them and remove calls to rb_next. First implementation of these changes caused degradation of performance, instead of improving it. Investigation revealed that there first cache line of struct mlx5_fc is full and adding anything to it causes amount of cache misses to double. To mitigate that, following refactorings were implemented: - Change 'addlist' list type from double linked to single linked. This allowes to get free space for one additional pointer that is used to store deletion list(optimization 1) - Substitute rb tree with idr. Idr is non-intrusive data structure and doesn't require adding any new members to struct mlx5_fc. Use free space that became available for double linked sorted list that is used for traversing all counters. (optimization 2) Described changes reduced CPU time spent in mlx5_fc_stats_work from 70% to 44%. (global perf profile mode) ============================================ The rest of the series are misc updates: 2) From Kamal, Move mlx5e_priv_flags into en_ethtool.c, to avoid a compilation warning. 3) From Roi Dayan, Move Q counters allocation and drop RQ to init_rx profile function to avoid allocating Q counters when not required. 4) From Shay Agroskin, Replace PTP clock lock from RW lock to seq lock. Almost double the packet rate when timestamping is active on multiple TX queues. 5) From: Natali Shechtman, set ECN for received packets using CQE indication. 6) From: Alaa Hleihel, don't set CHECKSUM_COMPLETE on SCTP packets. CHECKSUM_COMPLETE is not applicable to SCTP protocol. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c184
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h8
-rw-r--r--include/linux/mlx5/driver.h11
13 files changed, 235 insertions, 153 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index db2cfcd21d43..01a967e717e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -204,13 +204,6 @@ struct mlx5e_umr_wqe {
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
- "rx_cqe_moder",
- "tx_cqe_moder",
- "rx_cqe_compress",
- "rx_striding_rq",
-};
-
enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
@@ -905,6 +898,12 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
/* common netdev helpers */
+void mlx5e_create_q_counters(struct mlx5e_priv *priv);
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq);
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
+
int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 98dd3e0ada72..8cd338ceb237 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -135,6 +135,13 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
}
+static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
+ "rx_cqe_moder",
+ "tx_cqe_moder",
+ "rx_cqe_compress",
+ "rx_striding_rq",
+};
+
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
int i, num_stats = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5a7939e70190..d14c4051edd8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3049,8 +3049,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
return mlx5e_alloc_cq_common(mdev, param, cq);
}
-static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
- struct mlx5e_rq *drop_rq)
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_cq_param cq_param = {};
@@ -3094,7 +3094,7 @@ err_free_cq:
return err;
}
-static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
{
mlx5e_destroy_rq(drop_rq);
mlx5e_free_rq(drop_rq);
@@ -4726,7 +4726,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_tls_build_netdev(priv);
}
-static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+void mlx5e_create_q_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -4744,7 +4744,7 @@ static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
}
}
-static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
{
if (priv->q_counter)
mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
@@ -4783,9 +4783,17 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
err = mlx5e_create_indirect_rqt(priv);
if (err)
- return err;
+ goto err_close_drop_rq;
err = mlx5e_create_direct_rqts(priv);
if (err)
@@ -4821,6 +4829,10 @@ err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
return err;
}
@@ -4832,6 +4844,8 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
mlx5e_destroy_indirect_tirs(priv);
mlx5e_destroy_direct_rqts(priv);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
}
static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
@@ -4975,7 +4989,6 @@ err_cleanup_nic:
int mlx5e_attach_netdev(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
const struct mlx5e_profile *profile;
int err;
@@ -4986,28 +4999,16 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (err)
goto out;
- mlx5e_create_q_counters(priv);
-
- err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
- if (err) {
- mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
- goto err_destroy_q_counters;
- }
-
err = profile->init_rx(priv);
if (err)
- goto err_close_drop_rq;
+ goto err_cleanup_tx;
if (profile->enable)
profile->enable(priv);
return 0;
-err_close_drop_rq:
- mlx5e_close_drop_rq(&priv->drop_rq);
-
-err_destroy_q_counters:
- mlx5e_destroy_q_counters(priv);
+err_cleanup_tx:
profile->cleanup_tx(priv);
out:
@@ -5025,8 +5026,6 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
flush_workqueue(priv->wq);
profile->cleanup_rx(priv);
- mlx5e_close_drop_rq(&priv->drop_rq);
- mlx5e_destroy_q_counters(priv);
profile->cleanup_tx(priv);
cancel_delayed_work_sync(&priv->update_stats_work);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c9cc9747d21d..f6eead24931f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -999,14 +999,21 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_flow_handle *flow_rule;
int err;
mlx5e_init_l2_addr(priv);
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ return err;
+ }
+
err = mlx5e_create_direct_rqts(priv);
if (err)
- return err;
+ goto err_close_drop_rq;
err = mlx5e_create_direct_tirs(priv);
if (err)
@@ -1027,6 +1034,8 @@ err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
return err;
}
@@ -1037,6 +1046,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
mlx5_del_flow_rules(rpriv->vport_rx_rule);
mlx5e_destroy_direct_tirs(priv);
mlx5e_destroy_direct_rqts(priv);
+ mlx5e_close_drop_rq(&priv->drop_rq);
}
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 6a959e8b1f9d..424bc89184c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -37,6 +37,7 @@
#include <net/busy_poll.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
+#include <net/inet_ecn.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
@@ -690,12 +691,29 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
{
- __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+ return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
+}
+
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
+
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
+
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
- ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
- return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
+ rq->stats->ecn_mark += !!rc;
}
static __be32 mlx5e_get_fcs(struct sk_buff *skb)
@@ -737,6 +755,14 @@ static __be32 mlx5e_get_fcs(struct sk_buff *skb)
return fcs_bytes;
}
+static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
+{
+ void *ip_p = skb->data + sizeof(struct ethhdr);
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
@@ -745,6 +771,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
{
struct mlx5e_rq_stats *stats = rq->stats;
int network_depth = 0;
+ __be16 proto;
if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
goto csum_none;
@@ -755,7 +782,10 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
- if (likely(is_last_ethertype_ip(skb, &network_depth))) {
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
if (network_depth > ETH_HLEN)
@@ -773,6 +803,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
+csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
(cqe->hds_ip_ext & CQE_L4_OK))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -790,6 +821,8 @@ csum_none:
stats->csum_none++;
}
+#define MLX5E_CE_BIT_MASK 0x80
+
static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
u32 cqe_bcnt,
struct mlx5e_rq *rq,
@@ -834,6 +867,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
skb->protocol = eth_type_trans(skb, netdev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 6839481f7697..90c7607b1f44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -53,6 +53,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
@@ -144,6 +145,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_bytes += rq_stats->bytes;
s->rx_lro_packets += rq_stats->lro_packets;
s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
@@ -1144,6 +1146,7 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index a4c035aedd46..a5fb3dc27f50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -66,6 +66,7 @@ struct mlx5e_sw_stats {
u64 tx_nop;
u64 rx_lro_packets;
u64 rx_lro_bytes;
+ u64 rx_ecn_mark;
u64 rx_removed_vlan_packets;
u64 rx_csum_unnecessary;
u64 rx_csum_none;
@@ -184,6 +185,7 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 ecn_mark;
u64 removed_vlan_packets;
u64 xdp_drop;
u64 xdp_redirect;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 32070e5d993d..a06f83c0c2b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -36,6 +36,7 @@
#include <linux/refcount.h>
#include <linux/mlx5/fs.h>
#include <linux/rhashtable.h>
+#include <linux/llist.h>
enum fs_node_type {
FS_TYPE_NAMESPACE,
@@ -138,8 +139,9 @@ struct mlx5_fc_cache {
};
struct mlx5_fc {
- struct rb_node node;
struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
/* last{packets,bytes} members are used when calculating the delta since
* last reading
@@ -148,7 +150,6 @@ struct mlx5_fc {
u64 lastbytes;
u32 id;
- bool deleted;
bool aging;
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 58af6be13dfa..09206c4acd9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -52,11 +52,13 @@
* access to counter list:
* - create (user context)
* - mlx5_fc_create() only adds to an addlist to be used by
- * mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ * mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ * that doesn't require any additional synchronization when adding single
+ * node.
* - spawn thread to do the actual destroy
*
* - destroy (user context)
- * - mark a counter as deleted
+ * - add a counter to lockless dellist
* - spawn thread to do the actual del
*
* - dump (user context)
@@ -71,36 +73,43 @@
* elapsed, the thread will actually query the hardware.
*/
-static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+ u32 id)
{
- struct rb_node **new = &root->rb_node;
- struct rb_node *parent = NULL;
-
- while (*new) {
- struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
- int result = counter->id - this->id;
-
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else
- new = &((*new)->rb_right);
- }
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ unsigned long next_id = (unsigned long)id + 1;
+ struct mlx5_fc *counter;
+
+ rcu_read_lock();
+ /* skip counters that are in idr, but not yet in counters list */
+ while ((counter = idr_get_next_ul(&fc_stats->counters_idr,
+ &next_id)) != NULL &&
+ list_empty(&counter->list))
+ next_id++;
+ rcu_read_unlock();
+
+ return counter ? &counter->list : &fc_stats->counters;
+}
- /* Add new node and rebalance tree. */
- rb_link_node(&counter->node, parent, new);
- rb_insert_color(&counter->node, root);
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
+
+ list_add_tail(&counter->list, next);
}
-/* The function returns the last node that was queried so the caller
+/* The function returns the last counter that was queried so the caller
* function can continue calling it till all counters are queried.
*/
-static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
struct mlx5_fc *first,
u32 last_id)
{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter = NULL;
struct mlx5_cmd_fc_bulk *b;
- struct rb_node *node = NULL;
+ bool more = false;
u32 afirst_id;
int num;
int err;
@@ -130,14 +139,16 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
goto out;
}
- for (node = &first->node; node; node = rb_next(node)) {
- struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+ counter = first;
+ list_for_each_entry_from(counter, &fc_stats->counters, list) {
struct mlx5_fc_cache *c = &counter->cache;
u64 packets;
u64 bytes;
- if (counter->id > last_id)
+ if (counter->id > last_id) {
+ more = true;
break;
+ }
mlx5_cmd_fc_bulk_get(dev, b,
counter->id, &packets, &bytes);
@@ -153,7 +164,14 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
out:
mlx5_cmd_fc_bulk_free(b);
- return node;
+ return more ? counter : NULL;
+}
+
+static void mlx5_free_fc(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ mlx5_cmd_fc_free(dev, counter->id);
+ kfree(counter);
}
static void mlx5_fc_stats_work(struct work_struct *work)
@@ -161,52 +179,33 @@ static void mlx5_fc_stats_work(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
priv.fc_stats.work.work);
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct llist_node *tmplist = llist_del_all(&fc_stats->addlist);
+ struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
unsigned long now = jiffies;
- struct mlx5_fc *counter = NULL;
- struct mlx5_fc *last = NULL;
- struct rb_node *node;
- LIST_HEAD(tmplist);
-
- spin_lock(&fc_stats->addlist_lock);
- list_splice_tail_init(&fc_stats->addlist, &tmplist);
-
- if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+ if (tmplist || !list_empty(&fc_stats->counters))
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
- spin_unlock(&fc_stats->addlist_lock);
-
- list_for_each_entry(counter, &tmplist, list)
- mlx5_fc_stats_insert(&fc_stats->counters, counter);
-
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
-
- node = rb_next(node);
-
- if (counter->deleted) {
- rb_erase(&counter->node, &fc_stats->counters);
+ llist_for_each_entry(counter, tmplist, addlist)
+ mlx5_fc_stats_insert(dev, counter);
- mlx5_cmd_fc_free(dev, counter->id);
-
- kfree(counter);
- continue;
- }
+ tmplist = llist_del_all(&fc_stats->dellist);
+ llist_for_each_entry_safe(counter, tmp, tmplist, dellist) {
+ list_del(&counter->list);
- last = counter;
+ mlx5_free_fc(dev, counter);
}
- if (time_before(now, fc_stats->next_query) || !last)
+ if (time_before(now, fc_stats->next_query) ||
+ list_empty(&fc_stats->counters))
return;
+ last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
-
- node = mlx5_fc_stats_query(dev, counter, last->id);
- }
+ counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+ list);
+ while (counter)
+ counter = mlx5_fc_stats_query(dev, counter, last->id);
fc_stats->next_query = now + fc_stats->sampling_interval;
}
@@ -220,24 +219,38 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
counter = kzalloc(sizeof(*counter), GFP_KERNEL);
if (!counter)
return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&counter->list);
err = mlx5_cmd_fc_alloc(dev, &counter->id);
if (err)
goto err_out;
if (aging) {
+ u32 id = counter->id;
+
counter->cache.lastuse = jiffies;
counter->aging = true;
- spin_lock(&fc_stats->addlist_lock);
- list_add(&counter->list, &fc_stats->addlist);
- spin_unlock(&fc_stats->addlist_lock);
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc_stats->counters_idr_lock);
+
+ err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
+ GFP_NOWAIT);
+
+ spin_unlock(&fc_stats->counters_idr_lock);
+ idr_preload_end();
+ if (err)
+ goto err_out_alloc;
+
+ llist_add(&counter->addlist, &fc_stats->addlist);
mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
}
return counter;
+err_out_alloc:
+ mlx5_cmd_fc_free(dev, counter->id);
err_out:
kfree(counter);
@@ -253,13 +266,16 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
return;
if (counter->aging) {
- counter->deleted = true;
+ spin_lock(&fc_stats->counters_idr_lock);
+ WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+ spin_unlock(&fc_stats->counters_idr_lock);
+
+ llist_add(&counter->dellist, &fc_stats->dellist);
mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
return;
}
- mlx5_cmd_fc_free(dev, counter->id);
- kfree(counter);
+ mlx5_free_fc(dev, counter);
}
EXPORT_SYMBOL(mlx5_fc_destroy);
@@ -267,9 +283,11 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- fc_stats->counters = RB_ROOT;
- INIT_LIST_HEAD(&fc_stats->addlist);
- spin_lock_init(&fc_stats->addlist_lock);
+ spin_lock_init(&fc_stats->counters_idr_lock);
+ idr_init(&fc_stats->counters_idr);
+ INIT_LIST_HEAD(&fc_stats->counters);
+ init_llist_head(&fc_stats->addlist);
+ init_llist_head(&fc_stats->dellist);
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
@@ -284,34 +302,22 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct llist_node *tmplist;
struct mlx5_fc *counter;
struct mlx5_fc *tmp;
- struct rb_node *node;
cancel_delayed_work_sync(&dev->priv.fc_stats.work);
destroy_workqueue(dev->priv.fc_stats.wq);
dev->priv.fc_stats.wq = NULL;
- list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
- list_del(&counter->list);
-
- mlx5_cmd_fc_free(dev, counter->id);
+ idr_destroy(&fc_stats->counters_idr);
- kfree(counter);
- }
+ tmplist = llist_del_all(&fc_stats->addlist);
+ llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+ mlx5_free_fc(dev, counter);
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
-
- node = rb_next(node);
-
- rb_erase(&counter->node, &fc_stats->counters);
-
- mlx5_cmd_fc_free(dev, counter->id);
-
- kfree(counter);
- }
+ list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
+ mlx5_free_fc(dev, counter);
}
int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 3dd9f885ed8b..a825ed093efd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -349,11 +349,20 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
err = mlx5e_create_indirect_rqt(priv);
if (err)
- return err;
+ goto err_close_drop_rq;
err = mlx5e_create_direct_rqts(priv);
if (err)
@@ -381,6 +390,10 @@ err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
return err;
}
@@ -391,6 +404,8 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
mlx5e_destroy_indirect_tirs(priv);
mlx5e_destroy_direct_rqts(priv);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
}
static const struct mlx5e_profile mlx5i_nic_profile = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 3f767cde4c1d..0d90b1b4a3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -111,10 +111,10 @@ static void mlx5_pps_out(struct work_struct *work)
for (i = 0; i < clock->ptp_info.n_pins; i++) {
u64 tstart;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
tstart = clock->pps_info.start[i];
clock->pps_info.start[i] = 0;
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
if (!tstart)
continue;
@@ -132,10 +132,10 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
overflow_work);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
}
@@ -147,10 +147,10 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
u64 ns = timespec64_to_ns(ts);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_init(&clock->tc, &clock->cycles, ns);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -162,9 +162,9 @@ static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
u64 ns;
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
ns = timecounter_read(&clock->tc);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -177,10 +177,10 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
ptp_info);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_adjtime(&clock->tc, delta);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -203,12 +203,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
adj *= delta;
diff = div_u64(adj, 1000000000ULL);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -307,12 +307,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
cycles_now = mlx5_read_internal_timer(mdev);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
clock->cycles.mult);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
time_stamp = cycles_now + cycles_delta;
field_select = MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
@@ -471,14 +471,14 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
clock->cycles.mult);
clock->pps_info.start[pin] = cycles_now + cycles_delta;
schedule_work(&clock->pps_info.out_work);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
mlx5_core_err(mdev, " Unhandled event\n");
@@ -498,7 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
return;
}
- rwlock_init(&clock->lock);
+ seqlock_init(&clock->lock);
clock->cycles.read = read_internal_timer;
clock->cycles.shift = MLX5_CYCLES_SHIFT;
clock->cycles.mult = clocksource_khz2mult(dev_freq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 02e2e4575e4f..263cb6e2aeee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -46,11 +46,13 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
u64 timestamp)
{
+ unsigned int seq;
u64 nsec;
- read_lock(&clock->lock);
- nsec = timecounter_cyc2time(&clock->tc, timestamp);
- read_unlock(&clock->lock);
+ do {
+ seq = read_seqbegin(&clock->lock);
+ nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ } while (read_seqretry(&clock->lock, seq));
return ns_to_ktime(nsec);
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..b7fce2c9443d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -583,10 +583,11 @@ struct mlx5_irq_info {
};
struct mlx5_fc_stats {
- struct rb_root counters;
- struct list_head addlist;
- /* protect addlist add/splice operations */
- spinlock_t addlist_lock;
+ spinlock_t counters_idr_lock; /* protects counters_idr */
+ struct idr counters_idr;
+ struct list_head counters;
+ struct llist_head addlist;
+ struct llist_head dellist;
struct workqueue_struct *wq;
struct delayed_work work;
@@ -804,7 +805,7 @@ struct mlx5_pps {
};
struct mlx5_clock {
- rwlock_t lock;
+ seqlock_t lock;
struct cyclecounter cycles;
struct timecounter tc;
struct hwtstamp_config hwtstamp_config;