diff options
author | Alexander Lobakin <bloodyreaper@yandex.ru> | 2020-04-21 16:41:08 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-04-23 12:32:13 -0700 |
commit | e131a5634830047923c694b4ce0c3b31745ff01b (patch) | |
tree | 484784cd19a9b436268c5d5bc529bbaf81009132 | |
parent | b75326c201242de9495ff98e5d5cff41d7fc0d9d (diff) |
net: dsa: add GRO support via gro_cells
gro_cells lib is used by different encapsulating netdevices, such as
geneve, macsec, vxlan etc. to speed up decapsulated traffic processing.
CPU tag is a sort of "encapsulation", and we can use the same mechs to
greatly improve overall DSA performance.
skbs are passed to the GRO layer after removing CPU tags, so we don't
need any new packet offload types as it was firstly proposed by me in
the first GRO-over-DSA variant [1].
The size of struct gro_cells is sizeof(void *), so hot struct
dsa_slave_priv becomes only 4/8 bytes bigger, and all critical fields
remain in one 32-byte cacheline.
The other positive side effect is that drivers for network devices
that can be shipped as CPU ports of DSA-driven switches can now use
napi_gro_frags() to pass skbs to kernel. Packets built that way are
completely non-linear and are likely being dropped without GRO.
This was tested on to-be-mainlined-soon Ethernet driver that uses
napi_gro_frags(), and the overall performance was on par with the
variant from [1], sometimes even better due to minimal overhead.
net.core.gro_normal_batch tuning may help to push it to the limit
on particular setups and platforms.
iperf3 IPoE VLAN NAT TCP forwarding (port1.218 -> port0) setup
on 1.2 GHz MIPS board:
5.7-rc2 baseline:
[ID] Interval Transfer Bitrate Retr
[ 5] 0.00-120.01 sec 9.00 GBytes 644 Mbits/sec 413 sender
[ 5] 0.00-120.00 sec 8.99 GBytes 644 Mbits/sec receiver
Iface RX packets TX packets
eth0 7097731 7097702
port0 426050 6671829
port1 6671681 425862
port1.218 6671677 425851
With this patch:
[ID] Interval Transfer Bitrate Retr
[ 5] 0.00-120.01 sec 12.2 GBytes 870 Mbits/sec 122 sender
[ 5] 0.00-120.00 sec 12.2 GBytes 870 Mbits/sec receiver
Iface RX packets TX packets
eth0 9474792 9474777
port0 455200 353288
port1 9019592 455035
port1.218 353144 455024
v2:
- Add some performance examples in the commit message;
- No functional changes.
[1] https://lore.kernel.org/netdev/20191230143028.27313-1-alobakin@dlink.ru/
Signed-off-by: Alexander Lobakin <bloodyreaper@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/dsa/Kconfig | 1 | ||||
-rw-r--r-- | net/dsa/dsa.c | 2 | ||||
-rw-r--r-- | net/dsa/dsa_priv.h | 3 | ||||
-rw-r--r-- | net/dsa/slave.c | 10 |
4 files changed, 14 insertions, 2 deletions
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 92663dcb3aa2..739613070d07 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -9,6 +9,7 @@ menuconfig NET_DSA tristate "Distributed Switch Architecture" depends on HAVE_NET_DSA depends on BRIDGE || BRIDGE=n + select GRO_CELLS select NET_SWITCHDEV select PHYLINK select NET_DEVLINK diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ee2610c4d46a..0384a911779e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -234,7 +234,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; - netif_receive_skb(skb); + gro_cells_receive(&p->gcells, skb); return 0; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 904cc7c9b882..6d9a1ef65fa0 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/netpoll.h> #include <net/dsa.h> +#include <net/gro_cells.h> enum { DSA_NOTIFIER_AGEING_TIME, @@ -77,6 +78,8 @@ struct dsa_slave_priv { struct pcpu_sw_netstats *stats64; + struct gro_cells gcells; + /* DSA port data, such as switch, port index, etc. */ struct dsa_port *dp; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e94eb1aac602..f2c241cf3a80 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1762,6 +1762,11 @@ int dsa_slave_create(struct dsa_port *port) free_netdev(slave_dev); return -ENOMEM; } + + ret = gro_cells_init(&p->gcells, slave_dev); + if (ret) + goto out_free; + p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); p->xmit = cpu_dp->tag_ops->xmit; @@ -1781,7 +1786,7 @@ int dsa_slave_create(struct dsa_port *port) ret = dsa_slave_phy_setup(slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); - goto out_free; + goto out_gcells; } dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); @@ -1800,6 +1805,8 @@ out_phy: phylink_disconnect_phy(p->dp->pl); rtnl_unlock(); phylink_destroy(p->dp->pl); +out_gcells: + gro_cells_destroy(&p->gcells); out_free: free_percpu(p->stats64); free_netdev(slave_dev); @@ -1820,6 +1827,7 @@ void dsa_slave_destroy(struct net_device *slave_dev) dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); unregister_netdev(slave_dev); phylink_destroy(dp->pl); + gro_cells_destroy(&p->gcells); free_percpu(p->stats64); free_netdev(slave_dev); } |