diff options
-rw-r--r-- | drivers/net/veth.c | 152 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 1 | ||||
-rwxr-xr-x | tools/testing/selftests/net/veth.sh | 177 |
3 files changed, 316 insertions, 14 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9e525646df1d..15b2e3923c47 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -57,6 +57,7 @@ struct veth_rq_stats { struct veth_rq { struct napi_struct xdp_napi; + struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ struct net_device *dev; struct bpf_prog __rcu *xdp_prog; struct xdp_mem_info xdp_mem; @@ -293,13 +294,32 @@ static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, netif_rx(skb); } +/* return true if the specified skb has chances of GRO aggregation + * Don't strive for accuracy, but try to avoid GRO overhead in the most + * common scenarios. + * When XDP is enabled, all traffic is considered eligible, as the xmit + * device has TSO off. + * When TSO is enabled on the xmit device, we are likely interested only + * in UDP aggregation, explicitly check for that if the skb is suspected + * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - + * to belong to locally generated UDP traffic. + */ +static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, + const struct net_device *rcv, + const struct sk_buff *skb) +{ + return !(dev->features & NETIF_F_ALL_TSO) || + (skb->destructor == sock_wfree && + rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); +} + static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct veth_rq *rq = NULL; struct net_device *rcv; int length = skb->len; - bool rcv_xdp = false; + bool use_napi = false; int rxq; rcu_read_lock(); @@ -313,20 +333,26 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rxq = skb_get_queue_mapping(skb); if (rxq < rcv->real_num_rx_queues) { rq = &rcv_priv->rq[rxq]; - rcv_xdp = rcu_access_pointer(rq->xdp_prog); + + /* The napi pointer is available when an XDP program is + * attached or when GRO is enabled + * Don't bother with napi/GRO if the skb can't be aggregated + */ + use_napi = rcu_access_pointer(rq->napi) && + veth_skb_is_eligible_for_gro(dev, rcv, skb); skb_record_rx_queue(skb, rxq); } skb_tx_timestamp(skb); - if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { - if (!rcv_xdp) + if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { + if (!use_napi) dev_lstats_add(dev, length); } else { drop: atomic64_inc(&priv->dropped); } - if (rcv_xdp) + if (use_napi) __veth_xdp_flush(rq); rcu_read_unlock(); @@ -686,7 +712,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, int mac_len, delta, off; struct xdp_buff xdp; - skb_orphan(skb); + skb_orphan_partial(skb); rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -903,7 +929,7 @@ static int veth_poll(struct napi_struct *napi, int budget) return done; } -static int veth_napi_add(struct net_device *dev) +static int __veth_napi_enable(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); int err, i; @@ -920,6 +946,7 @@ static int veth_napi_add(struct net_device *dev) struct veth_rq *rq = &priv->rq[i]; napi_enable(&rq->xdp_napi); + rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); } return 0; @@ -938,6 +965,7 @@ static void veth_napi_del(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) { struct veth_rq *rq = &priv->rq[i]; + rcu_assign_pointer(priv->rq[i].napi, NULL); napi_disable(&rq->xdp_napi); __netif_napi_del(&rq->xdp_napi); } @@ -951,8 +979,14 @@ static void veth_napi_del(struct net_device *dev) } } +static bool veth_gro_requested(const struct net_device *dev) +{ + return !!(dev->wanted_features & NETIF_F_GRO); +} + static int veth_enable_xdp(struct net_device *dev) { + bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); struct veth_priv *priv = netdev_priv(dev); int err, i; @@ -960,7 +994,8 @@ static int veth_enable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) { struct veth_rq *rq = &priv->rq[i]; - netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); + if (!napi_already_on) + netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); if (err < 0) goto err_rxq_reg; @@ -975,13 +1010,25 @@ static int veth_enable_xdp(struct net_device *dev) rq->xdp_mem = rq->xdp_rxq.mem; } - err = veth_napi_add(dev); - if (err) - goto err_rxq_reg; + if (!napi_already_on) { + err = __veth_napi_enable(dev); + if (err) + goto err_rxq_reg; + + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding XDP + * is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + } } - for (i = 0; i < dev->real_num_rx_queues; i++) + for (i = 0; i < dev->real_num_rx_queues; i++) { rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); + rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); + } return 0; err_reg_mem: @@ -991,7 +1038,8 @@ err_rxq_reg: struct veth_rq *rq = &priv->rq[i]; xdp_rxq_info_unreg(&rq->xdp_rxq); - netif_napi_del(&rq->xdp_napi); + if (!napi_already_on) + netif_napi_del(&rq->xdp_napi); } return err; @@ -1004,7 +1052,19 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - veth_napi_del(dev); + + if (!netif_running(dev) || !veth_gro_requested(dev)) { + veth_napi_del(dev); + + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev) && netif_running(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + } + for (i = 0; i < dev->real_num_rx_queues; i++) { struct veth_rq *rq = &priv->rq[i]; @@ -1013,6 +1073,29 @@ static void veth_disable_xdp(struct net_device *dev) } } +static int veth_napi_enable(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + int err, i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct veth_rq *rq = &priv->rq[i]; + + netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); + } + + err = __veth_napi_enable(dev); + if (err) { + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct veth_rq *rq = &priv->rq[i]; + + netif_napi_del(&rq->xdp_napi); + } + return err; + } + return err; +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -1026,6 +1109,10 @@ static int veth_open(struct net_device *dev) err = veth_enable_xdp(dev); if (err) return err; + } else if (veth_gro_requested(dev)) { + err = veth_napi_enable(dev); + if (err) + return err; } if (peer->flags & IFF_UP) { @@ -1047,6 +1134,8 @@ static int veth_close(struct net_device *dev) if (priv->_xdp_prog) veth_disable_xdp(dev); + else if (veth_gro_requested(dev)) + veth_napi_del(dev); return 0; } @@ -1145,10 +1234,32 @@ static netdev_features_t veth_fix_features(struct net_device *dev, if (peer_priv->_xdp_prog) features &= ~NETIF_F_GSO_SOFTWARE; } + if (priv->_xdp_prog) + features |= NETIF_F_GRO; return features; } +static int veth_set_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ dev->features; + struct veth_priv *priv = netdev_priv(dev); + int err; + + if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) + return 0; + + if (features & NETIF_F_GRO) { + err = veth_napi_enable(dev); + if (err) + return err; + } else { + veth_napi_del(dev); + } + return 0; +} + static void veth_set_rx_headroom(struct net_device *dev, int new_hr) { struct veth_priv *peer_priv, *priv = netdev_priv(dev); @@ -1267,6 +1378,7 @@ static const struct net_device_ops veth_netdev_ops = { #endif .ndo_get_iflink = veth_get_iflink, .ndo_fix_features = veth_fix_features, + .ndo_set_features = veth_set_features, .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, @@ -1329,6 +1441,13 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[], static struct rtnl_link_ops veth_link_ops; +static void veth_disable_gro(struct net_device *dev) +{ + dev->features &= ~NETIF_F_GRO; + dev->wanted_features &= ~NETIF_F_GRO; + netdev_update_features(dev); +} + static int veth_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1401,6 +1520,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, if (err < 0) goto err_register_peer; + /* keep GRO disabled by default to be consistent with the established + * veth behavior + */ + veth_disable_gro(peer); netif_carrier_off(peer); err = rtnl_configure_link(peer, ifmp); @@ -1438,6 +1561,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, priv = netdev_priv(peer); rcu_assign_pointer(priv->peer, dev); + veth_disable_gro(dev); return 0; err_register_dev: diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 2d71b283dde3..f4242a961088 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -24,6 +24,7 @@ TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh +TEST_PROGS += veth.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh new file mode 100755 index 000000000000..2fedc0781ce8 --- /dev/null +++ b/tools/testing/selftests/net/veth.sh @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" +readonly BASE=`basename $STATS` +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +readonly NPROCS=`nproc` +ret=0 + +cleanup() { + local ns + local -r jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + rm -f $STATS + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + echo "#kernel" > $BASE + chmod go-rw $BASE +} + +__chk_flag() { + local msg="$1" + local target=$2 + local expected=$3 + local flagname=$4 + + local flag=`ip netns exec $BASE$target ethtool -k veth$target |\ + grep $flagname | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$flag" = "$expected" ]; then + echo " ok " + else + echo " fail - expected $expected found $flag" + ret=1 + fi +} + +chk_gro_flag() { + __chk_flag "$1" $2 $3 generic-receive-offload +} + +chk_tso_flag() { + __chk_flag "$1" $2 $3 tcp-segmentation-offload +} + +chk_gro() { + local msg="$1" + local expected=$2 + + ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null + NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n + + printf "%-60s" "$msg" + ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 & + local spid=$! + sleep 0.1 + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \ + awk '{print $2}' | tail -n 1` + if [ "$pkts" = "$expected" ]; then + echo " ok " + else + echo " fail - got $pkts packets, expected $expected " + ret=1 + fi +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +create_ns +chk_gro_flag "default - gro flag" $SRC off +chk_gro_flag " - peer gro flag" $DST off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +chk_gro " - aggregation" 1 +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro " - aggregation with TSO off" 10 +cleanup + +create_ns +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro on - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST down +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro enabled on link down - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip -n $NS_DST link set dev veth$DST up +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +chk_gro_flag "with xdp attached - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC off +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation" 1 + + +ip -n $NS_DST link set dev veth$DST down +ip -n $NS_SRC link set dev veth$SRC down +chk_gro_flag " - after dev off, flag" $DST on +chk_gro_flag " - peer flag" $SRC off + +ip netns exec $NS_DST ethtool -K veth$DST gro on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after gro on xdp off, gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_SRC link set dev veth$SRC up +chk_gro " - aggregation" 1 + +ip netns exec $NS_DST ethtool -K veth$DST gro off +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro "aggregation again with default and TSO off" 10 + +exit $ret |