diff options
author | David S. Miller <davem@davemloft.net> | 2016-02-11 08:55:42 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-02-11 08:55:42 -0500 |
commit | e7e9956d8fc3c92e797e7334f2aee31dd9c623f3 (patch) | |
tree | b04ab120785a4f827079361fffc1b78cfb88f8f3 | |
parent | a060679c6b3da17dc9e95d0500f811de118ec901 (diff) | |
parent | f245d079c1d11dc6927e56f5a89dd566fef2a415 (diff) |
Merge branch 'gso-checksums'
Alexander Duyck says:
====================
Add GSO support for outer checksum w/ inner checksum offloads
This patch series updates the existing segmentation offload code for
tunnels to make better use of existing and updated GSO checksum
computation. This is done primarily through two mechanisms. First, we
maintain a separate checksum in the GSO context block of the sk_buff. This
allows us to maintain two checksum values, one offloaded with values stored
in csum_start and csum_offset, and one computed and tracked in
SKB_GSO_CB(skb)->csum. By maintaining these two values, we are able to take
advantage of the same sort of math used in local checksum offload so that
we can provide both inner and outer checksums with minimal overhead.
Below is the performance for a netperf session between an ixgbe PF and VF
on the same host but in different namespaces. As can be seen, a significant
gain in performance can be had from allowing the use of Tx checksum offload
on the inner headers while computing the outer header checksum in
software:
Recv   Send   Send                         Utilization      Service Demand
Socket Socket Message Elapsed              Send    Recv     Send    Recv
Size   Size   Size    Time     Throughput  local   remote   local   remote
bytes  bytes  bytes   secs.    10^6bits/s  % S     % U      us/KB   us/KB
Before:
87380  16384  16384   10.00    12844.38    9.30    -1.00    0.712   -1.00
After:
87380  16384  16384   10.00    13216.63    6.78    -1.00    0.504   -1.000
Changes from v1:
* Dropped use of CHECKSUM_UNNECESSARY for remote checksum offload
* Left encap_hdr_csum as it will likely be needed in future for SCTP GSO
* Broke the changes out over many more patches
* Updated GRE segmentation to more closely match UDP tunnel segmentation
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 29 | ||||
-rw-r--r-- | net/core/skbuff.c | 34 | ||||
-rw-r--r-- | net/ipv4/gre_offload.c | 85 | ||||
-rw-r--r-- | net/ipv4/tcp_offload.c | 8 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 93 |
5 files changed, 127 insertions, 122 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11f935c1a090..a8fc2220e8ce 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2161,6 +2161,11 @@ static inline int skb_checksum_start_offset(const struct sk_buff *skb) return skb->csum_start - skb_headroom(skb); } +static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) +{ + return skb->head + skb->csum_start; +} + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; @@ -3549,6 +3554,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __wsum csum; __u16 csum_start; }; #define SKB_SGO_CB_OFFSET 32 @@ -3575,6 +3581,16 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + /* Compute the checksum for a gso segment. First compute the checksum value * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and * then add in skb->csum (checksum from csum_start to end of packet). 
@@ -3585,15 +3601,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) */ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { - int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __wsum partial; + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; - partial = csum_partial(skb_transport_header(skb), plen, skb->csum); - skb->csum = res; - SKB_GSO_CB(skb)->csum_start -= plen; + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - return csum_fold(partial); + return csum_fold(csum_partial(csum_start, plen, partial)); } static inline bool skb_is_gso(const struct sk_buff *skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..b0cce744e2a0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3004,8 +3004,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !head_skb->encap_hdr_csum && - !!can_checksum_protocol(features, proto); + csum = !!can_checksum_protocol(features, proto); headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3098,13 +3097,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (nskb->len == len + doffset) goto perform_csum_check; - if (!sg && !nskb->remcsum_offload) { - nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); + if (!sg) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, len), + len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; continue; } @@ -3170,12 +3171,19 @@ skip_fraglist: nskb->truesize += nskb->data_len; perform_csum_check: - if (!csum && !nskb->remcsum_offload) { - 
nskb->csum = skb_checksum(nskb, doffset, - nskb->len - doffset, 0); - nskb->ip_summed = CHECKSUM_NONE; + if (!csum) { + if (skb_has_shared_frag(nskb)) { + err = __skb_linearize(nskb); + if (err) + goto err; + } + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_checksum(nskb, doffset, + nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5a8ee3282550..003b0ebbcfdd 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -18,15 +18,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl; - struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; + u16 mac_len = skb->mac_len; + int gre_offset, outer_hlen; + bool need_csum; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -43,74 +41,59 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (!skb->encapsulation) goto out; - if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr))) goto out; - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); - if (unlikely(ghl < sizeof(*greh))) + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; - csum = !!(greh->flags & GRE_CSUM); - if (csum) - skb->encap_hdr_csum = 1; - /* setup inner skb. 
*/ - skb->protocol = greh->protocol; skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - - __skb_pull(skb, ghl); + __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = skb->inner_protocol; + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); + skb->encap_hdr_csum = need_csum; + + features &= skb->dev->hw_enc_features; /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = skb_mac_gso_segment(skb, enc_features); + segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { - skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; } + outer_hlen = skb_tnl_header_len(skb); + gre_offset = outer_hlen - tnl_hlen; skb = segs; - tnl_hlen = skb_tnl_header_len(skb); do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb_list(segs); - segs = ERR_PTR(err); - goto out; - } - } - - skb_reset_transport_header(skb); - - greh = (struct gre_base_hdr *) - skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); - } - __skb_push(skb, tnl_hlen - ghl); + struct gre_base_hdr *greh; + __be32 *pcsum; skb_reset_inner_headers(skb); skb->encapsulation = 1; - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; skb->protocol = protocol; + + __skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, gre_offset); + + if (!need_csum) + continue; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + pcsum = (__be32 *)(greh + 1); + + *pcsum = 0; + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } while ((skb = 
skb->next)); out: return segs; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9864a2dbadce..773083b7f1e9 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->fin = th->psh = 0; th->check = newcheck; - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); seq += mss; @@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); out: return segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4c519c1dc161..56c4c8b88b28 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -32,42 +32,56 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features), __be16 new_protocol, bool is_ipv6) { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); + bool remcsum, need_csum, offload_csum; + struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; - int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; - netdev_features_t enc_features; + u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; - unsigned int oldlen; - bool need_csum = !!(skb_shinfo(skb)->gso_type & - SKB_GSO_UDP_TUNNEL_CSUM); - bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); - bool offload_csum = false, dont_encap = (need_csum || remcsum); - - oldlen = (u16)~skb->len; + u32 partial; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; + /* adjust partial 
header checksum to negate old length */ + partial = (__force u32)uh->check + (__force u16)~uh->len; + + /* setup inner skb. */ skb->encapsulation = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); skb->encap_hdr_csum = need_csum; + + remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - ((skb->dev->features & NETIF_F_HW_CSUM) || - (skb->dev->features & (is_ipv6 ? - NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); + (skb->dev->features & + (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : + (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); + + features &= skb->dev->hw_enc_features; + + /* The only checksum offload we care about from here on out is the + * outer one so strip the existing checksum feature flags and + * instead set the flag based on our outer checksum offload value. + */ + if (remcsum) { + features &= ~NETIF_F_CSUM_MASK; + if (offload_csum) + features |= NETIF_F_HW_CSUM; + } /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = gso_inner_segment(skb, enc_features); + segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len); @@ -78,17 +92,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - struct udphdr *uh; - int len; - __be32 delta; + __be16 len; - if (dont_encap) { - skb->encapsulation = 0; + if (remcsum) skb->ip_summed = CHECKSUM_NONE; - } else { - /* Only set up inner headers if we might be offloading - * inner checksum. 
- */ + + /* Set up inner headers if we are offloading inner checksum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_reset_inner_headers(skb); skb->encapsulation = 1; } @@ -96,43 +106,28 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->mac_len = mac_len; skb->protocol = protocol; - skb_push(skb, outer_hlen); + __skb_push(skb, outer_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = skb->len - udp_offset; + len = htons(skb->len - udp_offset); uh = udp_hdr(skb); - uh->len = htons(len); + uh->len = len; if (!need_csum) continue; - delta = htonl(oldlen + len); - uh->check = ~csum_fold((__force __wsum) - ((__force u32)uh->check + - (__force u32)delta)); - if (offload_csum) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - } else if (remcsum) { - /* Need to calculate checksum from scratch, - * inner checksums are never when doing - * remote_checksum_offload. - */ - - skb->csum = skb_checksum(skb, udp_offset, - skb->len - udp_offset, - 0); - uh->check = csum_fold(skb->csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - uh->check = gso_make_checksum(skb, ~uh->check); + ((__force u32)len + partial)); + if (skb->encapsulation || !offload_csum) { + uh->check = gso_make_checksum(skb, ~uh->check); if (uh->check == 0) uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); } } while ((skb = skb->next)); out: |