From f70ea018da0631e10c26a02f5a82d626ffef5bd7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:10 -0700 Subject: net: Add functions to get skb->hash based on flow structures Add skb_get_hash_flowi6 and skb_get_hash_flowi4 which derive an sk_buff hash from flowi6 and flowi4 structures respectively. These functions can be called when creating a packet in the output path where the new sk_buff does not yet contain a fully formed packet that is parsable by flow dissector. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++ net/core/flow_dissector.c | 58 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 648a2c241993..b7c1286e247d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -945,6 +946,26 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6); + +static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_flowi6(skb, fl6); + + return skb->hash; +} + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl); + +static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_flowi4(skb, fl4); + + return skb->hash; +} + __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2a834c6179b9..11e6540fa386 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -590,6 +590,15 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static inline void __skb_set_sw_hash(struct sk_buff *skb, u32 hash, + struct flow_keys *keys) +{ + if (keys->ports.ports) + skb->l4_hash = 1; + skb->sw_hash = 1; + skb->hash = hash; +} + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -609,10 +618,8 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - if (keys.ports.ports) - skb->l4_hash = 1; - skb->sw_hash = 1; - skb->hash = hash; + + __skb_set_sw_hash(skb, hash, &keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -624,6 +631,49 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, + sizeof(keys.addrs.v6addrs.src)); + memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, + sizeof(keys.addrs.v6addrs.dst)); + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys.ports.src = fl6->fl6_sport; + keys.ports.dst = fl6->fl6_dport; + keys.keyid.keyid = fl6->fl6_gre_key; + keys.tags.flow_label = (__force u32)fl6->flowlabel; + keys.basic.ip_proto = fl6->flowi6_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi6); + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + keys.addrs.v4addrs.src = fl4->saddr; + keys.addrs.v4addrs.dst = fl4->daddr; + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + keys.ports.src = fl4->fl4_sport; + keys.ports.dst = fl4->fl4_dport; + keys.keyid.keyid = fl4->fl4_gre_key; + keys.basic.ip_proto = fl4->flowi4_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi4); + u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { -- cgit v1.2.3 From 67800f9b1f4eb5bbefc32e3f5044097354bc85b3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:11 -0700 Subject: ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel We can't call skb_get_hash here since the packet is not complete to do flow_dissector. Create hash based on flowi6 instead. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ipv6.h | 5 +++-- net/ipv6/ip6_gre.c | 5 +++-- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ip6_tunnel.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eecdfc92f807..3e334b33ef3a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -708,12 +708,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, #if IS_ENABLED(CONFIG_IPV6) static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, - __be32 flowlabel, bool autolabel) + __be32 flowlabel, bool autolabel, + struct flowi6 *fl6) { if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { u32 hash; - hash = skb_get_hash(skb); + hash = skb_get_hash_flowi6(skb, fl6); /* Since this is being sent on the wire obfuscate hash a bit * to minimize possbility that any useful information to an diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a38d3ac0f18f..a7d1ca2337a9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -728,7 +728,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, */ ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1182,7 +1182,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, false)); + t->fl.u.ip6.flowlabel, false, + &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 92b7cf0dc1f9..26ea47930740 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -207,7 +207,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel)); + np->autoflowlabel, fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1649,7 +1649,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel)); + np->autoflowlabel, fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2e67b660118b..54e694c4af0e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1095,7 +1095,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; -- cgit v1.2.3 From 42240901f7c438636715b9cb6ed93f4441ffc091 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:12 -0700 Subject: ipv6: Implement different admin modes for automatic flow labels Change the meaning of net.ipv6.auto_flowlabels to provide a mode for automatic flow labels generation. There are four modes: 0: flow labels are disabled 1: flow labels are enabled, sockets can opt-out 2: flow labels are allowed, sockets can opt-in 3: flow labels are enabled and enforced, no opt-out for sockets np->autoflowlabel is initialized according to the sysctl value. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 20 ++++++++---- include/net/ipv6.h | 59 ++++++++++++++++++++++++++-------- net/ipv6/af_inet6.c | 3 +- net/ipv6/ip6_gre.c | 4 +-- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/sysctl_net_ipv6.c | 7 +++- 6 files changed, 70 insertions(+), 25 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 00d26d919459..9ac3af3ab739 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1215,14 +1215,20 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE -auto_flowlabels - BOOLEAN - Automatically generate flow labels based based on a flow hash - of the packet. This allows intermediate devices, such as routers, - to idenfify packet flows for mechanisms like Equal Cost Multipath +auto_flowlabels - INTEGER + Automatically generate flow labels based on a flow hash of the + packet. This allows intermediate devices, such as routers, to + identify packet flows for mechanisms like Equal Cost Multipath Routing (see RFC 6438). - TRUE: enabled - FALSE: disabled - Default: false + 0: automatic flow labels are completely disabled + 1: automatic flow labels are enabled by default, they can be + disabled on a per socket basis using the IPV6_AUTOFLOWLABEL + socket option + 2: automatic flow labels are allowed, they may be enabled on a + per socket basis using the IPV6_AUTOFLOWLABEL socket option + 3: automatic flow labels are enabled and enforced, they cannot + be disabled by the socket option + Default: 0 flowlabel_state_ranges - BOOLEAN Split the flow label number space into two ranges. 0-0x7FFFF is diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3e334b33ef3a..c02c1c03363a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -707,36 +707,69 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, } #if IS_ENABLED(CONFIG_IPV6) + +/* Sysctl settings for net ipv6.auto_flowlabels */ +#define IP6_AUTO_FLOW_LABEL_OFF 0 +#define IP6_AUTO_FLOW_LABEL_OPTOUT 1 +#define IP6_AUTO_FLOW_LABEL_OPTIN 2 +#define IP6_AUTO_FLOW_LABEL_FORCED 3 + +#define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED + +#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OFF + static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { - if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { - u32 hash; + u32 hash; - hash = skb_get_hash_flowi6(skb, fl6); + if (flowlabel || + net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || + (!autolabel && + net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) + return flowlabel; - /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an - * attacker is leaked. Only lower 20 bits are relevant. - */ - hash ^= hash >> 12; + hash = skb_get_hash_flowi6(skb, fl6); - flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. + */ + rol32(hash, 16); - if (net->ipv6.sysctl.flowlabel_state_ranges) - flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; - } + flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } + +static inline int ip6_default_np_autolabel(struct net *net) +{ + switch (net->ipv6.sysctl.auto_flowlabels) { + case IP6_AUTO_FLOW_LABEL_OFF: + case IP6_AUTO_FLOW_LABEL_OPTIN: + default: + return 0; + case IP6_AUTO_FLOW_LABEL_OPTOUT: + case IP6_AUTO_FLOW_LABEL_FORCED: + return 1; + } +} #else static inline void ip6_set_txhash(struct sock *sk) { } static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, - __be32 flowlabel, bool autolabel) + __be32 flowlabel, bool autolabel, + struct flowi6 *fl6) { return flowlabel; } +static inline int ip6_default_np_autolabel(struct net *net) +{ + return 0; +} #endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7bc92ea4ae8f..3f0ae3a7c0b1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -197,6 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; + np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets @@ -767,7 +768,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; - net->ipv6.sysctl.auto_flowlabels = 0; + net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 1; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a7d1ca2337a9..34f121812a14 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -728,7 +728,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, */ ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1182,7 +1182,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, false, + t->fl.u.ip6.flowlabel, true, &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 54e694c4af0e..b0ab420612bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1095,7 +1095,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index db48aebd9c47..45243bbe5253 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,6 +17,9 @@ #include static int one = 1; +static int auto_flowlabels_min; +static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; + static struct ctl_table ipv6_table_template[] = { { @@ -45,7 +48,9 @@ static struct ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.auto_flowlabels, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &auto_flowlabels_min, + .extra2 = &auto_flowlabels_max }, { .procname = "fwmark_reflect", -- cgit v1.2.3 From be26849bfb6fcee4123c687fc39bd6da1b3be328 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:13 -0700 Subject: ipv6: Disable flowlabel state ranges by default Per RFC6437 stateful flow labels (e.g. labels set by flow label manager) cannot "disturb" nodes taking part in stateless flow labels. While the ranges only reduce the flow label entropy by one bit, it is conceivable that this might bias the algorithm on some routers causing a load imbalance. For best results on the Internet we really need the full 20 bits. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f0ae3a7c0b1..44bb66bde0e2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -771,7 +771,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; - net->ipv6.sysctl.flowlabel_state_ranges = 1; + net->ipv6.sysctl.flowlabel_state_ranges = 0; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); -- cgit v1.2.3 From b56774163f994efce3f5603f35aa4e677c3e725a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:14 -0700 Subject: ipv6: Enable auto flow labels by default Initialize auto_flowlabels to one. This enables automatic flow labels, individual socket may disable them using the IPV6_AUTOFLOWLABEL socket option. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- include/net/ipv6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9ac3af3ab739..56db1efd7189 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1228,7 +1228,7 @@ auto_flowlabels - INTEGER per socket basis using the IPV6_AUTOFLOWLABEL socket option 3: automatic flow labels are enabled and enforced, they cannot be disabled by the socket option - Default: 0 + Default: 1 flowlabel_state_ranges - BOOLEAN Split the flow label number space into two ranges. 0-0x7FFFF is diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c02c1c03363a..711cca428cc8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -716,7 +716,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, #define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED -#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OFF +#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, -- cgit v1.2.3