summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c63
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/fib6_rules.c3
-rw-r--r--net/ipv6/ila/ila_common.c1
-rw-r--r--net/ipv6/ila/ila_lwt.c2
-rw-r--r--net/ipv6/ila/ila_xlat.c2
-rw-r--r--net/ipv6/ip6_fib.c6
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/ip6_offload.c5
-rw-r--r--net/ipv6/ip6_output.c27
-rw-r--r--net/ipv6/ip6_tunnel.c188
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/mcast.c10
-rw-r--r--net/ipv6/ndisc.c11
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c21
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c9
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c4
-rw-r--r--net/ipv6/output_core.c7
-rw-r--r--net/ipv6/proc.c30
-rw-r--r--net/ipv6/raw.c7
-rw-r--r--net/ipv6/route.c41
-rw-r--r--net/ipv6/sit.c12
-rw-r--r--net/ipv6/tcp_ipv6.c27
-rw-r--r--net/ipv6/udp.c13
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c2
30 files changed, 338 insertions, 188 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2f1f5d439788..87183983724d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp)
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
+static inline s32 rfc3315_s14_backoff_init(s32 irt)
+{
+ /* multiply 'initial retransmission time' by 0.9 .. 1.1 */
+ u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt;
+ do_div(tmp, 1000000);
+ return (s32)tmp;
+}
+
+static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
+{
+ /* multiply 'retransmission timeout' by 1.9 .. 2.1 */
+ u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt;
+ do_div(tmp, 1000000);
+ if ((s32)tmp > mrt) {
+ /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
+ tmp = (900000 + prandom_u32() % 200001) * (u64)mrt;
+ do_div(tmp, 1000000);
+ }
+ return (s32)tmp;
+}
+
#ifdef CONFIG_SYSCTL
static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
@@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
@@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+ if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
write_unlock(&idev->lock);
if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
ndisc_send_rs(dev, &lladdr,
@@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data)
goto put;
write_lock(&idev->lock);
+ idev->rs_interval = rfc3315_s14_backoff_update(
+ idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
idev->cnf.rtr_solicits) ?
idev->cnf.rtr_solicit_delay :
- idev->cnf.rtr_solicit_interval);
+ idev->rs_interval);
} else {
/*
* Note: we do not support deprecated "all on-link"
@@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits > 0 &&
+ ifp->idev->cnf.rtr_solicits != 0 &&
(dev->flags&IFF_LOOPBACK) == 0;
read_unlock_bh(&ifp->idev->lock);
@@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
write_lock_bh(&ifp->idev->lock);
spin_lock(&ifp->lock);
+ ifp->idev->rs_interval = rfc3315_s14_backoff_init(
+ ifp->idev->cnf.rtr_solicit_interval);
ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
- addrconf_mod_rs_timer(ifp->idev,
- ifp->idev->cnf.rtr_solicit_interval);
+ addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
spin_unlock(&ifp->lock);
write_unlock_bh(&ifp->idev->lock);
}
@@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
jiffies_to_msecs(cnf->rtr_solicit_interval);
+ array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_solicit_max_interval);
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
@@ -4961,18 +4989,18 @@ static inline size_t inet6_if_nlmsg_size(void)
}
static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
- int items, int bytes)
+ int bytes)
{
int i;
- int pad = bytes - sizeof(u64) * items;
+ int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX;
BUG_ON(pad < 0);
/* Use put_unaligned() because stats may not be aligned for u64. */
- put_unaligned(items, &stats[0]);
- for (i = 1; i < items; i++)
+ put_unaligned(ICMP6_MIB_MAX, &stats[0]);
+ for (i = 1; i < ICMP6_MIB_MAX; i++)
put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
- memset(&stats[items], 0, pad);
+ memset(&stats[ICMP6_MIB_MAX], 0, pad);
}
static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
@@ -5005,7 +5033,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
- __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+ __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes);
break;
}
}
@@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
return -EINVAL;
if (!ipv6_accept_ra(idev))
return -EINVAL;
- if (idev->cnf.rtr_solicits <= 0)
+ if (idev->cnf.rtr_solicits == 0)
return -EINVAL;
write_lock_bh(&idev->lock);
@@ -5128,8 +5156,10 @@ update_lft:
if (update_rs) {
idev->if_flags |= IF_RS_SENT;
+ idev->rs_interval = rfc3315_s14_backoff_init(
+ idev->cnf.rtr_solicit_interval);
idev->rs_probes = 1;
- addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+ addrconf_mod_rs_timer(idev, idev->rs_interval);
}
/* Well, that's kinda nasty ... */
@@ -5778,6 +5808,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "router_solicitation_max_interval",
+ .data = &ipv6_devconf.rtr_solicit_max_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "router_solicitation_delay",
.data = &ipv6_devconf.rtr_solicit_delay,
.maxlen = sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b454055ba625..46ad699937fd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
+ .read_sock = tcp_read_sock,
+ .peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 5857c1fc8b67..eea23b57c6a5 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
.flags = FIB_LOOKUP_NOREF,
};
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index ec9efbcdad35..aba0998ddbfb 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -172,6 +172,5 @@ static void __exit ila_fini(void)
module_init(ila_init);
module_exit(ila_fini);
-MODULE_ALIAS_RTNL_LWT(ILA);
MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index c8314c6b6154..e50c27a93e17 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -51,7 +51,7 @@ drop:
return -EINVAL;
}
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e6eca5fdf4c9..e604013dd814 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -128,7 +128,7 @@ static struct genl_family ila_nl_family = {
.parallel_ops = true,
};
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 771be1fa4176..ef5485204522 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ u16 nlflags = NLM_F_EXCL;
int err;
ins = &fn->leaf;
@@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_EXCL))
return -EEXIST;
+
+ nlflags &= ~NLM_F_EXCL;
if (replace) {
if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
found++;
@@ -856,6 +859,7 @@ next_iter:
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ nlflags |= NLM_F_CREATE;
err = fib6_commit_metrics(&rt->dst, mxc);
if (err)
return err;
@@ -864,7 +868,7 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index edc3daab354e..d7d6d3ae0b3b 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -61,12 +61,12 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT 5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
static int ip6gre_net_id __read_mostly;
struct ip6gre_net {
- struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+ struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct net_device *fb_tunnel_dev;
};
@@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
will match fallback tunnel.
*/
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
static u32 HASH_ADDR(const struct in6_addr *addr)
{
u32 hash = ipv6_addr_hash(addr);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
}
#define tunnels_r_l tunnels[3]
@@ -1086,7 +1086,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
struct ip6_tnl *t;
t = rtnl_dereference(ign->tunnels[prio][h]);
@@ -1238,7 +1238,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
if (data[IFLA_GRE_FLOWINFO])
- parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+ parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
if (data[IFLA_GRE_FLAGS])
parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 22e90e56b5a9..e7bfd55899a3 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
int offset = 0;
bool encap, udpfrag;
int nhoff;
+ bool gso_partial;
skb_reset_network_header(skb);
nhoff = skb_network_header(skb) - skb_mac_header(skb);
@@ -101,9 +102,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (IS_ERR(segs))
goto out;
+ gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
+
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (skb_is_gso(skb))
+ if (gso_partial)
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1dfc402d9ad1..6001e781164e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,6 +56,7 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
+#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
}
+ if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+ int res = lwtunnel_xmit(skb);
+
+ if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ return res;
+ }
+
rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
@@ -228,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
+
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out((struct sock *)sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
/* hooks should never assume socket lock is held.
* we promote our socket to non const
*/
@@ -910,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err;
int flags = 0;
- if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
- (!*dst || !(*dst)->error)) {
- err = l3mdev_get_saddr6(net, sk, fl6);
- if (err)
- goto out_err;
- }
-
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
@@ -1008,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
out_err_release:
dst_release(*dst);
*dst = NULL;
-out_err:
+
if (err == -ENETUNREACH)
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
@@ -1054,8 +1063,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
- if (!fl6->flowi6_oif)
- fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 888543debe4e..6a66adba0c22 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/dst_metadata.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -64,8 +65,8 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_TUNNEL_HASH_SIZE_SHIFT 5
+#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
@@ -75,7 +76,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
}
static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,9 +88,10 @@ struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
+ struct ip6_tnl __rcu *collect_md_tun;
};
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
@@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
return t;
}
+ t = rcu_dereference(ip6n->collect_md_tun);
+ if (t)
+ return t;
+
t = rcu_dereference(ip6n->tnls_wc[0]);
if (t && (t->dev->flags & IFF_UP))
return t;
@@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ip6n->collect_md_tun, t);
rcu_assign_pointer(t->next , rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ip6n->collect_md_tun, NULL);
+
for (tp = ip6_tnl_bucket(ip6n, &t->parms);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
+ if (tun_dst)
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
{
struct ip6_tnl *t;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct metadata_dst *tun_dst = NULL;
int ret = -1;
rcu_read_lock();
@@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
goto drop;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+ if (t->parms.collect_md) {
+ tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+ if (!tun_dst)
+ return 0;
+ }
+ ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
}
@@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
int mtu;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
+ u8 hop_limit;
int err = -1;
+ if (t->parms.collect_md) {
+ hop_limit = skb_tunnel_info(skb)->key.ttl;
+ goto route_lookup;
+ } else {
+ hop_limit = t->parms.hop_limit;
+ }
+
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
struct in6_addr *addr6;
@@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
goto tx_err_link_failure;
if (!dst) {
+route_lookup:
dst = ip6_route_output(net, NULL, fl6);
if (dst->error)
@@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
dst = NULL;
goto tx_err_link_failure;
}
+ if (t->parms.collect_md &&
+ ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+ &fl6->daddr, 0, &fl6->saddr))
+ goto tx_err_link_failure;
ndst = dst;
}
@@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
}
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (skb_dst(skb))
+ if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
@@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
skb = new_skb;
}
- if (!fl6->flowi6_mark && ndst)
- dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+ if (t->parms.collect_md) {
+ if (t->encap.type != TUNNEL_ENCAP_NONE)
+ goto tx_err_dst_release;
+ } else {
+ if (!fl6->flowi6_mark && ndst)
+ dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+ }
skb_dst_set(skb, dst);
if (encap_limit >= 0) {
@@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
- ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->hop_limit = hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
@@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
+ dsfield = ipv4_get_dsfield(iph);
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
- dsfield = ipv4_get_dsfield(iph);
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -1;
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ } else {
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
- & IPV6_TCLASS_MASK;
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+ }
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
+ dsfield = ipv6_get_dsfield(ipv6h);
+
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
return -1;
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ } else {
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ tel = (void *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ encap_limit = t->parms.encap_limit;
}
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
- dsfield = ipv6_get_dsfield(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+ }
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev)
if (err)
return err;
ip6_tnl_link_config(t);
+ if (t->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
return 0;
}
@@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_PROTO])
parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+ if (data[IFLA_IPTUN_COLLECT_METADATA])
+ parms->collect_md = true;
}
static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct ip6_tnl *nt, *t;
struct ip_tunnel_encap ipencap;
@@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
ip6_tnl_netlink_parms(data, &nt->parms);
- t = ip6_tnl_locate(net, &nt->parms, 0);
- if (!IS_ERR(t))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ip6n->collect_md_tun))
+ return -EEXIST;
+ } else {
+ t = ip6_tnl_locate(net, &nt->parms, 0);
+ if (!IS_ERR(t))
+ return -EEXIST;
+ }
return ip6_tnl_create2(dev);
}
@@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
ip6_tnl_netlink_parms(data, &p);
+ if (p.collect_md)
+ return -EINVAL;
t = ip6_tnl_locate(net, &p, 0);
if (!IS_ERR(t)) {
@@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
- tunnel->encap.type) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
- tunnel->encap.sport) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
- tunnel->encap.dport) ||
- nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.flags))
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
goto nla_put_failure;
+ if (parm->collect_md)
+ if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
@@ -2033,7 +2121,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
if (dev->rtnl_link_ops == &ip6_link_ops)
unregister_netdevice_queue(dev, &list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
/* If dev is in the same netns, it has already
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5bd3afdcc771..8a02ca8a11af 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -50,14 +50,14 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT 5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
}
static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@ struct vti6_net {
/* the vti6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
@@ -1051,7 +1051,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
struct ip6_tnl *t;
LIST_HEAD(list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, &list);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d64ee7e83664..75c1fc54f188 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
continue;
}
+ /* Based on RFC3810 6.1. Should not send source-list change
+ * records when there is a filter mode change.
+ */
+ if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) ||
+ (!gdeleted && pmc->mca_crcount)) &&
+ (type == MLD2_ALLOW_NEW_SOURCES ||
+ type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+ goto decrease_sf_crcount;
+
/* clear marks on query responses */
if (isquery)
psf->sf_gsresp = 0;
@@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
scount++; stotal++;
if ((type == MLD2_ALLOW_NEW_SOURCES ||
type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
psf->sf_crcount--;
if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
if (psf_prev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fe65cdc28a45..d8e671457d10 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,7 +67,6 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
-#include <net/l3mdev.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
@@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
if (!dst) {
struct flowi6 fl6;
- int oif = l3mdev_fib_oif(skb->dev);
+ int oif = skb->dev->ifindex;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
- if (oif != skb->dev->ifindex)
- fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
@@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
- int oif = l3mdev_fib_oif(dev);
bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
- &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
-
- if (oif != skb->dev->ifindex)
- fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
+ &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 552fac2f390a..55aacea24396 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = {
.u = {
.log = {
.level = LOGLEVEL_WARNING,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1aa5848764a7..963ee3848675 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv,
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 660bc10c7a9c..f5a61bc3ec2b 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 8dd869642f45..57d86066a13b 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = {
.u = {
.log = {
.level = LOGLEVEL_NOTICE,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
@@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
else
- logflags = NF_LOG_MASK;
+ logflags = NF_LOG_DEFAULT_MASK;
ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
if (ih == NULL) {
@@ -84,7 +84,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
}
/* Max length: 48 "OPT (...) " */
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, "OPT ( ");
switch (currenthdr) {
@@ -121,7 +121,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
case IPPROTO_ROUTING:
case IPPROTO_HOPOPTS:
if (fragment) {
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, ")");
return;
}
@@ -129,7 +129,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
break;
/* Max Length */
case IPPROTO_AH:
- if (logflags & XT_LOG_IPOPT) {
+ if (logflags & NF_LOG_IPOPT) {
struct ip_auth_hdr _ahdr;
const struct ip_auth_hdr *ah;
@@ -161,7 +161,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
hdrlen = (hp->hdrlen+2)<<2;
break;
case IPPROTO_ESP:
- if (logflags & XT_LOG_IPOPT) {
+ if (logflags & NF_LOG_IPOPT) {
struct ip_esp_hdr _esph;
const struct ip_esp_hdr *eh;
@@ -194,7 +194,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
return;
}
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, ") ");
currenthdr = hp->nexthdr;
@@ -277,7 +277,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
}
/* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && recurse)
+ if ((logflags & NF_LOG_UID) && recurse)
nf_log_dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -295,7 +295,7 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
- if (!(logflags & XT_LOG_MACDECODE))
+ if (!(logflags & NF_LOG_MACDECODE))
goto fallback;
switch (dev->type) {
@@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
static int __net_init nf_log_ipv6_net_init(struct net *net)
{
- nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
- return 0;
+ return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
}
static void __net_exit nf_log_ipv6_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 30b22f4dff55..d6e4ba5de916 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv,
{
struct nft_pktinfo pkt;
- /* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
- return NF_DROP;
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
return nft_do_chain(&pkt, priv);
}
@@ -102,7 +100,10 @@ static int __init nf_tables_ipv6_init(void)
{
int ret;
- nft_register_chain_type(&filter_ipv6);
+ ret = nft_register_chain_type(&filter_ipv6);
+ if (ret < 0)
+ return ret;
+
ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
if (ret < 0)
nft_unregister_chain_type(&filter_ipv6);
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 2535223ba956..f2727475895e 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,9 +33,7 @@ static unsigned int nf_route_table_hook(void *priv,
u32 mark, flowlabel;
int err;
- /* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
- return NF_DROP;
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 462f2a76b5c2..7cca8ac66fe9 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out(sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 679253d0af84..cc8e3ae9ca73 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -30,6 +30,11 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
+#define MAX4(a, b, c, d) \
+ max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
+ IPSTATS_MIB_MAX, ICMP_MIB_MAX)
+
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
@@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
+ unsigned long buff[SNMP_MIB_MAX];
int i;
- unsigned long val;
- for (i = 0; itemlist[i].name; i++) {
- val = pcpumib ?
- snmp_fold_field(pcpumib, itemlist[i].entry) :
- atomic_long_read(smib + itemlist[i].entry);
- seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+ if (pcpumib) {
+ memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n",
+ itemlist[i].name, buff[i]);
+ } else {
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
+ atomic_long_read(smib + itemlist[i].entry));
}
}
static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
+ u64 buff64[SNMP_MIB_MAX];
int i;
+ memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
- seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
- snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+ seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}
static int snmp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 590dd1f7746f..54404f08efcc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (err)
goto error_fault;
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out(sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 269218aacbea..bdbc38e8bf29 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
- struct net_device *dev,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
void ip6_route_input(struct sk_buff *skb)
{
@@ -1164,7 +1165,7 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = l3mdev_fib_oif(skb->dev),
+ .flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -1188,12 +1189,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags)
{
- struct dst_entry *dst;
bool any_src;
- dst = l3mdev_get_rt6_dst(net, fl6);
- if (dst)
- return dst;
+ if (rt6_need_strict(&fl6->daddr)) {
+ struct dst_entry *dst;
+
+ dst = l3mdev_link_scope_lookup(net, fl6);
+ if (dst)
+ return dst;
+ }
fl6->flowi6_iif = LOOPBACK_IFINDEX;
@@ -1604,7 +1608,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
rcu_read_unlock();
out:
- return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -2565,8 +2571,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT);
+ struct net_device *dev = net->loopback_dev;
+ struct rt6_info *rt;
+
+ /* use L3 Master device as loopback for host routes if device
+ * is enslaved and address is not link local or multicast
+ */
+ if (!rt6_need_strict(addr))
+ dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+ rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -3347,11 +3361,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
} else {
fl6.flowi6_oif = oif;
- if (netif_index_is_l3_master(net, oif)) {
- fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF;
- }
-
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 182b6a9be29d..b1cdf8009d29 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -62,7 +62,7 @@
For comments look at net/ipv4/ip_gre.c --ANK
*/
-#define HASH_SIZE 16
+#define IP6_SIT_HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static bool log_ecn_error = true;
@@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
- struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
struct ip_tunnel __rcu *tunnels_wc[1];
struct ip_tunnel __rcu **tunnels[4];
@@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
-bool ipip6_valid_ip_proto(u8 ipproto)
+static bool ipip6_valid_ip_proto(u8 ipproto)
{
return ipproto == IPPROTO_IPV6 ||
ipproto == IPPROTO_IPIP ||
@@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
for (prio = 1; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
struct ip_tunnel *t;
t = rtnl_dereference(sitn->tunnels[prio][h]);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 94f4f89d73e7..54cf7197c7ab 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
genhash ? "failed" : "mismatch",
&ip6h->saddr, ntohs(th->source),
@@ -817,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else {
- if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
- oif = skb->skb_iif;
-
- fl6.flowi6_oif = oif;
- }
+ else
+ fl6.flowi6_oif = oif ? : skb->skb_iif;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
@@ -1415,6 +1412,7 @@ process:
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
}
@@ -1471,10 +1469,7 @@ process:
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
ret = tcp_v6_do_rcv(sk, skb);
- } else if (unlikely(sk_add_backlog(sk, skb,
- sk->sk_rcvbuf + sk->sk_sndbuf))) {
- bh_unlock_sock(sk);
- __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+ } else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1868,17 +1863,6 @@ void tcp6_proc_exit(struct net *net)
}
#endif
-static void tcp_v6_clear_sk(struct sock *sk, int size)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- /* we do not want to clear pinet6 field, because of RCU lookups */
- sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
-
- size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
- memset(&inet->pinet6 + 1, 0, size);
-}
-
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -1920,7 +1904,6 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
- .clear_sk = tcp_v6_clear_sk,
.diag_destroy = tcp_abort,
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 19ac3a1c308d..9aa7c1c7a9ce 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net)
}
#endif /* CONFIG_PROC_FS */
-void udp_v6_clear_sk(struct sock *sk, int size)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- /* we do not want to clear pinet6 field, because of RCU lookups */
- sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
-
- size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
- memset(&inet->pinet6 + 1, 0, size);
-}
-
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
@@ -1465,7 +1454,7 @@ struct proto udpv6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = udp_v6_clear_sk,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 0682c031ccdc..f6eb1ab34f4b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
-void udp_v6_clear_sk(struct sock *sk, int size);
-
#ifdef CONFIG_PROC_FS
int udp6_seq_show(struct seq_file *seq, void *v);
#endif
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index fd6ef414899b..47d0d2b87106 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -55,7 +55,6 @@ struct proto udplitev6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 70a86adad875..e0f71c01d728 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
if (skb_dst(skb))
- oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;