diff options
author | David S. Miller <davem@davemloft.net> | 2013-08-15 01:01:27 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-08-15 01:01:27 -0700 |
commit | f36f092065415b75b6be8377ae648c7114e7f099 (patch) | |
tree | 1de2d9f01b9947e87705c7d00082fa474e517c5f | |
parent | fc4eba58b4c1462ff3d6247b66fb47d6928db6d2 (diff) | |
parent | 0bd8762824e73a3cce7b7560a97463301764b616 (diff) |
Merge branch 'x-netns'
Nicolas Dichtel says:
====================
This series is a follow up of the previous series whcih adds this
functionality for sit tunnels.
The goal is to add x-netns support for the module ipip and ip6_tunnel,
ie. the encapsulation addresses and the network device are not owned
by the same namespace.
Note that the two first patches are cleanups.
Example to configure an ipip tunnel:
modprobe ipip
ip netns add netns1
ip link add ipip1 type ipip remote 10.16.0.121 local 10.16.0.249
ip l s ipip1 netns netns1
ip netns exec netns1 ip l s lo up
ip netns exec netns1 ip l s ipip1 up
ip netns exec netns1 ip a a dev ipip1 192.168.2.123 remote 192.168.2.121
or an ip6_tunnel:
modprobe ip6_tunnel
ip netns add netns1
ip link add ip6tnl1 type ip6tnl remote 2001:660:3008:c1c3::121 local 2001:660:3008:c1c3::123
ip l s ip6tnl1 netns netns1
ip netns exec netns1 ip l s lo up
ip netns exec netns1 ip l s ip6tnl1 up
ip netns exec netns1 ip a a dev ip6tnl1 192.168.1.123 remote 192.168.1.121
ip netns exec netns1 ip -6 a a dev ip6tnl1 2001:1235::123 remote 2001:1235::121
v2: remove the patch 1/3 of the v1 series (already included)
use net_eq()
add patch 1/4 and 2/4
====================-
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip6_tunnel.h | 1 | ||||
-rw-r--r-- | include/net/ip_tunnels.h | 2 | ||||
-rw-r--r-- | net/core/dev.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 52 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 3 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 5 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 41 | ||||
-rw-r--r-- | net/ipv6/sit.c | 6 |
10 files changed, 81 insertions, 41 deletions
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 4da5de10d1d4..2265b0bf97e5 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -36,6 +36,7 @@ struct __ip6_tnl_parm { struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ + struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_entry *dst_cache; /* cached dst */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c6acd9f8f877..5a76f2bef822 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -102,7 +102,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); -void ip_tunnel_delete_net(struct ip_tunnel_net *itn); +void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/net/core/dev.c b/net/core/dev.c index 58eb802584b9..1ed2b66a10a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1691,13 +1691,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } - skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); /* eth_type_trans() can set pkt_type. - * clear pkt_type _after_ calling eth_type_trans() + * call skb_scrub_packet() after it to clear pkt_type _after_ calling + * eth_type_trans(). */ - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb); return netif_rx(skb); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1f6eab66f7ce..bc3a76521deb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -534,7 +534,7 @@ static int __net_init ipgre_init_net(struct net *net) static void __net_exit ipgre_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipgre_link_ops); } static struct pernet_operations ipgre_net_ops = { @@ -767,7 +767,7 @@ static int __net_init ipgre_tap_init_net(struct net *net) static void __net_exit ipgre_tap_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipgre_tap_ops); } static struct pernet_operations ipgre_tap_net_ops = { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 9fdf8a6d95f3..a4d9126c7b51 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -350,7 +350,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(dev_net(dev), &fl4, + rt = ip_route_output_tunnel(tunnel->net, &fl4, tunnel->parms.iph.protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, @@ -365,7 +365,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len + tdev->needed_headroom; @@ -454,15 +454,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - if (tunnel->net != dev_net(tunnel->dev)) - skb_scrub_packet(skb); - if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; } + + if (!net_eq(tunnel->net, dev_net(tunnel->dev))) + skb_scrub_packet(skb); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -613,7 +614,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tunnel->net != dev_net(dev)) + if (!net_eq(tunnel->net, dev_net(dev))) skb_scrub_packet(skb); if (tunnel->err_count > 0) { @@ -653,7 +654,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - err = iptunnel_xmit(dev_net(dev), rt, skb, + err = iptunnel_xmit(tunnel->net, rt, skb, fl4.saddr, fl4.daddr, protocol, ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -820,11 +821,10 @@ static void ip_tunnel_dev_free(struct net_device *dev) void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_net *itn; - itn = net_generic(net, tunnel->ip_tnl_net_id); + itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); if (itn->fb_tunnel_dev != dev) { ip_tunnel_del(netdev_priv(dev)); @@ -854,6 +854,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, rtnl_lock(); itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; rtnl_unlock(); if (IS_ERR(itn->fb_tunnel_dev)) @@ -863,28 +867,39 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, + struct rtnl_link_ops *ops) { + struct net *net = dev_net(itn->fb_tunnel_dev); + struct net_device *dev, *aux; int h; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == ops) + unregister_netdevice_queue(dev, head); + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { struct ip_tunnel *t; struct hlist_node *n; struct hlist_head *thead = &itn->tunnels[h]; hlist_for_each_entry_safe(t, n, thead, hash_node) - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, head); } if (itn->fb_tunnel_dev) unregister_netdevice_queue(itn->fb_tunnel_dev, head); } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn) +void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) { LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list); + ip_tunnel_destroy(itn, &list, ops); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -928,23 +943,21 @@ EXPORT_SYMBOL_GPL(ip_tunnel_newlink); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); + struct ip_tunnel *t; struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); if (dev == itn->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); - t = ip_tunnel_find(itn, p, dev->type); if (t) { if (t->dev != dev) return -EEXIST; } else { - t = nt; + t = tunnel; if (dev->type != ARPHRD_ETHER) { unsigned int nflags = 0; @@ -983,6 +996,7 @@ int ip_tunnel_init(struct net_device *dev) } tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->ihl = 5; @@ -993,8 +1007,8 @@ EXPORT_SYMBOL_GPL(ip_tunnel_init); void ip_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 79b263da4168..e805e7b3030e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -318,7 +318,7 @@ static int __net_init vti_init_net(struct net *net) static void __net_exit vti_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &vti_link_ops); } static struct pernet_operations vti_net_ops = { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 51fc2a1dcdd3..87bd2952c733 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -286,7 +286,6 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -437,7 +436,7 @@ static int __net_init ipip_init_net(struct net *net) static void __net_exit ipip_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipip_link_ops); } static struct pernet_operations ipip_net_ops = { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ecd60733e5e2..f2d0a42f8057 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -335,6 +335,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, dev->rtnl_link_ops = &ip6gre_link_ops; nt->dev = dev; + nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) @@ -1255,6 +1256,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); @@ -1275,6 +1277,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) struct ip6_tnl *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); tunnel->hlen = sizeof(struct ipv6hdr) + 4; @@ -1450,6 +1453,7 @@ static int ip6gre_tap_init(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ip6gre_tnl_link_config(tunnel, 1); @@ -1501,6 +1505,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, eth_hw_addr_random(dev); nt->dev = dev; + nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); /* Can use a lockless transmit, unless we generate output sequences */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1e55866cead7..cc3bb201b8b0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -315,6 +315,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) t = netdev_priv(dev); t->parms = *p; + t->net = dev_net(dev); err = ip6_tnl_create2(dev); if (err < 0) goto failed_free; @@ -374,7 +375,7 @@ static void ip6_tnl_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) @@ -741,7 +742,7 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; - struct net *net = dev_net(t->dev); + struct net *net = t->net; if ((p->flags & IP6_TNL_F_CAP_RCV) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && @@ -827,6 +828,9 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, tstats->rx_packets++; tstats->rx_bytes += skb->len; + if (!net_eq(t->net, dev_net(t->dev))) + skb_scrub_packet(skb); + netif_rx(skb); rcu_read_unlock(); @@ -895,7 +899,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; - struct net *net = dev_net(t->dev); + struct net *net = t->net; if (p->flags & IP6_TNL_F_CAP_XMIT) { struct net_device *ldev = NULL; @@ -945,8 +949,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; @@ -996,6 +1000,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, goto tx_err_dst_release; } + if (!net_eq(t->net, dev_net(dev))) + skb_scrub_packet(skb); + /* * Okay, now see if we can stuff it in the buffer as-is. */ @@ -1202,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); @@ -1251,7 +1258,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; @@ -1463,7 +1470,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->mtu-=8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -1479,6 +1485,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; + t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; @@ -1596,9 +1603,9 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t; + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) @@ -1699,14 +1706,24 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) { + struct net *net = dev_net(ip6n->fb_tnl_dev); + struct net_device *dev, *aux; int h; struct ip6_tnl *t; LIST_HEAD(list); + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6_link_ops) + unregister_netdevice_queue(dev, &list); + for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, &list); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, &list); t = rtnl_dereference(t->next); } } @@ -1732,6 +1749,10 @@ static int __net_init ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a3437a4cd07e..f18f842ac893 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -621,7 +621,7 @@ static int ipip6_rcv(struct sk_buff *skb) tstats->rx_packets++; tstats->rx_bytes += skb->len; - if (tunnel->net != dev_net(tunnel->dev)) + if (!net_eq(tunnel->net, dev_net(tunnel->dev))) skb_scrub_packet(skb); netif_rx(skb); @@ -860,7 +860,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tunnel->err_count = 0; } - if (tunnel->net != dev_net(dev)) + if (!net_eq(tunnel->net, dev_net(dev))) skb_scrub_packet(skb); /* @@ -1589,7 +1589,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ - if (dev_net(t->dev) != net) + if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); t = rtnl_dereference(t->next); |