diff options
author | David S. Miller <davem@davemloft.net> | 2016-05-11 19:31:40 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-11 19:31:56 -0400 |
commit | c3f1010b30f7fc611139cfb702a8685741aa6827 (patch) | |
tree | b68c8a9b5148687c7bce7c6a5ab14e26a5c47050 /drivers/net | |
parent | ca4aa976f04d14bc7da60dce0e2afc34c9f0f1d2 (diff) | |
parent | 0b922b7a829c06e3b0790c58cd9ca026de86096e (diff) |
Merge branch 'vrf-pktinfo'
David Ahern says:
====================
net: vrf: Fixup PKTINFO to return enslaved device index
Applications such as OSPF and BFD need the original ingress device not
the VRF device; the latter can be derived from the former. To that end
move the packet intercept from an rx handler that is invoked by
__netif_receive_skb_core to the ipv4 and ipv6 receive processing.
IPv6 already saves the skb_iif to the control buffer in ipv6_rcv. Since
the skb->dev has not been switched the cb has the enslaved device. Make
the same happen for IPv4 by adding the skb_iif to inet_skb_parm and set
it in ipv4 code after clearing the skb control buffer similar to IPv6.
From there the pktinfo can just pull it from cb with the PKTINFO_SKB_CB
cast.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/vrf.c | 189 |
1 files changed, 91 insertions, 98 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index c8db55aa8280..0ea29345eb2e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -42,9 +42,6 @@ #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_master_get_rcu(dev) \ - ((struct net_device *)rcu_dereference(dev->rx_handler_data)) - struct net_vrf { struct rtable *rth; struct rt6_info *rt6; @@ -60,90 +57,12 @@ struct pcpu_dstats { struct u64_stats_sync syncp; }; -/* neighbor handling is done with actual device; do not want - * to flip skb->dev for those ndisc packets. This really fails - * for multiple next protocols (e.g., NEXTHDR_HOP). But it is - * a start. - */ -#if IS_ENABLED(CONFIG_IPV6) -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct ipv6hdr _ipv6h; - bool rc = true; - - ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h); - if (!ipv6h) - goto out; - - if (ipv6h->nexthdr == NEXTHDR_ICMP) { - const struct icmp6hdr *icmph; - struct icmp6hdr _icmph; - - icmph = skb_header_pointer(skb, sizeof(_ipv6h), - sizeof(_icmph), &_icmph); - if (!icmph) - goto out; - - switch (icmph->icmp6_type) { - case NDISC_ROUTER_SOLICITATION: - case NDISC_ROUTER_ADVERTISEMENT: - case NDISC_NEIGHBOUR_SOLICITATION: - case NDISC_NEIGHBOUR_ADVERTISEMENT: - case NDISC_REDIRECT: - rc = false; - break; - } - } - -out: - return rc; -} -#else -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - return false; -} -#endif - -static bool is_ip_rx_frame(struct sk_buff *skb) -{ - switch (skb->protocol) { - case htons(ETH_P_IP): - return true; - case htons(ETH_P_IPV6): - return check_ipv6_frame(skb); - } - return false; -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } -/* note: already called with rcu_read_lock */ -static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb) -{ - struct sk_buff *skb = *pskb; - - if (is_ip_rx_frame(skb)) { - struct net_device *dev = vrf_master_get_rcu(skb->dev); - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; - dstats->rx_bytes += skb->len; - u64_stats_update_end(&dstats->syncp); - - skb->dev = dev; - - return RX_HANDLER_ANOTHER; - } - return RX_HANDLER_PASS; -} - static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -506,28 +425,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { int ret; - /* register the packet handler for slave ports */ - ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev); - if (ret) { - netdev_err(port_dev, - "Device %s failed to register rx_handler\n", - port_dev->name); - goto out_fail; - } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) - goto out_unregister; + return ret; port_dev->priv_flags |= IFF_L3MDEV_SLAVE; cycle_netdev(port_dev); return 0; - -out_unregister: - netdev_rx_handler_unregister(port_dev); -out_fail: - return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) @@ -544,8 +449,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; - netdev_rx_handler_unregister(port_dev); - cycle_netdev(port_dev); return 0; @@ -670,6 +573,95 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) } #if IS_ENABLED(CONFIG_IPV6) +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +static bool ipv6_ndisc_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool rc = false; + + if (iph->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + struct icmp6hdr _icmph; + + icmph = skb_header_pointer(skb, sizeof(*iph), + sizeof(_icmph), &_icmph); + if (!icmph) + goto out; + + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = true; + break; + } + } + +out: + return rc; +} + +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + /* if packet is NDISC keep the ingress interface */ + if (!ipv6_ndisc_frame(skb)) { + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; + } + + return skb; +} + +#else +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + return skb; +} +#endif + +static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_rcv(vrf_dev, skb); + case AF_INET6: + return vrf_ip6_rcv(vrf_dev, skb); + } + + return skb; +} + +#if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, const struct flowi6 *fl6) { @@ -690,6 +682,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, + .l3mdev_l3_rcv = vrf_l3_rcv, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, #endif |