summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-08-03 15:36:01 -0700
committerDavid S. Miller <davem@davemloft.net>2017-08-03 15:36:08 -0700
commit84b7187ca2338832e3af58eb5123c02bb6921e4e (patch)
treeec5a8a265479cb3b0496178334ce23bd379fcbd6
parent80cf0b45d88b9b045d7205424987f0c65a3a8f61 (diff)
parent65e65ec137f4abe78b6c90c72c0a6ca7474e9ae6 (diff)
Merge branch 'mlxsw-Support-for-IPv6-UC-router'
Jiri Pirko says: ==================== mlxsw: Support for IPv6 UC router Ido says: This set adds support for IPv6 unicast routes offload. The first four patches make the FIB notification chain generic so that it could be used by address families other than IPv4. This is done by having each address family register its callbacks with the common code, so that its FIB tables and rules could be dumped upon registration to the chain, while ensuring the integrity of the dump. The exact mechanics are explained in detail in the first patch. The next six patches build upon this work and add the necessary callbacks in IPv6 code. This allows listeners of the chain to receive notifications about IPv6 routes addition, deletion and replacement as well as FIB rules notifications. Unlike user space notifications for IPv6 multipath routes, the FIB notification chain notifies these on a per-nexthop basis. This allows us to keep the common code lean and is also unnecessary, as notifications are serialized by each table's lock whereas applications maintaining netlink caches may suffer from concurrent dumps and deletions / additions of routes. The next five patches audit the different code paths reading the route's reference count (rt6i_ref) and remove assumptions regarding its meaning. This is needed since non-FIB users need to be able to hold a reference on the route and a non-zero reference count no longer means the route is in the FIB. The last six patches enable the mlxsw driver to offload IPv6 unicast routes to the Spectrum ASIC. Without resorting to ACLs, lookup is done solely based on the destination IP, so the abort mechanism is invoked upon the addition of source-specific routes. Follow-up patch sets will increase the scale of gatewayed routes by consolidating identical nexthop groups to one adjacency entry in the device's adjacency table (as in IPv4), as well as add support for NH_{ADD,DEL} events which enable support for the 'ignore_routes_with_linkdown' sysctl. Changes in v2: * Provide offload indication for individual nexthops (David Ahern). * Use existing route reference count instead of adding another one. This resulted in several new patches to remove assumptions regarding current semantics of the existing reference count (David Ahern). * Add helpers to allow non-FIB users to take a reference on route. * Remove use of tb6_lock in mlxsw (David Ahern). * Add IPv6 dependency to mlxsw. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c855
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c5
-rw-r--r--include/net/fib_notifier.h44
-rw-r--r--include/net/fib_rules.h9
-rw-r--r--include/net/ip6_fib.h51
-rw-r--r--include/net/ip_fib.h54
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/uapi/linux/ipv6_route.h1
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/fib_notifier.c164
-rw-r--r--net/core/fib_rules.c63
-rw-r--r--net/ipv4/fib_frontend.c17
-rw-r--r--net/ipv4/fib_notifier.c99
-rw-r--r--net/ipv4/fib_rules.c44
-rw-r--r--net/ipv4/fib_semantics.c9
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/fib6_notifier.c61
-rw-r--r--net/ipv6/fib6_rules.c31
-rw-r--r--net/ipv6/ip6_fib.c132
-rw-r--r--net/ipv6/route.c8
25 files changed, 1490 insertions, 177 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 695adff89d71..d56eea310509 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -75,6 +75,7 @@ config MLXSW_SPECTRUM
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
+ depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2f03c7e71584..93b6da88e79c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -44,15 +44,18 @@
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
+#include <linux/route.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
+#include <net/fib_notifier.h>
#include "spectrum.h"
#include "core.h"
@@ -404,6 +407,17 @@ struct mlxsw_sp_fib4_entry {
u8 type;
};
+struct mlxsw_sp_fib6_entry {
+ struct mlxsw_sp_fib_entry common;
+ struct list_head rt6_list;
+ unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+ struct list_head list;
+ struct rt6_info *rt;
+};
+
enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
@@ -2124,6 +2138,26 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
}
}
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ if (nh->rif && nh->rif->dev == rt->dst.dev &&
+ ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
+ &rt->rt6i_gateway))
+ return nh;
+ continue;
+ }
+
+ return NULL;
+}
+
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
@@ -2158,6 +2192,48 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
}
}
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+ list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt->rt6i_flags |= RTF_OFFLOAD;
+ return;
+ }
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ struct mlxsw_sp_nexthop *nh;
+
+ nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
+ if (nh && nh->offloaded)
+ mlxsw_sp_rt6->rt->rt6i_flags |= RTF_OFFLOAD;
+ else
+ mlxsw_sp_rt6->rt->rt6i_flags &= ~RTF_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ rt->rt6i_flags &= ~RTF_OFFLOAD;
+ }
+}
+
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
switch (fib_entry->fib_node->fib->proto) {
@@ -2165,7 +2241,8 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_offload_set(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_set(fib_entry);
+ break;
}
}
@@ -2177,7 +2254,8 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_offload_unset(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+ break;
}
}
@@ -2885,6 +2963,649 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+ /* Packets with link-local destination IP arriving to the router
+ * are trapped to the CPU, so no need to program specific routes
+ * for them.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+ return true;
+
+ /* Multicast routes aren't supported, so ignore them. Neighbour
+ * Discovery packets are specifically trapped.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+ return true;
+
+ /* Cloned routes are irrelevant in the forwarding path. */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return true;
+
+ return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
+ if (!mlxsw_sp_rt6)
+ return ERR_PTR(-ENOMEM);
+
+ /* In case of route replace, replaced route is deleted with
+ * no notification. Take reference to prevent accessing freed
+ * memory.
+ */
+ mlxsw_sp_rt6->rt = rt;
+ rt6_hold(rt);
+
+ return mlxsw_sp_rt6;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+ rt6_release(rt);
+}
+#else
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+}
+#endif
+
+static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
+ kfree(mlxsw_sp_rt6);
+}
+
+static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+{
+ /* RTF_CACHE routes are ignored */
+ return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
+static struct rt6_info *
+mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt;
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
+ * virtual router.
+ */
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (rt->rt6i_metric < nrt->rt6i_metric)
+ continue;
+ if (rt->rt6i_metric == nrt->rt6i_metric &&
+ mlxsw_sp_fib6_rt_can_mp(rt))
+ return fib6_entry;
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ break;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_rt6 *
+mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ if (mlxsw_sp_rt6->rt == rt)
+ return mlxsw_sp_rt6;
+ }
+
+ return NULL;
+}
+
+static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ nh->nh_grp = nh_grp;
+ memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+
+ if (!dev)
+ return 0;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_nexthop_neigh_init;
+
+ return 0;
+
+err_nexthop_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_rif_fini(nh);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct mlxsw_sp_nexthop *nh;
+ size_t alloc_size;
+ int i = 0;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+#if IS_ENABLED(CONFIG_IPV6)
+ nh_grp->neigh_tbl = &nd_tbl;
+#endif
+ mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
+ nh_grp->count = fib6_entry->nrt6;
+ for (i = 0; i < nh_grp->count; i++) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ nh = &nh_grp->nexthops[i];
+ err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
+ if (err)
+ goto err_nexthop6_init;
+ mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop6_init:
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i = nh_grp->count;
+
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ WARN_ON(nh_grp->adj_index_valid);
+ kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ /* For now, don't consolidate nexthop groups */
+ nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &nh_grp->fib_list);
+ fib6_entry->common.nh_group = nh_grp;
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
+ int err;
+
+ fib6_entry->common.nh_group = NULL;
+ list_del(&fib6_entry->common.nexthop_group_node);
+
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ /* In case this entry is offloaded, then the adjacency index
+ * currently associated with it in the device's table is that
+ * of the old group. Start using the new one instead.
+ */
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ if (list_empty(&old_nh_grp->fib_list))
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+err_nexthop6_group_get:
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &old_nh_grp->fib_list);
+ fib6_entry->common.nh_group = old_nh_grp;
+ return err;
+}
+
+static int
+mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6))
+ return PTR_ERR(mlxsw_sp_rt6);
+
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_update;
+
+ return 0;
+
+err_nexthop6_group_update:
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
+ if (WARN_ON(!mlxsw_sp_rt6))
+ return;
+
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+}
+
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
+ const struct rt6_info *rt)
+{
+ /* Packets hitting RTF_REJECT routes need to be discarded by the
+ * stack. We can rely on their destination device not having a
+ * RIF (it's the loopback device) and can thus use action type
+ * local, which will cause them to be trapped with a lower
+ * priority than packets that need to be locally received.
+ */
+ if (rt->rt6i_flags & RTF_LOCAL)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ else if (rt->rt6i_flags & RTF_REJECT)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ else if (rt->rt6i_flags & RTF_GATEWAY)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+}
+
+static void
+mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
+
+ list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
+ list) {
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
+ if (!fib6_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib6_entry->common;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
+
+ mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);
+
+ INIT_LIST_HEAD(&fib6_entry->rt6_list);
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6 = 1;
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ fib_entry->fib_node = fib_node;
+
+ return fib6_entry;
+
+err_nexthop6_group_get:
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+err_rt6_create:
+ kfree(fib6_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
+ WARN_ON(fib6_entry->nrt6);
+ kfree(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+ if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+ mlxsw_sp_fib6_rt_can_mp(nrt))
+ return fib6_entry;
+ if (mlxsw_sp_fib6_rt_can_mp(nrt))
+ fallback = fallback ?: fib6_entry;
+ }
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ return fallback ?: fib6_entry;
+ }
+
+ return fallback;
+}
+
+static int
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
+ struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+ if (replace && WARN_ON(!fib6_entry))
+ return -EINVAL;
+
+ if (fib6_entry) {
+ list_add_tail(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ } else {
+ struct mlxsw_sp_fib6_entry *last;
+
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+
+ if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+ break;
+ fib6_entry = last;
+ }
+
+ if (fib6_entry)
+ list_add(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ else
+ list_add(&new6_entry->common.list,
+ &fib_node->entry_list);
+ }
+
+ return 0;
+}
+
+static void
+mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ list_del(&fib6_entry->common.list);
+}
+
+static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ int err;
+
+ err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
+ rt->rt6i_metric == iter_rt->rt6i_metric &&
+ mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+ return fib6_entry;
+ }
+
+ return NULL;
+}
+
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+ struct mlxsw_sp_fib6_entry *replaced;
+
+ if (!replace)
+ return;
+
+ replaced = list_next_entry(fib6_entry, common.list);
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ if (rt->rt6i_src.plen)
+ return -EINVAL;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
+ &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib_node))
+ return PTR_ERR(fib_node);
+
+ /* Before creating a new entry, try to append route to an existing
+ * multipath entry.
+ */
+ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
+ if (fib6_entry) {
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+ if (err)
+ goto err_fib6_entry_nexthop_add;
+ return 0;
+ }
+
+ fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+ if (IS_ERR(fib6_entry)) {
+ err = PTR_ERR(fib6_entry);
+ goto err_fib6_entry_create;
+ }
+
+ err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+ if (err)
+ goto err_fib6_node_entry_link;
+
+ mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+
+ return 0;
+
+err_fib6_node_entry_link:
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+err_fib6_entry_create:
+err_fib6_entry_nexthop_add:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return;
+
+ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
+ if (WARN_ON(!fib6_entry))
+ return;
+
+ /* If route is part of a multipath entry, but not the last one
+ * removed, then only reduce its nexthop group.
+ */
+ if (!list_is_singular(&fib6_entry->rt6_list)) {
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+ return;
+ }
+
+ fib_node = fib6_entry->common.fib_node;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_reg_ralxx_protocol proto,
u8 tree_id)
@@ -2965,6 +3686,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
}
}
+static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
+
+ list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ if (do_break)
+ break;
+ }
+}
+
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
@@ -2973,7 +3711,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
break;
}
}
@@ -3031,6 +3769,7 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
struct mlxsw_sp_fib_event_work {
struct work_struct work;
union {
+ struct fib6_entry_notifier_info fen6_info;
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
@@ -3039,7 +3778,7 @@ struct mlxsw_sp_fib_event_work {
unsigned long event;
};
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
@@ -3084,6 +3823,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
kfree(fib_work);
}
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ struct fib_rule *rule;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+ err = mlxsw_sp_router_fib6_add(mlxsw_sp,
+ fib_work->fen6_info.rt, replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ rule = fib_work->fr_info.rule;
+ if (!fib6_rule_default(rule) && !rule->l3mdev)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ fib_rule_put(rule);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
+ /* Take referece on fib_info to prevent it from being
+ * freed while work is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+ rt6_hold(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ }
+}
+
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
unsigned long event, void *ptr)
@@ -3099,31 +3919,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
if (WARN_ON(!fib_work))
return NOTIFY_BAD;
- INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
router = container_of(nb, struct mlxsw_sp_router, fib_nb);
fib_work->mlxsw_sp = router->mlxsw_sp;
fib_work->event = event;
- switch (event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
- case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
- /* Take referece on fib_info to prevent it from being
- * freed while work is queued. Release it afterwards.
- */
- fib_info_hold(fib_work->fen_info.fi);
+ switch (info->family) {
+ case AF_INET:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+ mlxsw_sp_router_fib4_event(fib_work, info);
break;
- case FIB_EVENT_RULE_ADD: /* fall through */
- case FIB_EVENT_RULE_DEL:
- memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
- fib_rule_get(fib_work->fr_info.rule);
- break;
- case FIB_EVENT_NH_ADD: /* fall through */
- case FIB_EVENT_NH_DEL:
- memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
- fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ case AF_INET6:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+ mlxsw_sp_router_fib6_event(fib_work, info);
break;
}
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07099fa..fc8f8bdf6579 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
#include <net/netevent.h>
#include <net/arp.h>
#include <net/fib_rules.h>
+#include <net/fib_notifier.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
@@ -2191,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
{
struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
struct rocker_fib_event_work *fib_work;
+ struct fib_notifier_info *info = ptr;
+
+ if (info->family != AF_INET)
+ return NOTIFY_DONE;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 000000000000..241475224f74
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,44 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+
+struct fib_notifier_info {
+ struct net *net;
+ int family;
+};
+
+enum fib_event_type {
+ FIB_EVENT_ENTRY_REPLACE,
+ FIB_EVENT_ENTRY_APPEND,
+ FIB_EVENT_ENTRY_ADD,
+ FIB_EVENT_ENTRY_DEL,
+ FIB_EVENT_RULE_ADD,
+ FIB_EVENT_RULE_DEL,
+ FIB_EVENT_NH_ADD,
+ FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+ int family;
+ struct list_head list;
+ unsigned int (*fib_seq_read)(struct net *net);
+ int (*fib_dump)(struct net *net, struct notifier_block *nb);
+ struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa2f479..3d7f1cefc6f5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
+#include <net/fib_notifier.h>
struct fib_kuid_range {
kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
int addr_size;
int unresolved_rules;
int nr_goto_rules;
+ unsigned int fib_rules_seq;
int (*action)(struct fib_rule *,
struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
struct rcu_head rcu;
};
+struct fib_rule_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct fib_rule *rule;
+};
+
#define FRA_GENERIC_POLICY \
[FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008cc6f5..1d790ea40ea7 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
#include <linux/ipv6_route.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
+#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
+#include <net/fib_notifier.h>
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
@@ -185,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_hold(struct rt6_info *rt)
+{
+ atomic_inc(&rt->rt6i_ref);
+}
+
+static inline void rt6_release(struct rt6_info *rt)
+{
+ if (atomic_dec_and_test(&rt->rt6i_ref)) {
+ rt6_free_pcpu(rt);
+ dst_dev_put(&rt->dst);
+ dst_release(&rt->dst);
+ }
+}
+
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
@@ -233,6 +251,7 @@ struct fib6_table {
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
+ unsigned int fib_seq;
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -256,6 +275,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
struct flowi6 *, int);
+struct fib6_entry_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct rt6_info *rt;
+};
+
/*
* exported functions
*/
@@ -292,9 +316,24 @@ int fib6_init(void);
int ipv6_route_open(struct inode *inode, struct file *file);
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
#else
static inline int fib6_rules_init(void)
{
@@ -304,5 +343,17 @@ static inline void fib6_rules_cleanup(void)
{
return ;
}
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ef8992d49bc3..1a7f7e424320 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
+#include <net/fib_notifier.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
@@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
FIB_RES_SADDR(net, res))
-struct fib_notifier_info {
- struct net *net;
-};
-
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
u32 dst;
@@ -202,44 +199,21 @@ struct fib_entry_notifier_info {
u32 tb_id;
};
-struct fib_rule_notifier_info {
- struct fib_notifier_info info; /* must be first */
- struct fib_rule *rule;
-};
-
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
};
-enum fib_event_type {
- FIB_EVENT_ENTRY_REPLACE,
- FIB_EVENT_ENTRY_APPEND,
- FIB_EVENT_ENTRY_ADD,
- FIB_EVENT_ENTRY_DEL,
- FIB_EVENT_RULE_ADD,
- FIB_EVENT_RULE_DEL,
- FIB_EVENT_NH_ADD,
- FIB_EVENT_NH_DEL,
-};
-
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info);
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
struct fib_notifier_info *info);
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib4_notifier_init(struct net *net);
+void __net_exit fib4_notifier_exit(struct net *net);
void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
struct fib_table {
struct hlist_node tb_hlist;
@@ -312,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
return true;
}
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -357,6 +341,8 @@ out:
}
bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd4c2e0..57faa375eab9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -88,6 +88,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
+ struct list_head fib_notifier_ops; /* protected by net_mutex */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a0850b0e..20d061c805e3 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -159,6 +159,7 @@ struct netns_ipv4 {
int sysctl_fib_multipath_hash_policy;
#endif
+ struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
atomic_t rt_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e2edcc..abdf3b40303b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,6 +86,7 @@ struct netns_ipv6 {
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
+ struct fib_notifier_ops *notifier_ops;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index d496c02e14bc..33e2a5732bd1 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,6 +35,7 @@
#define RTF_PREF(pref) ((pref) << 27)
#define RTF_PREF_MASK 0x18000000
+#define RTF_OFFLOAD 0x20000000 /* offloaded route */
#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */
#define RTF_LOCAL 0x80000000
diff --git a/net/core/Makefile b/net/core/Makefile
index d501c4278015..56d771a887b6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+ fib_notifier.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..292aab83702f
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,164 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+ struct fib_notifier_ops *ops;
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net) {
+ list_for_each_entry(ops, &net->fib_notifier_ops, list)
+ fib_seq += ops->fib_seq_read(net);
+ }
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib_notifier_ops *ops;
+
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ int err = ops->fib_dump(net, nb);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+ int err;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ err = fib_net_dump(net, nb);
+ if (err)
+ goto err_fib_net_dump;
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+
+err_fib_net_dump:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+ struct net *net)
+{
+ struct fib_notifier_ops *o;
+
+ list_for_each_entry(o, &net->fib_notifier_ops, list)
+ if (ops->family == o->family)
+ return -EEXIST;
+ list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+ return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+ struct fib_notifier_ops *ops;
+ int err;
+
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return ERR_PTR(-ENOMEM);
+
+ err = __fib_notifier_ops_register(ops, net);
+ if (err)
+ goto err_register;
+
+ return ops;
+
+err_register:
+ kfree(ops);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->fib_notifier_ops);
+ return 0;
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+ .init = fib_notifier_net_init,
+};
+
+static int __init fib_notifier_init(void)
+{
+ return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..fc0b65093417 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule, int family)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = family,
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule,
+ struct fib_rules_ops *ops)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = ops->family,
+ .rule = rule,
+ };
+
+ ops->fib_rules_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+ struct fib_rules_ops *ops;
+ struct fib_rule *rule;
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return -EAFNOSUPPORT;
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+ family);
+ rules_ops_put(ops);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+ unsigned int fib_rules_seq;
+ struct fib_rules_ops *ops;
+
+ ASSERT_RTNL();
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return 0;
+ fib_rules_seq = ops->fib_rules_seq;
+ rules_ops_put(ops);
+
+ return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..2cba559f14df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
- net->ipv4.fib_seq = 0;
+ err = fib4_notifier_init(net);
+ if (err)
+ return err;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
- if (!net->ipv4.fib_table_hash)
- return -ENOMEM;
+ if (!net->ipv4.fib_table_hash) {
+ err = -ENOMEM;
+ goto err_table_hash_alloc;
+ }
err = fib4_rules_init(net);
if (err < 0)
- goto fail;
+ goto err_rules_init;
return 0;
-fail:
+err_rules_init:
kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+ fib4_notifier_exit(net);
return err;
}
@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
+ fib4_notifier_exit(net);
}
static int __net_init fib_net_init(struct net *net)
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..5d7afb145562 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,71 @@
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ info->family = AF_INET;
+ return call_fib_notifier(nb, net, event_type, info);
}
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
+ ASSERT_RTNL();
+
+ info->family = AF_INET;
net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
+ ASSERT_RTNL();
- return fib_seq;
+ return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
+ int err;
+
+ err = fib4_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ fib_notify(net, nb);
+
+ return 0;
}
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+ .family = AF_INET,
+ .fib_seq_read = fib4_seq_read,
+ .fib_dump = fib4_dump,
+};
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb);
- fib_notify(net, nb);
- }
- rcu_read_unlock();
+ net->ipv4.fib_seq = 0;
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
+ ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.notifier_ops = ops;
- return -EBUSY;
+ return 0;
}
-EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ fib_notifier_ops_unregister(net->ipv4.notifier_ops);
}
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET);
+}
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
- struct fib_rules_ops *ops = net->ipv4.rules_ops;
- struct fib_rule *rule;
-
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f62dc2463280..632b454ce77c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include "fib_lookup.h"
@@ -1451,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN)
break;
- return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
- &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
case FIB_EVENT_NH_DEL:
if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
- return call_fib_notifiers(dev_net(fib_nh->nh_dev),
- event_type, &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+ event_type, &info.info);
default:
break;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..1a6ffb0dab9c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"
@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifiers(net, event_type, &info.info);
+ return call_fib4_notifiers(net, event_type, &info.info);
}
#define MAX_STAT_DEPTH 32
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..f8b24c2e0d77 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o
+ udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..30ee23eef268 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev)
* lo device down, release this obsolete dst and
* reallocate a new router for ifa.
*/
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
+ if (!sp_ifa->rt->rt6i_node) {
ip6_rt_put(sp_ifa->rt);
sp_ifa->rt = NULL;
} else {
@@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* rt6i_ref == 0 means the host route was removed from the
+ /* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..66a103ef7e86
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,61 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+ return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ err = fib6_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+ .family = AF_INET6,
+ .fib_seq_read = fib6_seq_read,
+ .fib_dump = fib6_dump,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.notifier_ops = ops;
+
+ return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..2f29e4e33bd3 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
@@ -29,6 +30,36 @@ struct fib6_rule {
u8 tclass;
};
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+ if (r->dst.plen || r->src.plen || r->tclass)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
+{
+ if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
+
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET6);
+}
+
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET6);
+}
+
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..69ed0043d117 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -153,7 +154,7 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -176,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
-
-static void rt6_release(struct rt6_info *rt)
-{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
@@ -302,6 +295,109 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+ unsigned int h, fib_seq = 0;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ read_lock_bh(&tb->tb6_lock);
+ fib_seq += tb->fib_seq;
+ read_unlock_bh(&tb->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+
+ return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ rt->rt6i_table->fib_seq++;
+ return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+ struct net *net;
+ struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+ if (rt == arg->net->ipv6.ip6_null_entry)
+ return;
+ call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ fib6_rt_dump(rt, w->args);
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
+{
+ w->root = &tb->tb6_root;
+ read_lock_bh(&tb->tb6_lock);
+ fib6_walk(net, w);
+ read_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib6_dump_arg arg;
+ struct fib6_walker *w;
+ unsigned int h;
+
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+
+ w->func = fib6_node_dump;
+ arg.net = net;
+ arg.nb = nb;
+ w->args = &arg;
+
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ fib6_table_dump(net, tb, w);
+ }
+
+ kfree(w);
+
+ return 0;
+}
+
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
@@ -733,8 +829,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
}
fn = fn->parent;
}
- /* No more references are possible at this point. */
- BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
}
}
@@ -879,6 +973,8 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -906,6 +1002,8 @@ add:
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -913,6 +1011,7 @@ add:
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->rt6i_nsiblings;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
@@ -925,6 +1024,7 @@ add:
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
nsiblings--;
@@ -1459,6 +1559,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
@@ -1839,6 +1940,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ int err;
+
+ err = fib6_notifier_init(net);
+ if (err)
+ return err;
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1891,6 +1997,7 @@ out_fib_table_hash:
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
+ fib6_notifier_exit(net);
return -ENOMEM;
}
@@ -1907,6 +2014,7 @@ static void fib6_net_exit(struct net *net)
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
+ fib6_notifier_exit(net);
}
static struct pernet_operations fib6_net_ops = {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..aba07fce67fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
+ if (cfg->fc_flags & RTF_OFFLOAD) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
+ goto out;
+ }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ if (rt->rt6i_flags & RTF_OFFLOAD)
+ *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))