summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c855
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c5
-rw-r--r--include/net/fib_notifier.h44
-rw-r--r--include/net/fib_rules.h9
-rw-r--r--include/net/ip6_fib.h51
-rw-r--r--include/net/ip_fib.h54
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/uapi/linux/ipv6_route.h1
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/fib_notifier.c164
-rw-r--r--net/core/fib_rules.c63
-rw-r--r--net/ipv4/fib_frontend.c17
-rw-r--r--net/ipv4/fib_notifier.c99
-rw-r--r--net/ipv4/fib_rules.c44
-rw-r--r--net/ipv4/fib_semantics.c9
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/fib6_notifier.c61
-rw-r--r--net/ipv6/fib6_rules.c31
-rw-r--r--net/ipv6/ip6_fib.c132
-rw-r--r--net/ipv6/route.c8
25 files changed, 1490 insertions, 177 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 695adff89d71..d56eea310509 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -75,6 +75,7 @@ config MLXSW_SPECTRUM
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
+ depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2f03c7e71584..93b6da88e79c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -44,15 +44,18 @@
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
+#include <linux/route.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
+#include <net/fib_notifier.h>
#include "spectrum.h"
#include "core.h"
@@ -404,6 +407,17 @@ struct mlxsw_sp_fib4_entry {
u8 type;
};
+struct mlxsw_sp_fib6_entry {
+ struct mlxsw_sp_fib_entry common;
+ struct list_head rt6_list;
+ unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+ struct list_head list;
+ struct rt6_info *rt;
+};
+
enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
@@ -2124,6 +2138,26 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
}
}
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ if (nh->rif && nh->rif->dev == rt->dst.dev &&
+ ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
+ &rt->rt6i_gateway))
+ return nh;
+ continue;
+ }
+
+ return NULL;
+}
+
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
@@ -2158,6 +2192,48 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
}
}
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+ list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt->rt6i_flags |= RTF_OFFLOAD;
+ return;
+ }
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ struct mlxsw_sp_nexthop *nh;
+
+ nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
+ if (nh && nh->offloaded)
+ mlxsw_sp_rt6->rt->rt6i_flags |= RTF_OFFLOAD;
+ else
+ mlxsw_sp_rt6->rt->rt6i_flags &= ~RTF_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ rt->rt6i_flags &= ~RTF_OFFLOAD;
+ }
+}
+
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
switch (fib_entry->fib_node->fib->proto) {
@@ -2165,7 +2241,8 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_offload_set(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_set(fib_entry);
+ break;
}
}
@@ -2177,7 +2254,8 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_offload_unset(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+ break;
}
}
@@ -2885,6 +2963,649 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+ /* Packets with link-local destination IP arriving to the router
+ * are trapped to the CPU, so no need to program specific routes
+ * for them.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+ return true;
+
+ /* Multicast routes aren't supported, so ignore them. Neighbour
+ * Discovery packets are specifically trapped.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+ return true;
+
+ /* Cloned routes are irrelevant in the forwarding path. */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return true;
+
+ return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
+ if (!mlxsw_sp_rt6)
+ return ERR_PTR(-ENOMEM);
+
+ /* In case of route replace, replaced route is deleted with
+ * no notification. Take reference to prevent accessing freed
+ * memory.
+ */
+ mlxsw_sp_rt6->rt = rt;
+ rt6_hold(rt);
+
+ return mlxsw_sp_rt6;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+ rt6_release(rt);
+}
+#else
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+}
+#endif
+
+static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
+ kfree(mlxsw_sp_rt6);
+}
+
+static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+{
+ /* RTF_CACHE routes are ignored */
+ return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
+static struct rt6_info *
+mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt;
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
+ * virtual router.
+ */
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (rt->rt6i_metric < nrt->rt6i_metric)
+ continue;
+ if (rt->rt6i_metric == nrt->rt6i_metric &&
+ mlxsw_sp_fib6_rt_can_mp(rt))
+ return fib6_entry;
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ break;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_rt6 *
+mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ if (mlxsw_sp_rt6->rt == rt)
+ return mlxsw_sp_rt6;
+ }
+
+ return NULL;
+}
+
+static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ nh->nh_grp = nh_grp;
+ memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+
+ if (!dev)
+ return 0;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_nexthop_neigh_init;
+
+ return 0;
+
+err_nexthop_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_rif_fini(nh);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct mlxsw_sp_nexthop *nh;
+ size_t alloc_size;
+ int i = 0;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+#if IS_ENABLED(CONFIG_IPV6)
+ nh_grp->neigh_tbl = &nd_tbl;
+#endif
+ mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
+ nh_grp->count = fib6_entry->nrt6;
+ for (i = 0; i < nh_grp->count; i++) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ nh = &nh_grp->nexthops[i];
+ err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
+ if (err)
+ goto err_nexthop6_init;
+ mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop6_init:
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i = nh_grp->count;
+
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ WARN_ON(nh_grp->adj_index_valid);
+ kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ /* For now, don't consolidate nexthop groups */
+ nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &nh_grp->fib_list);
+ fib6_entry->common.nh_group = nh_grp;
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
+ int err;
+
+ fib6_entry->common.nh_group = NULL;
+ list_del(&fib6_entry->common.nexthop_group_node);
+
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ /* In case this entry is offloaded, then the adjacency index
+ * currently associated with it in the device's table is that
+ * of the old group. Start using the new one instead.
+ */
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ if (list_empty(&old_nh_grp->fib_list))
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+err_nexthop6_group_get:
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &old_nh_grp->fib_list);
+ fib6_entry->common.nh_group = old_nh_grp;
+ return err;
+}
+
+static int
+mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6))
+ return PTR_ERR(mlxsw_sp_rt6);
+
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_update;
+
+ return 0;
+
+err_nexthop6_group_update:
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
+ if (WARN_ON(!mlxsw_sp_rt6))
+ return;
+
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+}
+
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
+ const struct rt6_info *rt)
+{
+ /* Packets hitting RTF_REJECT routes need to be discarded by the
+ * stack. We can rely on their destination device not having a
+ * RIF (it's the loopback device) and can thus use action type
+ * local, which will cause them to be trapped with a lower
+ * priority than packets that need to be locally received.
+ */
+ if (rt->rt6i_flags & RTF_LOCAL)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ else if (rt->rt6i_flags & RTF_REJECT)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ else if (rt->rt6i_flags & RTF_GATEWAY)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+}
+
+static void
+mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
+
+ list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
+ list) {
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
+ if (!fib6_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib6_entry->common;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
+
+ mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);
+
+ INIT_LIST_HEAD(&fib6_entry->rt6_list);
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6 = 1;
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ fib_entry->fib_node = fib_node;
+
+ return fib6_entry;
+
+err_nexthop6_group_get:
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+err_rt6_create:
+ kfree(fib6_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
+ WARN_ON(fib6_entry->nrt6);
+ kfree(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+ if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+ mlxsw_sp_fib6_rt_can_mp(nrt))
+ return fib6_entry;
+ if (mlxsw_sp_fib6_rt_can_mp(nrt))
+ fallback = fallback ?: fib6_entry;
+ }
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ return fallback ?: fib6_entry;
+ }
+
+ return fallback;
+}
+
+static int
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
+ struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+ if (replace && WARN_ON(!fib6_entry))
+ return -EINVAL;
+
+ if (fib6_entry) {
+ list_add_tail(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ } else {
+ struct mlxsw_sp_fib6_entry *last;
+
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+
+ if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+ break;
+ fib6_entry = last;
+ }
+
+ if (fib6_entry)
+ list_add(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ else
+ list_add(&new6_entry->common.list,
+ &fib_node->entry_list);
+ }
+
+ return 0;
+}
+
+static void
+mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ list_del(&fib6_entry->common.list);
+}
+
+static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ int err;
+
+ err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
+ rt->rt6i_metric == iter_rt->rt6i_metric &&
+ mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+ return fib6_entry;
+ }
+
+ return NULL;
+}
+
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+ struct mlxsw_sp_fib6_entry *replaced;
+
+ if (!replace)
+ return;
+
+ replaced = list_next_entry(fib6_entry, common.list);
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ if (rt->rt6i_src.plen)
+ return -EINVAL;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
+ &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib_node))
+ return PTR_ERR(fib_node);
+
+ /* Before creating a new entry, try to append route to an existing
+ * multipath entry.
+ */
+ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
+ if (fib6_entry) {
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+ if (err)
+ goto err_fib6_entry_nexthop_add;
+ return 0;
+ }
+
+ fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+ if (IS_ERR(fib6_entry)) {
+ err = PTR_ERR(fib6_entry);
+ goto err_fib6_entry_create;
+ }
+
+ err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+ if (err)
+ goto err_fib6_node_entry_link;
+
+ mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+
+ return 0;
+
+err_fib6_node_entry_link:
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+err_fib6_entry_create:
+err_fib6_entry_nexthop_add:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return;
+
+ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
+ if (WARN_ON(!fib6_entry))
+ return;
+
+ /* If route is part of a multipath entry, but not the last one
+ * removed, then only reduce its nexthop group.
+ */
+ if (!list_is_singular(&fib6_entry->rt6_list)) {
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+ return;
+ }
+
+ fib_node = fib6_entry->common.fib_node;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_reg_ralxx_protocol proto,
u8 tree_id)
@@ -2965,6 +3686,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
}
}
+static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
+
+ list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ if (do_break)
+ break;
+ }
+}
+
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
@@ -2973,7 +3711,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
break;
}
}
@@ -3031,6 +3769,7 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
struct mlxsw_sp_fib_event_work {
struct work_struct work;
union {
+ struct fib6_entry_notifier_info fen6_info;
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
@@ -3039,7 +3778,7 @@ struct mlxsw_sp_fib_event_work {
unsigned long event;
};
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
@@ -3084,6 +3823,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
kfree(fib_work);
}
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ struct fib_rule *rule;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+ err = mlxsw_sp_router_fib6_add(mlxsw_sp,
+ fib_work->fen6_info.rt, replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ rule = fib_work->fr_info.rule;
+ if (!fib6_rule_default(rule) && !rule->l3mdev)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ fib_rule_put(rule);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
+ /* Take referece on fib_info to prevent it from being
+ * freed while work is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+ rt6_hold(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ }
+}
+
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
unsigned long event, void *ptr)
@@ -3099,31 +3919,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
if (WARN_ON(!fib_work))
return NOTIFY_BAD;
- INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
router = container_of(nb, struct mlxsw_sp_router, fib_nb);
fib_work->mlxsw_sp = router->mlxsw_sp;
fib_work->event = event;
- switch (event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
- case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
- /* Take referece on fib_info to prevent it from being
- * freed while work is queued. Release it afterwards.
- */
- fib_info_hold(fib_work->fen_info.fi);
+ switch (info->family) {
+ case AF_INET:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+ mlxsw_sp_router_fib4_event(fib_work, info);
break;
- case FIB_EVENT_RULE_ADD: /* fall through */
- case FIB_EVENT_RULE_DEL:
- memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
- fib_rule_get(fib_work->fr_info.rule);
- break;
- case FIB_EVENT_NH_ADD: /* fall through */
- case FIB_EVENT_NH_DEL:
- memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
- fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ case AF_INET6:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+ mlxsw_sp_router_fib6_event(fib_work, info);
break;
}
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07099fa..fc8f8bdf6579 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
#include <net/netevent.h>
#include <net/arp.h>
#include <net/fib_rules.h>
+#include <net/fib_notifier.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
@@ -2191,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
{
struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
struct rocker_fib_event_work *fib_work;
+ struct fib_notifier_info *info = ptr;
+
+ if (info->family != AF_INET)
+ return NOTIFY_DONE;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 000000000000..241475224f74
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,44 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+
+struct fib_notifier_info {
+ struct net *net;
+ int family;
+};
+
+enum fib_event_type {
+ FIB_EVENT_ENTRY_REPLACE,
+ FIB_EVENT_ENTRY_APPEND,
+ FIB_EVENT_ENTRY_ADD,
+ FIB_EVENT_ENTRY_DEL,
+ FIB_EVENT_RULE_ADD,
+ FIB_EVENT_RULE_DEL,
+ FIB_EVENT_NH_ADD,
+ FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+ int family;
+ struct list_head list;
+ unsigned int (*fib_seq_read)(struct net *net);
+ int (*fib_dump)(struct net *net, struct notifier_block *nb);
+ struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa2f479..3d7f1cefc6f5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
+#include <net/fib_notifier.h>
struct fib_kuid_range {
kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
int addr_size;
int unresolved_rules;
int nr_goto_rules;
+ unsigned int fib_rules_seq;
int (*action)(struct fib_rule *,
struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
struct rcu_head rcu;
};
+struct fib_rule_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct fib_rule *rule;
+};
+
#define FRA_GENERIC_POLICY \
[FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008cc6f5..1d790ea40ea7 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
#include <linux/ipv6_route.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
+#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
+#include <net/fib_notifier.h>
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
@@ -185,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_hold(struct rt6_info *rt)
+{
+ atomic_inc(&rt->rt6i_ref);
+}
+
+static inline void rt6_release(struct rt6_info *rt)
+{
+ if (atomic_dec_and_test(&rt->rt6i_ref)) {
+ rt6_free_pcpu(rt);
+ dst_dev_put(&rt->dst);
+ dst_release(&rt->dst);
+ }
+}
+
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
@@ -233,6 +251,7 @@ struct fib6_table {
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
+ unsigned int fib_seq;
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -256,6 +275,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
struct flowi6 *, int);
+struct fib6_entry_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct rt6_info *rt;
+};
+
/*
* exported functions
*/
@@ -292,9 +316,24 @@ int fib6_init(void);
int ipv6_route_open(struct inode *inode, struct file *file);
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
#else
static inline int fib6_rules_init(void)
{
@@ -304,5 +343,17 @@ static inline void fib6_rules_cleanup(void)
{
return ;
}
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ef8992d49bc3..1a7f7e424320 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
+#include <net/fib_notifier.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
@@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
FIB_RES_SADDR(net, res))
-struct fib_notifier_info {
- struct net *net;
-};
-
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
u32 dst;
@@ -202,44 +199,21 @@ struct fib_entry_notifier_info {
u32 tb_id;
};
-struct fib_rule_notifier_info {
- struct fib_notifier_info info; /* must be first */
- struct fib_rule *rule;
-};
-
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
};
-enum fib_event_type {
- FIB_EVENT_ENTRY_REPLACE,
- FIB_EVENT_ENTRY_APPEND,
- FIB_EVENT_ENTRY_ADD,
- FIB_EVENT_ENTRY_DEL,
- FIB_EVENT_RULE_ADD,
- FIB_EVENT_RULE_DEL,
- FIB_EVENT_NH_ADD,
- FIB_EVENT_NH_DEL,
-};
-
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info);
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
struct fib_notifier_info *info);
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib4_notifier_init(struct net *net);
+void __net_exit fib4_notifier_exit(struct net *net);
void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
struct fib_table {
struct hlist_node tb_hlist;
@@ -312,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
return true;
}
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -357,6 +341,8 @@ out:
}
bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd4c2e0..57faa375eab9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -88,6 +88,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
+ struct list_head fib_notifier_ops; /* protected by net_mutex */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a0850b0e..20d061c805e3 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -159,6 +159,7 @@ struct netns_ipv4 {
int sysctl_fib_multipath_hash_policy;
#endif
+ struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
atomic_t rt_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e2edcc..abdf3b40303b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,6 +86,7 @@ struct netns_ipv6 {
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
+ struct fib_notifier_ops *notifier_ops;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index d496c02e14bc..33e2a5732bd1 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,6 +35,7 @@
#define RTF_PREF(pref) ((pref) << 27)
#define RTF_PREF_MASK 0x18000000
+#define RTF_OFFLOAD 0x20000000 /* offloaded route */
#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */
#define RTF_LOCAL 0x80000000
diff --git a/net/core/Makefile b/net/core/Makefile
index d501c4278015..56d771a887b6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+ fib_notifier.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..292aab83702f
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,164 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+ struct fib_notifier_ops *ops;
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net) {
+ list_for_each_entry(ops, &net->fib_notifier_ops, list)
+ fib_seq += ops->fib_seq_read(net);
+ }
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib_notifier_ops *ops;
+
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ int err = ops->fib_dump(net, nb);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+ int err;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ err = fib_net_dump(net, nb);
+ if (err)
+ goto err_fib_net_dump;
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+
+err_fib_net_dump:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+ struct net *net)
+{
+ struct fib_notifier_ops *o;
+
+ list_for_each_entry(o, &net->fib_notifier_ops, list)
+ if (ops->family == o->family)
+ return -EEXIST;
+ list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+ return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+ struct fib_notifier_ops *ops;
+ int err;
+
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return ERR_PTR(-ENOMEM);
+
+ err = __fib_notifier_ops_register(ops, net);
+ if (err)
+ goto err_register;
+
+ return ops;
+
+err_register:
+ kfree(ops);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->fib_notifier_ops);
+ return 0;
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+ .init = fib_notifier_net_init,
+};
+
+static int __init fib_notifier_init(void)
+{
+ return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..fc0b65093417 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule, int family)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = family,
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule,
+ struct fib_rules_ops *ops)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = ops->family,
+ .rule = rule,
+ };
+
+ ops->fib_rules_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+ struct fib_rules_ops *ops;
+ struct fib_rule *rule;
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return -EAFNOSUPPORT;
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+ family);
+ rules_ops_put(ops);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+ unsigned int fib_rules_seq;
+ struct fib_rules_ops *ops;
+
+ ASSERT_RTNL();
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return 0;
+ fib_rules_seq = ops->fib_rules_seq;
+ rules_ops_put(ops);
+
+ return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..2cba559f14df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
- net->ipv4.fib_seq = 0;
+ err = fib4_notifier_init(net);
+ if (err)
+ return err;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
- if (!net->ipv4.fib_table_hash)
- return -ENOMEM;
+ if (!net->ipv4.fib_table_hash) {
+ err = -ENOMEM;
+ goto err_table_hash_alloc;
+ }
err = fib4_rules_init(net);
if (err < 0)
- goto fail;
+ goto err_rules_init;
return 0;
-fail:
+err_rules_init:
kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+ fib4_notifier_exit(net);
return err;
}
@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
+ fib4_notifier_exit(net);
}
static int __net_init fib_net_init(struct net *net)
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..5d7afb145562 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,71 @@
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ info->family = AF_INET;
+ return call_fib_notifier(nb, net, event_type, info);
}
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
+ ASSERT_RTNL();
+
+ info->family = AF_INET;
net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
+ ASSERT_RTNL();
- return fib_seq;
+ return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
+ int err;
+
+ err = fib4_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ fib_notify(net, nb);
+
+ return 0;
}
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+ .family = AF_INET,
+ .fib_seq_read = fib4_seq_read,
+ .fib_dump = fib4_dump,
+};
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb);
- fib_notify(net, nb);
- }
- rcu_read_unlock();
+ net->ipv4.fib_seq = 0;
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
+ ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.notifier_ops = ops;
- return -EBUSY;
+ return 0;
}
-EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ fib_notifier_ops_unregister(net->ipv4.notifier_ops);
}
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET);
+}
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
- struct fib_rules_ops *ops = net->ipv4.rules_ops;
- struct fib_rule *rule;
-
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f62dc2463280..632b454ce77c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include "fib_lookup.h"
@@ -1451,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN)
break;
- return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
- &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
case FIB_EVENT_NH_DEL:
if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
- return call_fib_notifiers(dev_net(fib_nh->nh_dev),
- event_type, &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+ event_type, &info.info);
default:
break;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..1a6ffb0dab9c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"
@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifiers(net, event_type, &info.info);
+ return call_fib4_notifiers(net, event_type, &info.info);
}
#define MAX_STAT_DEPTH 32
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..f8b24c2e0d77 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o
+ udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..30ee23eef268 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev)
* lo device down, release this obsolete dst and
* reallocate a new router for ifa.
*/
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
+ if (!sp_ifa->rt->rt6i_node) {
ip6_rt_put(sp_ifa->rt);
sp_ifa->rt = NULL;
} else {
@@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* rt6i_ref == 0 means the host route was removed from the
+ /* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..66a103ef7e86
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,61 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+ return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ err = fib6_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+ .family = AF_INET6,
+ .fib_seq_read = fib6_seq_read,
+ .fib_dump = fib6_dump,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.notifier_ops = ops;
+
+ return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..2f29e4e33bd3 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
@@ -29,6 +30,36 @@ struct fib6_rule {
u8 tclass;
};
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+ if (r->dst.plen || r->src.plen || r->tclass)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
+{
+ if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
+
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET6);
+}
+
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET6);
+}
+
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..69ed0043d117 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -153,7 +154,7 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -176,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
-
-static void rt6_release(struct rt6_info *rt)
-{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
@@ -302,6 +295,109 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+ unsigned int h, fib_seq = 0;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ read_lock_bh(&tb->tb6_lock);
+ fib_seq += tb->fib_seq;
+ read_unlock_bh(&tb->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+
+ return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ rt->rt6i_table->fib_seq++;
+ return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+ struct net *net;
+ struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+ if (rt == arg->net->ipv6.ip6_null_entry)
+ return;
+ call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ fib6_rt_dump(rt, w->args);
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
+{
+ w->root = &tb->tb6_root;
+ read_lock_bh(&tb->tb6_lock);
+ fib6_walk(net, w);
+ read_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib6_dump_arg arg;
+ struct fib6_walker *w;
+ unsigned int h;
+
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+
+ w->func = fib6_node_dump;
+ arg.net = net;
+ arg.nb = nb;
+ w->args = &arg;
+
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ fib6_table_dump(net, tb, w);
+ }
+
+ kfree(w);
+
+ return 0;
+}
+
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
@@ -733,8 +829,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
}
fn = fn->parent;
}
- /* No more references are possible at this point. */
- BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
}
}
@@ -879,6 +973,8 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -906,6 +1002,8 @@ add:
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -913,6 +1011,7 @@ add:
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->rt6i_nsiblings;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
@@ -925,6 +1024,7 @@ add:
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
nsiblings--;
@@ -1459,6 +1559,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
@@ -1839,6 +1940,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ int err;
+
+ err = fib6_notifier_init(net);
+ if (err)
+ return err;
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1891,6 +1997,7 @@ out_fib_table_hash:
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
+ fib6_notifier_exit(net);
return -ENOMEM;
}
@@ -1907,6 +2014,7 @@ static void fib6_net_exit(struct net *net)
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
+ fib6_notifier_exit(net);
}
static struct pernet_operations fib6_net_ops = {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..aba07fce67fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
+ if (cfg->fc_flags & RTF_OFFLOAD) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
+ goto out;
+ }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ if (rt->rt6i_flags & RTF_OFFLOAD)
+ *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))