summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-03-06 00:26:16 -0500
committerDavid S. Miller <davem@davemloft.net>2015-03-06 00:26:16 -0500
commitfabe7bed114a0ffc02845b731c26aadd800e7b5d (patch)
treefa5bf3aaa17cb488284366d10a314e8abfb66583 /net/ipv4
parent24d2e4a50737867aba1e96a587ef0d90c17e3035 (diff)
parentc1beeef7a32a791a60e2adcc217d4461cd1e25d1 (diff)
Merge branch 'l3_hw_offload'
Scott Feldman says: ==================== switchdev: add IPv4 routing offload v4: - Add NETIF_F_NETNS_LOCAL to rocker port feature list to keep rocker ports in the default netns. Rocker hardware can't be partitioned to support multiple namespaces, currently. It would be interesting to add netns support to rocker device by basically adding another match field to each table to match on some unique netns ID, with a port knowing it's netns ID. Future work TDB. - Up-level the RTNH_F_EXTERNAL marking of routes installed to offload device from driver to switchdev common code. Now driver can't skip routes. Either it can install the route or it cannot. Yes or No. If no on any route, all offloading is aborted by removing routes from offload device and setting ipv4.fib_offload_disabled so no more routes can be offloaded. This is harsh, but it's our starting point. We can refine the policies in follow-up work. - Add new net.ipv4.fib_offload_disabled bool that is set if anything goes wrong with route offloading. We can refine this later to make the setting per-device or per-device-port-netdev, but let's start here simple and refine in follow-up work. - Rebase against Alex's latest FIB changes. I think I did everything correctly, and didn't run into any issues with testing, but I'd like Alex to look over the changes and maybe follow-up with any cleanups. v3: Changes based on v2 review comments: - Move check for custom rules up earlier in patch set, to keep git bisect safe. - Simplify the route add/modify failure handling to simple try until failure, and then on failure, undo everything. The switchdev driver will return err when route can normally be installed to device, but the install fails for one reason or another (no space left on device, etc). If a failure happens, uninstall all routes from the device, punting forwarding for all routes back to the kernel. - Scan route's full nexthop list, ensuring all nexthop devs belong to the same switchdev device, otherwise don't try to install route to device. v2: Changes based on v1 review comments and discussions at netconf: - Allow route modification, but use same ndo op used for adding route. Driver/device is expected to modify route in-place, if it can, to avoid interruption of service. - Add new RTNH_F_EXTERNAL flag to mark FIB entries offloaded externally. - Don't offload routes if using custom IP rules. If routes are already offloaded, and custom IP rules are turned on, flush routes from offload device. (Offloaded routes are marked with RTNH_F_EXTERNAL). - Use kernel's neigh resolution code to resolve route's nexthops' neigh MAC addrs. (Thanks davem, works great!). - Use fib->fib_priority in rocker driver to give priorities to routes in OF-DPA unicast route table. v1: This patch set adds L3 routing offload support for IPv4 routes. The idea is to mirror routes installed in the kernel's FIB down to a hardware switch device to offload the data forwarding path for L3. Only the data forwarding path is intercepted. Control and management of the kernel's FIB remains with the kernel. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_frontend.c13
-rw-r--r--net/ipv4/fib_rules.c3
-rw-r--r--net/ipv4/fib_trie.c92
3 files changed, 107 insertions, 1 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 220c4b4af4cf..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,6 +144,19 @@ static void fib_flush(struct net *net)
rt_cache_flush(net);
}
+void fib_flush_external(struct net *net)
+{
+ struct fib_table *tb;
+ struct hlist_head *head;
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ head = &net->ipv4.fib_table_hash[h];
+ hlist_for_each_entry(tb, head, tb_hlist)
+ fib_table_flush_external(tb);
+ }
+}
+
/*
* Find address type as if only "dev" was present in the system. If
* on_dev is NULL then all interfaces are taken into consideration.
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
+
err = 0;
errout:
return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+ fib_flush_external(rule->fr_net);
}
static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fae34ad4bb1a..6544f1a0cfa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/switchdev.h>
#include "fib_lookup.h"
#define MAX_STAT_DEPTH 32
@@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = state & ~FA_S_ACCESSED;
new_fa->fa_slen = fa->fa_slen;
+ err = netdev_switch_fib_ipv4_add(key, plen, fi,
+ new_fa->fa_tos,
+ cfg->fc_type,
+ tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ kmem_cache_free(fn_alias_kmem, new_fa);
+ goto out;
+ }
+
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
alias_free_mem_rcu(fa);
fib_release_info(fi_drop);
@@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
+ /* (Optionally) offload fib entry to switch hardware. */
+ err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+ cfg->fc_type, tb->tb_id);
+ if (err) {
+ netdev_switch_fib_ipv4_abort(fi);
+ goto out_free_new_fa;
+ }
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_sw_fib_del;
if (!plen)
tb->tb_num_default++;
@@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
succeeded:
return 0;
+out_sw_fib_del:
+ netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!fa_to_delete)
return -ESRCH;
+ netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+ cfg->fc_type, tb->tb_id);
+
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1536,6 +1561,67 @@ found:
return n;
}
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct fib_alias *fa;
+ struct tnode *n, *pn;
+ unsigned long cindex;
+ unsigned char slen;
+ int found = 0;
+
+ n = rcu_dereference(t->trie);
+ if (!n)
+ return;
+
+ pn = NULL;
+ cindex = 0;
+
+ while (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
+backtrace:
+ /* walk trie in reverse order */
+ do {
+ while (!(cindex--)) {
+ t_key pkey = pn->key;
+
+ n = pn;
+ pn = node_parent(n);
+
+ /* resize completed node */
+ resize(t, n);
+
+ /* if we got the root we are done */
+ if (!pn)
+ return;
+
+ cindex = get_index(pkey, pn);
+ }
+
+ /* grab the next available node */
+ n = tnode_get_child(pn, cindex);
+ } while (!n);
+ }
+
+ hlist_for_each_entry(fa, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) {
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
+ }
+ }
+
+ /* if trie is leaf only loop is completed */
+ if (pn)
+ goto backtrace;
+}
+
/* Caller must hold RTNL. */
int fib_table_flush(struct fib_table *tb)
{
@@ -1589,6 +1675,10 @@ backtrace:
struct fib_info *fi = fa->fa_info;
if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+ netdev_switch_fib_ipv4_del(n->key,
+ KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos,
+ fa->fa_type, tb->tb_id);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);