summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--net/ipv4/fib_trie.c149
1 files changed, 66 insertions, 83 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 58e1224786fa..7fbd2c5a76b1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -146,9 +146,7 @@ struct trie {
#endif
};
-static void tnode_put_child_reorg(struct tnode *tn, unsigned long i,
- struct tnode *n, int wasfull);
-static struct tnode *resize(struct trie *t, struct tnode *tn);
+static void resize(struct trie *t, struct tnode *tn);
/* tnodes to free after resize(); protected by RTNL */
static struct callback_head *tnode_free_head;
static size_t tnode_free_size;
@@ -396,22 +394,13 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
}
-static inline void put_child(struct tnode *tn, unsigned long i,
- struct tnode *n)
-{
- tnode_put_child_reorg(tn, i, n, -1);
-}
-
- /*
- * Add a child at position i overwriting the old value.
- * Update the value of full_children and empty_children.
- */
-
-static void tnode_put_child_reorg(struct tnode *tn, unsigned long i,
- struct tnode *n, int wasfull)
+/* Add a child at position i overwriting the old value.
+ * Update the value of full_children and empty_children.
+ */
+static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
{
struct tnode *chi = rtnl_dereference(tn->child[i]);
- int isfull;
+ int isfull, wasfull;
BUG_ON(i >= tnode_child_length(tn));
@@ -422,10 +411,9 @@ static void tnode_put_child_reorg(struct tnode *tn, unsigned long i,
tn->empty_children--;
/* update fullChildren */
- if (wasfull == -1)
- wasfull = tnode_full(tn, chi);
-
+ wasfull = tnode_full(tn, chi);
isfull = tnode_full(tn, n);
+
if (wasfull && !isfull)
tn->full_children--;
else if (!wasfull && isfull)
@@ -458,9 +446,10 @@ static void tnode_clean_free(struct tnode *tn)
node_free(tn);
}
-static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
+static int inflate(struct trie *t, struct tnode *oldtnode)
{
unsigned long olen = tnode_child_length(oldtnode);
+ struct tnode *tp = node_parent(oldtnode);
struct tnode *tn;
unsigned long i;
t_key m;
@@ -468,9 +457,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
pr_debug("In inflate\n");
tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
-
if (!tn)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
/*
* Preallocate and store tnodes before the actual work so we
@@ -564,30 +552,36 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
put_child(left, j, rtnl_dereference(inode->child[j]));
put_child(right, j, rtnl_dereference(inode->child[j + size]));
}
- put_child(tn, 2*i, resize(t, left));
- put_child(tn, 2*i+1, resize(t, right));
+
+ put_child(tn, 2 * i, left);
+ put_child(tn, 2 * i + 1, right);
tnode_free_safe(inode);
+
+ resize(t, left);
+ resize(t, right);
}
+
+ put_child_root(tp, t, tn->key, tn);
tnode_free_safe(oldtnode);
- return tn;
+ return 0;
nomem:
tnode_clean_free(tn);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
-static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
+static int halve(struct trie *t, struct tnode *oldtnode)
{
unsigned long olen = tnode_child_length(oldtnode);
+ struct tnode *tp = node_parent(oldtnode);
struct tnode *tn, *left, *right;
int i;
pr_debug("In halve\n");
tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
-
if (!tn)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
/*
* Preallocate and store tnodes before the actual work so we
@@ -606,8 +600,10 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
newn = tnode_new(left->key, oldtnode->pos, 1);
- if (!newn)
- goto nomem;
+ if (!newn) {
+ tnode_clean_free(tn);
+ return -ENOMEM;
+ }
put_child(tn, i/2, newn);
}
@@ -635,16 +631,18 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
/* Two nonempty children */
newBinNode = tnode_get_child(tn, i/2);
- put_child(tn, i/2, NULL);
put_child(newBinNode, 0, left);
put_child(newBinNode, 1, right);
- put_child(tn, i/2, resize(t, newBinNode));
+
+ put_child(tn, i / 2, newBinNode);
+
+ resize(t, newBinNode);
}
+
+ put_child_root(tp, t, tn->key, tn);
tnode_free_safe(oldtnode);
- return tn;
-nomem:
- tnode_clean_free(tn);
- return ERR_PTR(-ENOMEM);
+
+ return 0;
}
/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
@@ -704,45 +702,48 @@ nomem:
* tnode_child_length(tn)
*
*/
-static bool should_inflate(const struct tnode *tn)
+static bool should_inflate(const struct tnode *tp, const struct tnode *tn)
{
unsigned long used = tnode_child_length(tn);
unsigned long threshold = used;
/* Keep root node larger */
- threshold *= node_parent(tn) ? inflate_threshold :
- inflate_threshold_root;
+ threshold *= tp ? inflate_threshold : inflate_threshold_root;
used += tn->full_children;
used -= tn->empty_children;
return tn->pos && ((50 * used) >= threshold);
}
-static bool should_halve(const struct tnode *tn)
+static bool should_halve(const struct tnode *tp, const struct tnode *tn)
{
unsigned long used = tnode_child_length(tn);
unsigned long threshold = used;
/* Keep root node larger */
- threshold *= node_parent(tn) ? halve_threshold :
- halve_threshold_root;
+ threshold *= tp ? halve_threshold : halve_threshold_root;
used -= tn->empty_children;
return (tn->bits > 1) && ((100 * used) < threshold);
}
#define MAX_WORK 10
-static struct tnode *resize(struct trie *t, struct tnode *tn)
+static void resize(struct trie *t, struct tnode *tn)
{
- struct tnode *old_tn, *n = NULL;
+ struct tnode *tp = node_parent(tn), *n = NULL;
+ struct tnode __rcu **cptr;
int max_work;
- if (!tn)
- return NULL;
-
pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
tn, inflate_threshold, halve_threshold);
+ /* track the tnode via the pointer from the parent instead of
+ * doing it ourselves. This way we can let RCU fully do its
+ * thing without us interfering
+ */
+ cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie;
+ BUG_ON(tn != rtnl_dereference(*cptr));
+
/* No children */
if (tn->empty_children > (tnode_child_length(tn) - 1))
goto no_children;
@@ -755,39 +756,35 @@ static struct tnode *resize(struct trie *t, struct tnode *tn)
* nonempty nodes that are above the threshold.
*/
max_work = MAX_WORK;
- while (should_inflate(tn) && max_work--) {
- old_tn = tn;
- tn = inflate(t, tn);
-
- if (IS_ERR(tn)) {
- tn = old_tn;
+ while (should_inflate(tp, tn) && max_work--) {
+ if (inflate(t, tn)) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(t->stats->resize_node_skipped);
#endif
break;
}
+
+ tn = rtnl_dereference(*cptr);
}
/* Return if at least one inflate is run */
if (max_work != MAX_WORK)
- return tn;
+ return;
/* Halve as long as the number of empty children in this
* node is above threshold.
*/
max_work = MAX_WORK;
- while (should_halve(tn) && max_work--) {
- old_tn = tn;
- tn = halve(t, tn);
- if (IS_ERR(tn)) {
- tn = old_tn;
+ while (should_halve(tp, tn) && max_work--) {
+ if (halve(t, tn)) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(t->stats->resize_node_skipped);
#endif
break;
}
- }
+ tn = rtnl_dereference(*cptr);
+ }
/* Only one child remains */
if (tn->empty_children == (tnode_child_length(tn) - 1)) {
@@ -797,11 +794,12 @@ one_child:
n = tnode_get_child(tn, --i);
no_children:
/* compress one level */
- node_set_parent(n, NULL);
+ put_child_root(tp, t, tn->key, n);
+ node_set_parent(n, tp);
+
+ /* drop dead node */
tnode_free_safe(tn);
- return n;
}
- return tn;
}
/* readside must use rcu_read_lock currently dump routines
@@ -882,34 +880,19 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
static void trie_rebalance(struct trie *t, struct tnode *tn)
{
- int wasfull;
- t_key cindex, key;
struct tnode *tp;
- key = tn->key;
-
- while (tn != NULL && (tp = node_parent(tn)) != NULL) {
- cindex = get_index(key, tp);
- wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
- tn = resize(t, tn);
-
- tnode_put_child_reorg(tp, cindex, tn, wasfull);
-
- tp = node_parent(tn);
- if (!tp)
- rcu_assign_pointer(t->trie, tn);
+ while ((tp = node_parent(tn)) != NULL) {
+ resize(t, tn);
tnode_free_flush();
- if (!tp)
- break;
tn = tp;
}
/* Handle last (top) tnode */
if (IS_TNODE(tn))
- tn = resize(t, tn);
+ resize(t, tn);
- rcu_assign_pointer(t->trie, tn);
tnode_free_flush();
}