diff options
author | Eric Dumazet <edumazet@google.com> | 2016-12-04 09:48:16 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-12-05 15:21:59 -0500 |
commit | 1c0d32fde5bdf1184bc274f864c09799278a1114 (patch) | |
tree | 47367d46dfc125e19294c3f5fa9a021520bd5660 /net/netfilter | |
parent | a6e169312971219a34927e8fdece60046fafb8ba (diff) |
net_sched: gen_estimator: complete rewrite of rate estimators
1) The old code was hard to maintain, due to complex lock chains.
(We will probably be able to remove some kfree_rcu() calls in callers.)
2) Using a single timer to update all estimators does not scale.
3) Code was buggy on 32-bit kernels (WRITE_ONCE() on a 64-bit quantity
is not guaranteed to be a single atomic store there)
In this rewrite:
- I removed the RB tree that had to be scanned in
gen_estimator_active(). qdisc dumps should be much faster.
- Each estimator has its own timer.
- Estimates are maintained in a net_rate_estimator structure,
instead of dirtying the qdisc. Minor, but part of the simplification.
- Reading the estimator uses RCU and a seqcount to provide proper
support for 32bit kernels.
- We reduce memory usage when estimators are not used, since
we store a pointer instead of the bytes/packets counters.
- xt_rateest_mt() no longer has to grab a spinlock.
(In the future, xt_rateest_tg() could be switched to per cpu counters)
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/xt_RATEEST.c | 4 | ||||
-rw-r--r-- | net/netfilter/xt_rateest.c | 28 |
2 files changed, 15 insertions, 17 deletions
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dbd6c4a12b97..91a373a3f534 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -63,7 +63,7 @@ void xt_rateest_put(struct xt_rateest *est)
 	mutex_lock(&xt_rateest_mutex);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
-		gen_kill_estimator(&est->bstats, &est->rstats);
+		gen_kill_estimator(&est->rate_est);
 		/*
 		 * gen_estimator est_timer() might access est->lock or bstats,
 		 * wait a RCU grace period before freeing 'est'
@@ -132,7 +132,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 	cfg.est.interval = info->interval;
 	cfg.est.ewma_log = info->ewma_log;
 
-	ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
+	ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
 				&est->lock, NULL, &cfg.opt);
 	if (ret < 0)
 		goto err2;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 7720b036d76a..1db02f6fca54 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,35 +18,33 @@ static bool
 xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
-	struct gnet_stats_rate_est64 *r;
+	struct gnet_stats_rate_est64 sample = {0};
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
 
-	spin_lock_bh(&info->est1->lock);
-	r = &info->est1->rstats;
+	gen_estimator_read(&info->est1->rate_est, &sample);
+
 	if (info->flags & XT_RATEEST_MATCH_DELTA) {
-		bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
-		pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
+		bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0;
+		pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0;
 	} else {
-		bps1 = r->bps;
-		pps1 = r->pps;
+		bps1 = sample.bps;
+		pps1 = sample.pps;
 	}
-	spin_unlock_bh(&info->est1->lock);
 
 	if (info->flags & XT_RATEEST_MATCH_ABS) {
 		bps2 = info->bps2;
 		pps2 = info->pps2;
 	} else {
-		spin_lock_bh(&info->est2->lock);
-		r = &info->est2->rstats;
+		gen_estimator_read(&info->est2->rate_est, &sample);
+
 		if (info->flags & XT_RATEEST_MATCH_DELTA) {
-			bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
-			pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
+			bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0;
+			pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0;
 		} else {
-			bps2 = r->bps;
-			pps2 = r->pps;
+			bps2 = sample.bps;
+			pps2 = sample.pps;
 		}
-		spin_unlock_bh(&info->est2->lock);
 	}
 
 	switch (info->mode) {