diff options
author | David S. Miller <davem@davemloft.net> | 2021-06-23 12:17:35 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-06-23 12:17:35 -0700 |
commit | e940eb3c1ba8202a73004e6af62508cb9fbb9a0b (patch) | |
tree | b0ab80b197393e5574f22a5e595a716b8875175e | |
parent | 38f75922a6905b010f597fc70dbb5db28398728e (diff) | |
parent | d3e0f57501bde8a9585aff79afcffd99e6a5d91c (diff) |
Merge branch 'lockless-qdisc-opts'
Yunsheng Lin says:
====================
Some optimization for lockless qdisc
Patch 1: remove unnecessary seqcount operation.
Patch 2: implement TCQ_F_CAN_BYPASS.
Patch 3: remove qdisc->empty.
Performance data for pktgen in queue_xmit mode + dummy netdev
with pfifo_fast:
threads unpatched patched delta
1 2.60Mpps 3.21Mpps +23%
2 3.84Mpps 5.56Mpps +44%
4 5.52Mpps 5.58Mpps +1%
8 2.77Mpps 2.76Mpps -0.3%
16 2.24Mpps 2.23Mpps -0.4%
Performance for IP forward testing: 1.05Mpps increases to
1.16Mpps, about 10% improvement.
V3: Add 'Acked-by' from Jakub and 'Tested-by' from Vladimir,
and resend based on latest net-next.
V2: Adjust the comment and commit log according to discussion
in V1.
V1: Drop RFC tag, add nolock_qdisc_is_empty() and do the qdisc
empty checking without the protection of qdisc->seqlock to
aviod doing unnecessary spin_trylock() for contention case.
RFC v4: Use STATE_MISSED and STATE_DRAINING to indicate non-empty
qdisc, and add patch 1 and 3.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sch_generic.h | 31 | ||||
-rw-r--r-- | net/core/dev.c | 27 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 23 |
3 files changed, 59 insertions, 22 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1e625519ae96..c99ffe9cc88f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -37,8 +37,15 @@ enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, + __QDISC_STATE_DRAINING, }; +#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) +#define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) + +#define QDISC_STATE_NON_EMPTY (QDISC_STATE_MISSED | \ + QDISC_STATE_DRAINING) + struct qdisc_size_table { struct rcu_head rcu; struct list_head list; @@ -110,8 +117,6 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; - /* for NOLOCK qdisc, true if there are no enqueued skbs */ - bool empty; struct rcu_head rcu; /* private data */ @@ -145,6 +150,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } +static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) +{ + return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY); +} + static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; @@ -153,7 +163,7 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) - return READ_ONCE(qdisc->empty); + return nolock_qdisc_is_empty(qdisc); return !READ_ONCE(qdisc->q.qlen); } @@ -161,7 +171,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (spin_trylock(&qdisc->seqlock)) - goto nolock_empty; + return true; /* If the MISSED flag is set, it means other thread has * set the MISSED flag before second spin_trylock(), so @@ -183,11 +193,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) /* Retry again in case other CPU may not see the new flag * after it releases the lock at the end of qdisc_run_end(). */ - if (!spin_trylock(&qdisc->seqlock)) - return false; - -nolock_empty: - WRITE_ONCE(qdisc->empty, false); + return spin_trylock(&qdisc->seqlock); } else if (qdisc_is_running(qdisc)) { return false; } @@ -201,15 +207,14 @@ nolock_empty: static inline void qdisc_run_end(struct Qdisc *qdisc) { - write_seqcount_end(&qdisc->running); if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); if (unlikely(test_bit(__QDISC_STATE_MISSED, - &qdisc->state))) { - clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + &qdisc->state))) __netif_schedule(qdisc); - } + } else { + write_seqcount_end(&qdisc->running); } } diff --git a/net/core/dev.c b/net/core/dev.c index 50531a2d0b20..991d09b67bd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { + if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && + qdisc_run_begin(q)) { + /* Retest nolock_qdisc_is_empty() within the protection + * of q->seqlock to protect from racing with requeuing. + */ + if (unlikely(!nolock_qdisc_is_empty(q))) { + rc = q->enqueue(skb, q, &to_free) & + NET_XMIT_MASK; + __qdisc_run(q); + qdisc_run_end(q); + + goto no_lock_out; + } + + qdisc_bstats_cpu_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, NULL, true) && + !nolock_qdisc_is_empty(q)) + __qdisc_run(q); + + qdisc_run_end(q); + return NET_XMIT_SUCCESS; + } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - if (likely(!netif_xmit_frozen_or_stopped(txq))) - qdisc_run(q); + qdisc_run(q); +no_lock_out: if (unlikely(to_free)) kfree_skb_list(to_free); return rc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e9c0afc8becc..d9ac60ffe927 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q, */ if (!netif_xmit_frozen_or_stopped(txq)) set_bit(__QDISC_STATE_MISSED, &q->state); + else + set_bit(__QDISC_STATE_DRAINING, &q->state); } /* Main transmission queue. */ @@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) skb = next; } - if (lock) + + if (lock) { spin_unlock(lock); - __netif_schedule(q); + set_bit(__QDISC_STATE_MISSED, &q->state); + } else { + __netif_schedule(q); + } } static void try_bulk_dequeue_skb(struct Qdisc *q, @@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q) while (qdisc_restart(q, &packets)) { quota -= packets; if (quota <= 0) { - __netif_schedule(q); + if (q->flags & TCQ_F_NOLOCK) + set_bit(__QDISC_STATE_MISSED, &q->state); + else + __netif_schedule(q); + break; } } @@ -698,13 +708,14 @@ retry: if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); } else if (need_retry && - test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { + READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) { /* Delay clearing the STATE_MISSED here to reduce * the overhead of the second spin_trylock() in * qdisc_run_begin() and __netif_schedule() calling * in qdisc_run_end(). */ clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); /* Make sure dequeuing happens after clearing * STATE_MISSED. @@ -714,8 +725,6 @@ retry: need_retry = false; goto retry; - } else { - WRITE_ONCE(qdisc->empty, true); } return skb; @@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - sch->empty = true; dev_hold(dev); refcount_set(&sch->refcnt, 1); @@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev, spin_unlock_bh(qdisc_lock(qdisc)); if (nolock) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); } } |