summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-03-17 21:16:35 -0700
committerDavid S. Miller <davem@davemloft.net>2020-03-17 21:16:35 -0700
commitc7cba8326c1eba1f49f39a71cd9ef03a03a80da4 (patch)
treead0758e5b63398640aa6eb986b9d3e524682d800
parent24ee86511b01f73a1ef1dd8a8a2e685e5ec03bfe (diff)
parent583396f4ca4d6ee5ad314ba0f4cb5b89deb75e76 (diff)
Merge branch 'net_sched-allow-use-of-hrtimer-slack'
Eric Dumazet says: ==================== net_sched: allow use of hrtimer slack Packet schedulers have used hrtimers with exact expiry times. Some of them can afford having a slack, in order to reduce the number of timer interrupts and feed bigger batches to increase efficiency. FQ for example does not care if throttled packets are sent with an additional (small) delay. Original observation of having maybe too many interrupts was made by Willem de Bruijn. v2: added strict netlink checking (Jakub Kicinski) ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/pkt_sched.h10
-rw-r--r--include/uapi/linux/pkt_sched.h2
-rw-r--r--net/sched/sch_api.c21
-rw-r--r--net/sched/sch_fq.c21
4 files changed, 42 insertions, 12 deletions
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 20d2c6419612..9092e697059e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -75,7 +75,15 @@ struct qdisc_watchdog {
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid);
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
+
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+ u64 delta_ns);
+
+static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd,
+ u64 expires)
+{
+ return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL);
+}
static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
psched_time_t expires)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index ea39287d59c8..7307a29a103e 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -911,6 +911,8 @@ enum {
TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
+ TCA_FQ_TIMER_SLACK, /* timer slack */
+
__TCA_FQ_MAX
};
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 50794125bf02..0d99df1e764d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_watchdog_init);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+ u64 delta_ns)
{
if (test_bit(__QDISC_STATE_DEACTIVATED,
&qdisc_root_sleeping(wd->qdisc)->state))
return;
- if (wd->last_expires == expires)
- return;
+ if (hrtimer_is_queued(&wd->timer)) {
+ /* If timer is already set in [expires, expires + delta_ns],
+ * do not reprogram it.
+ */
+ if (wd->last_expires - expires <= delta_ns)
+ return;
+ }
wd->last_expires = expires;
- hrtimer_start(&wd->timer,
- ns_to_ktime(expires),
- HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_range_ns(&wd->timer,
+ ns_to_ktime(expires),
+ delta_ns,
+ HRTIMER_MODE_ABS_PINNED);
}
-EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
+EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 371ad84def3b..4c060134c736 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -121,6 +121,8 @@ struct fq_sched_data {
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
+
+ u32 timer_slack; /* hrtimer slack in ns */
struct qdisc_watchdog watchdog;
};
@@ -504,8 +506,9 @@ begin:
head = &q->old_flows;
if (!head->first) {
if (q->time_next_delayed_flow != ~0ULL)
- qdisc_watchdog_schedule_ns(&q->watchdog,
- q->time_next_delayed_flow);
+ qdisc_watchdog_schedule_range_ns(&q->watchdog,
+ q->time_next_delayed_flow,
+ q->timer_slack);
return NULL;
}
}
@@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log)
}
static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+ [TCA_FQ_UNSPEC] = { .strict_start_type = TCA_FQ_TIMER_SLACK },
+
[TCA_FQ_PLIMIT] = { .type = NLA_U32 },
[TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
[TCA_FQ_QUANTUM] = { .type = NLA_U32 },
@@ -747,6 +752,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
+ [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -833,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
q->ce_threshold = (u64)NSEC_PER_USEC *
nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+ if (tb[TCA_FQ_TIMER_SLACK])
+ q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+
if (!err) {
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
@@ -884,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->orphan_mask = 1024 - 1;
q->low_rate_threshold = 550000 / 8;
+ q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+
/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
@@ -924,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
- nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
+ nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -947,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.flows_plimit = q->stat_flows_plimit;
st.pkts_too_long = q->stat_pkts_too_long;
st.allocation_errors = q->stat_allocation_errors;
- st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+ st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
+ ktime_get_ns();
st.flows = q->flows;
st.inactive_flows = q->inactive_flows;
st.throttled_flows = q->throttled_flows;