From 112f9cb65643caf7b922e1a66dc752bfab40aeb1 Mon Sep 17 00:00:00 2001
From: Dave Taht
Date: Wed, 8 Nov 2017 15:12:26 -0800
Subject: netem: convert to qdisc_watchdog_schedule_ns

Upgrade the internal netem scheduler to use nanoseconds rather than
ticks throughout. Convert to and from the std "ticks" userspace api
automatically, while allowing for finer grained scheduling to take
place.

Signed-off-by: Dave Taht
Signed-off-by: David S. Miller
---
 net/sched/sch_netem.c | 56 +++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index db0228a65e8c..e64e0e0d94ff 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {

 	struct qdisc_watchdog watchdog;

-	psched_tdiff_t latency;
-	psched_tdiff_t jitter;
+	s64 latency;
+	s64 jitter;

 	u32 loss;
 	u32 ecn;
@@ -145,7 +145,7 @@ struct netem_sched_data {
  * we save skb->tstamp value in skb->cb[] before destroying it.
  */
 struct netem_skb_cb {
-	psched_time_t	time_to_send;
+	u64		time_to_send;
 };

 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -305,11 +305,11 @@ static bool loss_event(struct netem_sched_data *q)
  * std deviation sigma. Uses table lookup to approximate the desired
  * distribution, and a uniformly-distributed pseudo-random source.
  */
-static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
-				struct crndstate *state,
-				const struct disttable *dist)
+static s64 tabledist(s64 mu, s64 sigma,
+		     struct crndstate *state,
+		     const struct disttable *dist)
 {
-	psched_tdiff_t x;
+	s64 x;
 	long t;
 	u32 rnd;

@@ -332,10 +332,10 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }

-static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+static u64 packet_len_2_sched_time(unsigned int len,
+				   struct netem_sched_data *q)
 {
-	u64 ticks;
-
+	u64 offset;
 	len += q->packet_overhead;

 	if (q->cell_size) {
@@ -345,11 +345,9 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 			cells++;
 		len = cells * (q->cell_size + q->cell_overhead);
 	}
-
-	ticks = (u64)len * NSEC_PER_SEC;
-
-	do_div(ticks, q->rate);
-	return PSCHED_NS2TICKS(ticks);
+	offset = (u64)len * NSEC_PER_SEC;
+	do_div(offset, q->rate);
+	return offset;
 }

 static void tfifo_reset(struct Qdisc *sch)
@@ -369,7 +367,7 @@ static void tfifo_reset(struct Qdisc *sch)
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+	u64 tnext = netem_skb_cb(nskb)->time_to_send;
 	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

 	while (*p) {
@@ -515,13 +513,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (q->gap == 0 ||		/* not doing reordering */
 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
 	    q->reorder < get_crandom(&q->reorder_cor)) {
-		psched_time_t now;
-		psched_tdiff_t delay;
+		u64 now;
+		s64 delay;

 		delay = tabledist(q->latency, q->jitter,
 				  &q->delay_cor, q->delay_dist);

-		now = psched_get_time();
+		now = ktime_get_ns();

 		if (q->rate) {
 			struct netem_skb_cb *last = NULL;
@@ -547,7 +545,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				 * from delay.
 				 */
 				delay -= last->time_to_send - now;
-				delay = max_t(psched_tdiff_t, 0, delay);
+				delay = max_t(s64, 0, delay);
 				now = last->time_to_send;
 			}

@@ -562,7 +560,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		 * Do re-ordering by putting one out of N packets at the front
 		 * of the queue.
 		 */
-		cb->time_to_send = psched_get_time();
+		cb->time_to_send = ktime_get_ns();
 		q->counter = 0;

 		netem_enqueue_skb_head(&sch->q, skb);
@@ -609,13 +607,13 @@ deliver:
 	}
 	p = rb_first(&q->t_root);
 	if (p) {
-		psched_time_t time_to_send;
+		u64 time_to_send;

 		skb = rb_to_skb(p);

 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
-		if (time_to_send <= psched_get_time()) {
+		if (time_to_send <= ktime_get_ns()) {
 			rb_erase(p, &q->t_root);

 			sch->q.qlen--;
@@ -659,7 +657,7 @@ deliver:
 			if (skb)
 				goto deliver;
 		}
-		qdisc_watchdog_schedule(&q->watchdog, time_to_send);
+		qdisc_watchdog_schedule_ns(&q->watchdog, time_to_send);
 	}

 	if (q->qdisc) {
@@ -888,8 +886,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)

 	sch->limit = qopt->limit;

-	q->latency = qopt->latency;
-	q->jitter = qopt->jitter;
+	q->latency = PSCHED_TICKS2NS(qopt->latency);
+	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
 	q->limit = qopt->limit;
 	q->gap = qopt->gap;
 	q->counter = 0;
@@ -1011,8 +1009,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_corrupt corrupt;
 	struct tc_netem_rate rate;

-	qopt.latency = q->latency;
-	qopt.jitter = q->jitter;
+	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+			     UINT_MAX);
+	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+			     UINT_MAX);
 	qopt.limit = q->limit;
 	qopt.loss = q->loss;
 	qopt.gap = q->gap;
--
cgit v1.2.3
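The arithmetic at the heart of the conversion is packet_len_2_sched_time():
with the PSCHED_NS2TICKS() rounding step gone, the transmission delay for a
packet is simply len * NSEC_PER_SEC / rate, kept in nanoseconds end to end.
Below is a minimal standalone sketch of that formula (plain userspace C, not
kernel code; the helper name len_to_delay_ns is invented for illustration,
and do_div() is replaced by ordinary 64-bit division; packet overhead and
cell sizing are omitted):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Time, in ns, to serialize len bytes at rate bytes/sec; mirrors the
 * core computation of packet_len_2_sched_time() after this patch.
 */
static uint64_t len_to_delay_ns(uint64_t len, uint64_t rate)
{
	return len * NSEC_PER_SEC / rate;
}

int main(void)
{
	/* A 1500-byte packet at 12.5 MB/s (100 Mbit/s): 120000 ns. */
	printf("%llu ns\n",
	       (unsigned long long)len_to_delay_ns(1500, 12500000));
	return 0;
}

Staying in nanoseconds all the way to qdisc_watchdog_schedule_ns(), which
arms the hrtimer, avoids rounding every delay to the coarser psched tick.
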
From 99803171ef04037092bf5eb29ae801e8b4d49a75 Mon Sep 17 00:00:00 2001
From: Dave Taht
Date: Wed, 8 Nov 2017 15:12:27 -0800
Subject: netem: add uapi to express delay and jitter in nanoseconds

netem userspace has long relied on a horrible /proc/net/psched hack
to translate the current notion of "ticks" to nanoseconds.

Expressing latency and jitter instead, in well defined nanoseconds,
increases the dynamic range of emulated delays and jitter in netem.

It will also ease a transition where reducing a tick to nsec
equivalence would constrain the max delay in prior versions of
netem to only 4.3 seconds.

Signed-off-by: Dave Taht
Suggested-by: Eric Dumazet
Reviewed-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/uapi/linux/pkt_sched.h |  2 ++
 net/sched/sch_netem.c          | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 6a2c5ea7e9c4..8fe6d1842bee 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -537,6 +537,8 @@ enum {
 	TCA_NETEM_ECN,
 	TCA_NETEM_RATE64,
 	TCA_NETEM_PAD,
+	TCA_NETEM_LATENCY64,
+	TCA_NETEM_JITTER64,
 	__TCA_NETEM_MAX,
 };

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e64e0e0d94ff..47d6decba0ea 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -819,6 +819,8 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
+	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
+	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
 };

 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -916,6 +918,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 		q->rate = max_t(u64, q->rate,
 				nla_get_u64(tb[TCA_NETEM_RATE64]));

+	if (tb[TCA_NETEM_LATENCY64])
+		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
+
+	if (tb[TCA_NETEM_JITTER64])
+		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
+
 	if (tb[TCA_NETEM_ECN])
 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

@@ -1020,6 +1028,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
 		goto nla_put_failure;

+	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
+		goto nla_put_failure;
+
+	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
+		goto nla_put_failure;
+
 	cor.delay_corr = q->delay_cor.rho;
 	cor.loss_corr = q->loss_cor.rho;
 	cor.dup_corr = q->dup_cor.rho;
--
cgit v1.2.3
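Note the precedence this patch establishes in netem_change(): the legacy
32-bit tick fields from struct tc_netem_qopt are converted to nanoseconds
first, then TCA_NETEM_LATENCY64/TCA_NETEM_JITTER64, when present, overwrite
the result. A standalone sketch of that ordering (userspace C; the struct
and NULL-able pointers are stand-ins for the real netlink types, and a
64 ns psched tick is assumed, i.e. PSCHED_SHIFT == 6 as in kernels of this
era):

#include <stdint.h>
#include <stdio.h>

/* Assumed: one psched tick is 64 ns (PSCHED_SHIFT == 6). */
#define PSCHED_TICKS2NS(t) ((int64_t)(t) << 6)

struct cfg {
	int64_t latency;	/* ns */
	int64_t jitter;		/* ns */
};

/* NULL pointer == attribute absent, mimicking tb[TCA_NETEM_*]. */
static void change(struct cfg *q,
		   uint32_t latency_ticks, uint32_t jitter_ticks,
		   const int64_t *latency64, const int64_t *jitter64)
{
	q->latency = PSCHED_TICKS2NS(latency_ticks);	/* legacy path */
	q->jitter = PSCHED_TICKS2NS(jitter_ticks);
	if (latency64)		/* TCA_NETEM_LATENCY64 wins if present */
		q->latency = *latency64;
	if (jitter64)		/* TCA_NETEM_JITTER64 wins if present */
		q->jitter = *jitter64;
}

int main(void)
{
	struct cfg q;
	int64_t lat = 100000000000LL;	/* 100 s, beyond the u32 tick range */

	change(&q, 0, 0, &lat, NULL);
	printf("latency = %lld ns\n", (long long)q.latency);
	return 0;
}

The dump side is the mirror image: the nanosecond values are always emitted
via nla_put(), while the legacy qopt fields are clamped to UINT_MAX ticks,
so old userspace sees a saturated value rather than a wrapped one.
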
From 836af83b54e3e285c4a0cc06c24aeb737d3e0e18 Mon Sep 17 00:00:00 2001
From: Dave Taht
Date: Wed, 8 Nov 2017 15:12:28 -0800
Subject: netem: support delivering packets in delayed time slots

Slotting is a crude approximation of the behaviors of shared media such
as cable, wifi, and LTE, which gather up a bunch of packets within a
varying delay window and deliver them, relative to that, nearly all at
once.

It works within the existing loss, duplication, jitter and delay
parameters of netem. Some amount of inherent latency must be specified,
regardless.

The new "slot" parameter specifies a minimum and maximum delay between
transmission attempts.

The "bytes" and "packets" parameters can be used to limit the amount of
information transferred per slot.

Examples of use:

tc qdisc add dev eth0 root netem delay 200us \
	slot 800us 10ms bytes 64k packets 42

A more correct example, using stacked netem instances and a packet
limit to emulate a tail drop wifi queue with slots and variable
packet delivery, with a 200Mbit isochronous underlying rate, and
20ms path delay:

tc qdisc add dev eth0 root handle 1: netem delay 20ms rate 200mbit \
	limit 10000
tc qdisc add dev eth0 parent 1:1 handle 10:1 netem delay 200us \
	slot 800us 10ms bytes 64k packets 42 limit 512

Signed-off-by: Dave Taht
Signed-off-by: David S. Miller
---
 include/uapi/linux/pkt_sched.h |  8 +++++
 net/sched/sch_netem.c          | 74 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 8fe6d1842bee..af3cc2f4e1ad 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -539,6 +539,7 @@ enum {
 	TCA_NETEM_PAD,
 	TCA_NETEM_LATENCY64,
 	TCA_NETEM_JITTER64,
+	TCA_NETEM_SLOT,
 	__TCA_NETEM_MAX,
 };

@@ -576,6 +577,13 @@ struct tc_netem_rate {
 	__s32	cell_overhead;
 };

+struct tc_netem_slot {
+	__s64	min_delay; /* nsec */
+	__s64	max_delay;
+	__s32	max_packets;
+	__s32	max_bytes;
+};
+
 enum {
 	NETEM_LOSS_UNSPEC,
 	NETEM_LOSS_GI,	/* General Intuitive - 4 state model */

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 47d6decba0ea..b686e755fda9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -135,6 +135,13 @@ struct netem_sched_data {
 		u32 a5; /* p23 used only in 4-states */
 	} clg;

+	struct tc_netem_slot slot_config;
+	struct slotstate {
+		u64 slot_next;
+		s32 packets_left;
+		s32 bytes_left;
+	} slot;
+
 };

 /* Time stamp put into socket buffer control block
@@ -591,6 +598,20 @@ finish_segs:
 	return NET_XMIT_SUCCESS;
 }

+/* Delay the next round with a new future slot with a
+ * correct number of bytes and packets.
+ */
+
+static void get_slot_next(struct netem_sched_data *q, u64 now)
+{
+	q->slot.slot_next = now + q->slot_config.min_delay +
+		(prandom_u32() *
+		 (q->slot_config.max_delay -
+		  q->slot_config.min_delay) >> 32);
+	q->slot.packets_left = q->slot_config.max_packets;
+	q->slot.bytes_left = q->slot_config.max_bytes;
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -608,14 +629,17 @@ deliver:
 	p = rb_first(&q->t_root);
 	if (p) {
 		u64 time_to_send;
+		u64 now = ktime_get_ns();

 		skb = rb_to_skb(p);

 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
-		if (time_to_send <= ktime_get_ns()) {
-			rb_erase(p, &q->t_root);
+		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+			get_slot_next(q, now);

+		if (time_to_send <= now && q->slot.slot_next <= now) {
+			rb_erase(p, &q->t_root);
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
@@ -634,6 +658,14 @@ deliver:
 			skb->tstamp = 0;
 #endif

+			if (q->slot.slot_next) {
+				q->slot.packets_left--;
+				q->slot.bytes_left -= qdisc_pkt_len(skb);
+				if (q->slot.packets_left <= 0 ||
+				    q->slot.bytes_left <= 0)
+					get_slot_next(q, now);
+			}
+
 			if (q->qdisc) {
 				unsigned int pkt_len = qdisc_pkt_len(skb);
 				struct sk_buff *to_free = NULL;
@@ -657,7 +689,10 @@ deliver:
 			if (skb)
 				goto deliver;
 		}
-		qdisc_watchdog_schedule_ns(&q->watchdog, time_to_send);
+
+		qdisc_watchdog_schedule_ns(&q->watchdog,
+					   max(time_to_send,
+					       q->slot.slot_next));
 	}

 	if (q->qdisc) {
@@ -688,6 +723,7 @@ static void dist_free(struct disttable *d)
  * Distribution data is a variable size payload containing
  * signed 16 bit values.
  */
+
 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -718,6 +754,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	return 0;
 }

+static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
+{
+	const struct tc_netem_slot *c = nla_data(attr);
+
+	q->slot_config = *c;
+	if (q->slot_config.max_packets == 0)
+		q->slot_config.max_packets = INT_MAX;
+	if (q->slot_config.max_bytes == 0)
+		q->slot_config.max_bytes = INT_MAX;
+	q->slot.packets_left = q->slot_config.max_packets;
+	q->slot.bytes_left = q->slot_config.max_bytes;
+	if (q->slot_config.min_delay | q->slot_config.max_delay)
+		q->slot.slot_next = ktime_get_ns();
+	else
+		q->slot.slot_next = 0;
+}
+
 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
 {
 	const struct tc_netem_corr *c = nla_data(attr);
@@ -821,6 +874,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
+	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
 };

 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -927,6 +981,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_ECN])
 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

+	if (tb[TCA_NETEM_SLOT])
+		get_slot(q, tb[TCA_NETEM_SLOT]);
+
 	return ret;
 }

@@ -1016,6 +1073,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_reorder reorder;
 	struct tc_netem_corrupt corrupt;
 	struct tc_netem_rate rate;
+	struct tc_netem_slot slot;

 	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
 			     UINT_MAX);
@@ -1070,6 +1128,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;

+	if (q->slot_config.min_delay | q->slot_config.max_delay) {
+		slot = q->slot_config;
+		if (slot.max_packets == INT_MAX)
+			slot.max_packets = 0;
+		if (slot.max_bytes == INT_MAX)
+			slot.max_bytes = 0;
+		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
+			goto nla_put_failure;
+	}
+
 	return nla_nest_end(skb, nla);

 nla_put_failure:
--
cgit v1.2.3
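The core of slotting is get_slot_next()'s division-free bounded random
delay: a 32-bit random value multiplied by the delay range and shifted right
by 32 yields a uniform offset in [0, max_delay - min_delay). A standalone
sketch of that fixed-point trick (userspace C; rand32() is a crude stand-in
for the kernel's prandom_u32()):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Crude 32-bit random stand-in for prandom_u32(); not for real use. */
static uint32_t rand32(void)
{
	return (uint32_t)rand() ^ ((uint32_t)rand() << 16);
}

/* Next slot start, as in get_slot_next(): rand32() * range is at most
 * 2^32 * range, so shifting right by 32 gives a value in [0, range).
 */
static uint64_t next_slot(uint64_t now, int64_t min_delay, int64_t max_delay)
{
	return now + min_delay +
	       ((uint64_t)rand32() *
	        (uint64_t)(max_delay - min_delay) >> 32);
}

int main(void)
{
	/* "slot 800us 10ms" from the commit message's example. */
	uint64_t t = next_slot(0, 800000, 10000000);

	printf("next slot opens at %llu ns\n", (unsigned long long)t);
	return 0;
}

netem_dequeue() then arms the watchdog for whichever is later, the packet's
send time or slot_next, so packets accumulate until the slot opens and are
delivered in a burst, bounded by the per-slot packet and byte budgets.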