author		Peter Oskolkov <posk@google.com>	2018-12-04 11:55:56 -0800
committer	David S. Miller <davem@davemloft.net>	2018-12-05 20:18:41 -0800
commit		d66280b12bd7ad6345df4dee2ee1c20f5902242d (patch)
tree		888236ef605669fde8635a6a1c5e52af173e9b8f /net/sched
parent		932c4417077958fe486e6d8a64a06e65d664cb85 (diff)
net: netem: use a list in addition to rbtree
When testing high-bandwidth TCP streams with large windows, high latency, and low jitter, netem consumes a lot of CPU cycles doing rbtree rebalancing.

This patch uses a linear list/queue in addition to the rbtree: if an incoming packet is past the tail of the linear queue, it is added there, otherwise it is inserted into the rbtree.

Without this patch, perf shows netem_enqueue, netem_dequeue, and rb_* functions among the top offenders. With this patch, only netem_enqueue is noticeable if jitter is low/absent.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
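[Editor's note] The core of the change can be illustrated with a small standalone C sketch (illustration only: the pkt and tfifo types below are hypothetical stand-ins for the kernel's sk_buff and netem_sched_data, and insert_reordered() merely stands in for the rbtree insertion shown in the diff):

/*
 * Sketch of the hybrid enqueue: O(1) tail append when packets arrive
 * in send-time order, rbtree fallback when jitter reorders them.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct pkt {
	uint64_t time_to_send;	/* scheduled departure time (ns) */
	struct pkt *next;	/* linkage within the linear queue */
};

struct tfifo {
	struct pkt *t_head;	/* earliest packet in the linear queue */
	struct pkt *t_tail;	/* latest packet in the linear queue */
	/* the real qdisc also keeps an rbtree for reordered packets */
};

/* Stand-in for rb_link_node()/rb_insert_color() on the slow path. */
static void insert_reordered(struct tfifo *q, struct pkt *p)
{
	(void)q;
	printf("slow path (rbtree): %llu\n",
	       (unsigned long long)p->time_to_send);
}

static void tfifo_enqueue(struct tfifo *q, struct pkt *p)
{
	/* Fast path: with low or no jitter, time_to_send is usually
	 * non-decreasing, so most packets append at the tail in O(1)
	 * and no rebalancing work is done at all.
	 */
	if (!q->t_tail || p->time_to_send >= q->t_tail->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = p;
		else
			q->t_head = p;
		q->t_tail = p;
		return;
	}
	/* Slow path: jitter pushed this packet ahead of the tail. */
	insert_reordered(q, p);
}

int main(void)
{
	uint64_t times[] = { 100, 200, 150, 300 };	/* 150 is reordered */
	struct tfifo q = { NULL, NULL };
	struct pkt *p;
	size_t i;

	for (i = 0; i < sizeof(times) / sizeof(times[0]); i++) {
		p = calloc(1, sizeof(*p));
		p->time_to_send = times[i];
		tfifo_enqueue(&q, p);
	}
	for (p = q.t_head; p; p = p->next)
		printf("fast path (list): %llu\n",
		       (unsigned long long)p->time_to_send);
	return 0;
}

On dequeue, the two structures are merged by comparing their earliest entries and taking the smaller time_to_send, which is what netem_peek() and netem_erase_head() in the diff below implement. The low-jitter regime described in the commit message corresponds to netem configurations along the lines of "tc qdisc add dev eth0 root netem delay 100ms limit 100000" (illustrative device and values).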
Diffstat (limited to 'net/sched')
-rw-r--r--	net/sched/sch_netem.c | 89
1 file changed, 69 insertions(+), 20 deletions(-)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2c38e3d07924..84658f60a872 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,6 +77,10 @@ struct netem_sched_data {
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
 
+	/* a linear queue; reduces rbtree rebalancing when jitter is low */
+	struct sk_buff	*t_head;
+	struct sk_buff	*t_tail;
+
 	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
 
@@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
 		rb_erase(&skb->rbnode, &q->t_root);
 		rtnl_kfree_skbs(skb, skb);
 	}
+
+	rtnl_kfree_skbs(q->t_head, q->t_tail);
+	q->t_head = NULL;
+	q->t_tail = NULL;
 }
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
-	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-	while (*p) {
-		struct sk_buff *skb;
-
-		parent = *p;
-		skb = rb_to_skb(parent);
-		if (tnext >= netem_skb_cb(skb)->time_to_send)
-			p = &parent->rb_right;
+	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+		if (q->t_tail)
+			q->t_tail->next = nskb;
 		else
-			p = &parent->rb_left;
+			q->t_head = nskb;
+		q->t_tail = nskb;
+	} else {
+		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+		while (*p) {
+			struct sk_buff *skb;
+
+			parent = *p;
+			skb = rb_to_skb(parent);
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&nskb->rbnode, parent, p);
+		rb_insert_color(&nskb->rbnode, &q->t_root);
 	}
-	rb_link_node(&nskb->rbnode, parent, p);
-	rb_insert_color(&nskb->rbnode, &q->t_root);
 	sch->q.qlen++;
 }
 
@@ -530,9 +547,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				t_skb = skb_rb_last(&q->t_root);
 				t_last = netem_skb_cb(t_skb);
 				if (!last ||
-				    t_last->time_to_send > last->time_to_send) {
+				    t_last->time_to_send > last->time_to_send)
+					last = t_last;
+			}
+			if (q->t_tail) {
+				struct netem_skb_cb *t_last =
+					netem_skb_cb(q->t_tail);
+
+				if (!last ||
+				    t_last->time_to_send > last->time_to_send)
 					last = t_last;
-				}
 			}
 
 			if (last) {
@@ -611,11 +635,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
 
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+	struct sk_buff *skb = skb_rb_first(&q->t_root);
+	u64 t1, t2;
+
+	if (!skb)
+		return q->t_head;
+	if (!q->t_head)
+		return skb;
+
+	t1 = netem_skb_cb(skb)->time_to_send;
+	t2 = netem_skb_cb(q->t_head)->time_to_send;
+	if (t1 < t2)
+		return skb;
+	return q->t_head;
+}
+
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+	if (skb == q->t_head) {
+		q->t_head = skb->next;
+		if (!q->t_head)
+			q->t_tail = NULL;
+	} else {
+		rb_erase(&skb->rbnode, &q->t_root);
+	}
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	struct rb_node *p;
 
 tfifo_dequeue:
 	skb = __qdisc_dequeue_head(&sch->q);
@@ -625,20 +676,18 @@ deliver:
 		qdisc_bstats_update(sch, skb);
 		return skb;
 	}
-	p = rb_first(&q->t_root);
-	if (p) {
+	skb = netem_peek(q);
+	if (skb) {
 		u64 time_to_send;
 		u64 now = ktime_get_ns();
 
-		skb = rb_to_skb(p);
-
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
-		if (time_to_send <= now &&  q->slot.slot_next <= now) {
-			rb_erase(p, &q->t_root);
+		if (time_to_send <= now && q->slot.slot_next <= now) {
+			netem_erase_head(q, skb);
 
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;