Diffstat (limited to 'net/sched')
-rw-r--r--   net/sched/act_ct.c       |  10
-rw-r--r--   net/sched/act_police.c   |  59
-rw-r--r--   net/sched/act_sample.c   |  27
-rw-r--r--   net/sched/cls_api.c      |   3
-rw-r--r--   net/sched/cls_flower.c   |  40
-rw-r--r--   net/sched/sch_cbq.c      |   4
-rw-r--r--   net/sched/sch_generic.c  |  75
-rw-r--r--   net/sched/sch_taprio.c   |  70
8 files changed, 208 insertions, 80 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 16e888a9601d..ec7a1c438df9 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -732,7 +732,8 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 #endif
 	}
 
-	*qdisc_skb_cb(skb) = cb;
+	if (err != -EINPROGRESS)
+		*qdisc_skb_cb(skb) = cb;
 	skb_clear_hash(skb);
 	skb->ignore_df = 1;
 	return err;
@@ -967,7 +968,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
 	if (err == -EINPROGRESS) {
 		retval = TC_ACT_STOLEN;
-		goto out;
+		goto out_clear;
 	}
 	if (err)
 		goto drop;
@@ -990,9 +991,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 
 	/* Associate skb with specified zone. */
 	if (tmpl) {
-		ct = nf_ct_get(skb, &ctinfo);
-		if (skb_nfct(skb))
-			nf_conntrack_put(skb_nfct(skb));
+		nf_conntrack_put(skb_nfct(skb));
 		nf_conntrack_get(&tmpl->ct_general);
 		nf_ct_set(skb, tmpl, IP_CT_NEW);
 	}
@@ -1030,7 +1029,6 @@ do_nat:
 out_push:
 	skb_push_rcsum(skb, nh_ofs);
 
-out:
 	qdisc_skb_cb(skb)->post_ct = true;
 out_clear:
 	tcf_action_update_bstats(&c->common, skb);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8d8452b1cdd4..0fab8de176d2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -42,6 +42,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
 	[TCA_POLICE_RATE64]	= { .type = NLA_U64 },
 	[TCA_POLICE_PEAKRATE64]	= { .type = NLA_U64 },
+	[TCA_POLICE_PKTRATE64]	= { .type = NLA_U64, .min = 1 },
+	[TCA_POLICE_PKTBURST64]	= { .type = NLA_U64, .min = 1 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -61,6 +63,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	u32 index;
 	u64 rate64, prate64;
+	u64 pps, ppsburst;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -142,6 +145,21 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
+	if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
+	    (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
+		NL_SET_ERR_MSG(extack,
+			       "Both or neither packet-per-second burst and rate must be provided");
+		err = -EINVAL;
+		goto failure;
+	}
+
+	if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
+		NL_SET_ERR_MSG(extack,
+			       "packet-per-second and byte-per-second rate limits not allowed in same action");
+		err = -EINVAL;
+		goto failure;
+	}
+
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (unlikely(!new)) {
 		err = -ENOMEM;
@@ -183,6 +201,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_POLICE_AVRATE])
 		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
+	if (tb[TCA_POLICE_PKTRATE64]) {
+		pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
+		ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
+		new->pps_present = true;
+		new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
+		psched_ppscfg_precompute(&new->ppsrate, pps);
+	}
+
 	spin_lock_bh(&police->tcf_lock);
 	spin_lock_bh(&police->tcfp_lock);
 	police->tcfp_t_c = ktime_get_ns();
@@ -217,8 +243,8 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
 	struct tcf_police *police = to_police(a);
+	s64 now, toks, ppstoks = 0, ptoks = 0;
 	struct tcf_police_params *p;
-	s64 now, toks, ptoks = 0;
 	int ret;
 
 	tcf_lastuse_update(&police->tcf_tm);
@@ -236,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
-		if (!p->rate_present) {
+		if (!p->rate_present && !p->pps_present) {
 			ret = p->tcfp_result;
 			goto end;
 		}
@@ -251,14 +277,23 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			ptoks -= (s64)psched_l2t_ns(&p->peak,
 						    qdisc_pkt_len(skb));
 		}
-		toks += police->tcfp_toks;
-		if (toks > p->tcfp_burst)
-			toks = p->tcfp_burst;
-		toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
-		if ((toks|ptoks) >= 0) {
+		if (p->rate_present) {
+			toks += police->tcfp_toks;
+			if (toks > p->tcfp_burst)
+				toks = p->tcfp_burst;
+			toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
+		} else if (p->pps_present) {
+			ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
+			ppstoks += police->tcfp_pkttoks;
+			if (ppstoks > p->tcfp_pkt_burst)
+				ppstoks = p->tcfp_pkt_burst;
+			ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
+		}
+		if ((toks | ptoks | ppstoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
 			police->tcfp_ptoks = ptoks;
+			police->tcfp_pkttoks = ppstoks;
 			spin_unlock_bh(&police->tcfp_lock);
 			ret = p->tcfp_result;
 			goto inc_drops;
@@ -331,6 +366,16 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
 				      TCA_POLICE_PAD))
 			goto nla_put_failure;
 	}
+	if (p->pps_present) {
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+				      police->params->ppsrate.rate_pkts_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+				      PSCHED_NS2TICKS(p->tcfp_pkt_burst),
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if (p->tcfp_result &&
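A note on the packet-per-second policer added above: its token bucket is kept in units of time, the burst is converted to nanoseconds with PSCHED_TICKS2NS(), and each conforming packet spends the time one packet takes at the configured rate. The stand-alone program below mirrors only that arithmetic in userspace C; every name in it is local to the sketch, and none of it is kernel code.

/* Userspace sketch of a packets-per-second token bucket kept in
 * nanoseconds, mirroring the arithmetic of the hunks above.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct pps_bucket {
	uint64_t pkt_cost_ns;	/* time one packet "costs" at the rate */
	int64_t burst_ns;	/* largest credit that may accumulate */
	int64_t toks_ns;	/* current credit */
	uint64_t t_c;		/* time of the last conforming packet */
};

static void pps_init(struct pps_bucket *b, uint64_t rate_pps, uint64_t burst_pkts)
{
	b->pkt_cost_ns = NSEC_PER_SEC / rate_pps;
	b->burst_ns = (int64_t)(burst_pkts * b->pkt_cost_ns);
	b->toks_ns = b->burst_ns;
	b->t_c = 0;
}

/* Returns 1 if the packet conforms, 0 if it exceeds the rate. */
static int pps_conform(struct pps_bucket *b, uint64_t now_ns)
{
	int64_t toks = (int64_t)(now_ns - b->t_c);

	if (toks > b->burst_ns)		/* elapsed time capped at the burst */
		toks = b->burst_ns;
	toks += b->toks_ns;
	if (toks > b->burst_ns)
		toks = b->burst_ns;
	toks -= (int64_t)b->pkt_cost_ns;	/* charge this packet */
	if (toks < 0)
		return 0;		/* exceeded: state left untouched */
	b->t_c = now_ns;
	b->toks_ns = toks;
	return 1;
}

int main(void)
{
	struct pps_bucket b;
	unsigned int i, passed = 0;

	pps_init(&b, 1000, 10);		/* 1000 pkt/s, burst of 10 packets */

	/* 100 back-to-back packets at t = 0: only the burst may pass. */
	for (i = 0; i < 100; i++)
		passed += pps_conform(&b, 0);
	printf("passed %u of 100 (expected 10)\n", passed);
	return 0;
}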
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3ebf9ede3cf1..6a0c16e4351d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -158,10 +158,8 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 {
 	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
+	struct psample_metadata md = {};
 	int retval;
-	int size;
-	int iif;
-	int oif;
 
 	tcf_lastuse_update(&s->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
@@ -172,20 +170,18 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	/* randomly sample packets according to rate */
 	if (psample_group && (prandom_u32() % s->rate == 0)) {
 		if (!skb_at_tc_ingress(skb)) {
-			iif = skb->skb_iif;
-			oif = skb->dev->ifindex;
+			md.in_ifindex = skb->skb_iif;
+			md.out_ifindex = skb->dev->ifindex;
 		} else {
-			iif = skb->dev->ifindex;
-			oif = 0;
+			md.in_ifindex = skb->dev->ifindex;
 		}
 
 		/* on ingress, the mac header gets popped, so push it back */
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_push(skb, skb->mac_len);
 
-		size = s->truncate ? s->trunc_size : skb->len;
-		psample_sample_packet(psample_group, skb, size, iif, oif,
-				      s->rate);
+		md.trunc_size = s->truncate ? s->trunc_size : skb->len;
+		psample_sample_packet(psample_group, skb, s->rate, &md);
 
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_pull(skb, skb->mac_len);
@@ -194,6 +190,16 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	return retval;
 }
 
+static void tcf_sample_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+				    u64 drops, u64 lastuse, bool hw)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct tcf_t *tm = &s->tcf_tm;
+
+	tcf_action_update_stats(a, bytes, packets, drops, hw);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
 static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 			   int bind, int ref)
 {
@@ -280,6 +286,7 @@ static struct tc_action_ops act_sample_ops = {
 	.id	  = TCA_ID_SAMPLE,
 	.owner	  = THIS_MODULE,
 	.act	  = tcf_sample_act,
+	.stats_update = tcf_sample_stats_update,
 	.dump	  = tcf_sample_dump,
 	.init	  = tcf_sample_init,
 	.cleanup  = tcf_sample_cleanup,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 340d5af86e87..40fbea626dfd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3662,6 +3662,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->police.burst = tcf_police_burst(act);
 			entry->police.rate_bytes_ps =
 				tcf_police_rate_bytes_ps(act);
+			entry->police.burst_pkt = tcf_police_burst_pkt(act);
+			entry->police.rate_pkt_ps =
+				tcf_police_rate_pkt_ps(act);
 			entry->police.mtu = tcf_police_tcfp_mtu(act);
 			entry->police.index = act->tcfa_index;
 		} else if (is_tcf_ct(act)) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c69a4ba9c33f..d7869a984881 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -209,16 +209,16 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
-	min_val = htons(filter->key.tp_range.tp_min.dst);
-	max_val = htons(filter->key.tp_range.tp_max.dst);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst);
+	min_val = ntohs(filter->key.tp_range.tp_min.dst);
+	max_val = ntohs(filter->key.tp_range.tp_max.dst);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.dst) < min_val ||
-		    htons(key->tp_range.tp.dst) > max_val)
+		if (ntohs(key->tp_range.tp.dst) < min_val ||
+		    ntohs(key->tp_range.tp.dst) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -232,16 +232,16 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
-	min_val = htons(filter->key.tp_range.tp_min.src);
-	max_val = htons(filter->key.tp_range.tp_max.src);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.src);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.src);
+	min_val = ntohs(filter->key.tp_range.tp_min.src);
+	max_val = ntohs(filter->key.tp_range.tp_max.src);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.src) < min_val ||
-		    htons(key->tp_range.tp.src) > max_val)
+		if (ntohs(key->tp_range.tp.src) < min_val ||
+		    ntohs(key->tp_range.tp.src) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -783,16 +783,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
 
 	if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
-	    htons(key->tp_range.tp_max.dst) <=
-	    htons(key->tp_range.tp_min.dst)) {
+	    ntohs(key->tp_range.tp_max.dst) <=
+	    ntohs(key->tp_range.tp_min.dst)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_DST_MIN],
 				    "Invalid destination port range (min must be strictly smaller than max)");
 		return -EINVAL;
 	}
 	if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
-	    htons(key->tp_range.tp_max.src) <=
-	    htons(key->tp_range.tp_min.src)) {
+	    ntohs(key->tp_range.tp_max.src) <=
+	    ntohs(key->tp_range.tp_min.src)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_SRC_MIN],
 				    "Invalid source port range (min must be strictly smaller than max)");
@@ -1044,8 +1044,8 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
 		return -EINVAL;
 	}
 
-	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
-	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+	key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
+	mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
 
 	*flags_key  = 0;
 	*flags_mask = 0;
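A short aside on the htons()/ntohs() changes in cls_flower above: the min/max port values are compared numerically, so they must be in host byte order first. The toy program below shows how network-order values misorder when compared as raw integers on a little-endian machine; it is an illustration only and shares no code with the kernel.

/* Illustration of why numeric comparisons need host byte order.
 * Stand-alone userspace code, unrelated to the kernel sources above.
 */
#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
	unsigned short port_min = 100;	/* 0x0064 */
	unsigned short port_max = 512;	/* 0x0200 */
	unsigned short be_min = htons(port_min);	/* 25600 (0x6400) on little-endian */
	unsigned short be_max = htons(port_max);	/* 2 (0x0002) on little-endian */

	/* Comparing the network-order values directly: on little-endian
	 * hosts the byte swap makes the smaller port compare as larger.
	 */
	printf("raw compare:   min < max ? %s\n",
	       be_min < be_max ? "yes" : "no (misordered)");

	/* Converting back with ntohs() restores numeric ordering. */
	printf("ntohs compare: min < max ? %s\n",
	       ntohs(be_min) < ntohs(be_max) ? "yes" : "no");
	return 0;
}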
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 320b3d31fa97..b79a7e27bb31 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -263,7 +263,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		/*
 		 * Step 3+n. If classifier selected a link sharing class,
 		 *	   apply agency specific classifier.
-		 *	   Repeat this procdure until we hit a leaf node.
+		 *	   Repeat this procedure until we hit a leaf node.
 		 */
 		head = cl;
 	}
@@ -859,7 +859,7 @@ cbq_dequeue(struct Qdisc *sch)
 	return NULL;
 }
 
-/* CBQ class maintanance routines */
+/* CBQ class maintenance routines */
 
 static void cbq_adjust_levels(struct cbq_class *this)
 {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93d1489..44991ea726fc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1325,6 +1325,48 @@ void dev_shutdown(struct net_device *dev)
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
 
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate:   Rate to compute reciprocal division values of
+ * @mult:   Multiplier for reciprocal division
+ * @shift:  Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ *  time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen.
+ *
+ * reciprocal_value() is not used here it doesn't handle 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+	u64 factor = NSEC_PER_SEC;
+
+	*mult = 1;
+	*shift = 0;
+
+	if (rate <= 0)
+		return;
+
+	for (;;) {
+		*mult = div64_u64(factor, rate);
+		if (*mult & (1U << 31) || factor & (1ULL << 63))
+			break;
+		factor <<= 1;
+		(*shift)++;
+	}
+}
+
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
 			       const struct tc_ratespec *conf,
 			       u64 rate64)
@@ -1333,34 +1375,17 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 	r->overhead = conf->overhead;
 	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
-	r->mult = 1;
-	/*
-	 * The deal here is to replace a divide by a reciprocal one
-	 * in fast path (a reciprocal divide is a multiply and a shift)
-	 *
-	 * Normal formula would be :
-	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
-	 *
-	 * We compute mult/shift to use instead :
-	 *  time_in_ns = (len * mult) >> shift;
-	 *
-	 * We try to get the highest possible mult value for accuracy,
-	 * but have to make sure no overflows will ever happen.
-	 */
-	if (r->rate_bytes_ps > 0) {
-		u64 factor = NSEC_PER_SEC;
-
-		for (;;) {
-			r->mult = div64_u64(factor, r->rate_bytes_ps);
-			if (r->mult & (1U << 31) || factor & (1ULL << 63))
-				break;
-			factor <<= 1;
-			r->shift++;
-		}
-	}
+	psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
 
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
+{
+	r->rate_pkts_ps = pktrate64;
+	psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
+}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
+
 static void mini_qdisc_rcu_func(struct rcu_head *head)
 {
 }
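The psched_ratecfg_precompute__() helper factored out above replaces a division by the rate with a multiply and a shift on the fast path. The stand-alone program below repeats the same loop in userspace so the approximation time_in_ns = (len * mult) >> shift can be checked against the exact division; the rate and length values are arbitrary and all names are local to the sketch.

/* Userspace re-run of the mult/shift precompute shown in the hunk
 * above, for checking the reciprocal-divide approximation.  Not
 * kernel code; names are local to this example.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

static void precompute(uint64_t rate, uint32_t *mult, uint8_t *shift)
{
	uint64_t factor = NSEC_PER_SEC;

	*mult = 1;
	*shift = 0;

	if (rate == 0)
		return;

	/* Grow the factor until the multiplier (or the factor itself)
	 * is about to overflow; a larger multiplier means a more
	 * precise reciprocal.
	 */
	for (;;) {
		*mult = (uint32_t)(factor / rate);
		if ((*mult & (1U << 31)) || (factor & (1ULL << 63)))
			break;
		factor <<= 1;
		(*shift)++;
	}
}

int main(void)
{
	uint64_t rate = 12500000;	/* bytes per second, i.e. 100 Mbit/s */
	uint64_t len = 1514;		/* one full-sized Ethernet frame */
	uint32_t mult;
	uint8_t shift;
	uint64_t approx, exact;

	precompute(rate, &mult, &shift);
	approx = (len * mult) >> shift;		/* fast-path formula */
	exact = (len * NSEC_PER_SEC) / rate;	/* plain division */

	printf("mult=%u shift=%u\n", mult, shift);
	printf("approx=%llu ns exact=%llu ns\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}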
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..5c91df52b8c2 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -411,18 +411,10 @@ done:
 	return txtime;
 }
 
-static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-			  struct sk_buff **to_free)
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+			      struct Qdisc *child, struct sk_buff **to_free)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct Qdisc *child;
-	int queue;
-
-	queue = skb_get_queue_mapping(skb);
-
-	child = q->qdiscs[queue];
-	if (unlikely(!child))
-		return qdisc_drop(skb, sch, to_free);
 
 	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
 		if (!is_valid_interval(skb, sch))
@@ -439,6 +431,58 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 	return qdisc_enqueue(skb, child, to_free);
 }
 
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	int queue;
+
+	queue = skb_get_queue_mapping(skb);
+
+	child = q->qdiscs[queue];
+	if (unlikely(!child))
+		return qdisc_drop(skb, sch, to_free);
+
+	/* Large packets might not be transmitted when the transmission duration
+	 * exceeds any configured interval. Therefore, segment the skb into
+	 * smaller chunks. Skip it for the full offload case, as the driver
+	 * and/or the hardware is expected to handle this.
+	 */
+	if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+		netdev_features_t features = netif_skb_features(skb);
+		struct sk_buff *segs, *nskb;
+		int ret;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		skb_list_walk_safe(segs, segs, nskb) {
+			skb_mark_not_on_list(segs);
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			slen += segs->len;
+
+			ret = taprio_enqueue_one(segs, sch, child, to_free);
+			if (ret != NET_XMIT_SUCCESS) {
+				if (net_xmit_drop_count(ret))
+					qdisc_qstats_drop(sch);
+			} else {
+				numsegs++;
+			}
+		}
+
+		if (numsegs > 1)
+			qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+		consume_skb(skb);
+
+		return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+	}
+
+	return taprio_enqueue_one(skb, sch, child, to_free);
+}
+
 static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
@@ -901,6 +945,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 
 		list_for_each_entry(entry, &new->entries, list)
 			cycle = ktime_add_ns(cycle, entry->interval);
+
+		if (!cycle) {
+			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+			return -EINVAL;
+		}
+
 		new->cycle_time = cycle;
 	}
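One more illustrative note on the GSO branch added in sch_taprio.c above: the caller has already accounted the original skb as a single packet of len bytes, while numsegs segments totalling slen bytes are what actually got enqueued, so qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen) grows the parent's counters by the difference (negative deltas mean growth). The toy program below demonstrates that sign convention with made-up numbers; it is not kernel code and its names are its own.

/* Toy demonstration of the backlog bookkeeping used after GSO
 * segmentation above.  Stand-alone userspace code.
 */
#include <stdio.h>

struct stats {
	int qlen;	/* packets accounted in the queue */
	long backlog;	/* bytes accounted in the queue */
};

/* Same sign convention as qdisc_tree_reduce_backlog(): positive
 * deltas shrink the counters, negative deltas grow them.
 */
static void reduce_backlog(struct stats *s, int pkts, long bytes)
{
	s->qlen -= pkts;
	s->backlog -= bytes;
}

int main(void)
{
	/* One GSO skb of 2900 bytes was accounted by the caller ... */
	struct stats s = { .qlen = 1, .backlog = 2900 };
	/* ... but two segments of 1514 and 1440 bytes were enqueued
	 * (headers are replicated in each segment).
	 */
	int numsegs = 2;
	long len = 2900, slen = 1514 + 1440;

	reduce_backlog(&s, 1 - numsegs, len - slen);
	printf("qlen=%d backlog=%ld (expected 2 and 2954)\n",
	       s.qlen, s.backlog);
	return 0;
}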