Diffstat (limited to 'net/sched')
-rw-r--r--   net/sched/Kconfig       |  12
-rw-r--r--   net/sched/Makefile      |   1
-rw-r--r--   net/sched/act_api.c     |  13
-rw-r--r--   net/sched/act_ct.c      |   2
-rw-r--r--   net/sched/act_gate.c    | 636
-rw-r--r--   net/sched/cls_api.c     |  38
-rw-r--r--   net/sched/em_ipt.c      |   2
-rw-r--r--   net/sched/sch_choke.c   |   6
-rw-r--r--   net/sched/sch_fq.c      | 143
-rw-r--r--   net/sched/sch_generic.c |  34
-rw-r--r--   net/sched/sch_red.c     |   9
11 files changed, 823 insertions, 73 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index bfbefb7bff9d..2f20073f4f84 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -981,6 +981,18 @@ config NET_ACT_CT To compile this code as a module, choose M here: the module will be called act_ct. +config NET_ACT_GATE + tristate "Frame gate entry list control tc action" + depends on NET_CLS_ACT + help + Say Y here to allow to control the ingress flow to be passed at + specific time slot and be dropped at other specific time slot by + the gate entry list. + + If unsure, say N. + To compile this code as a module, choose M here: the + module will be called act_gate. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index 31c367a6cd09..66bbf9a98f9e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o +obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index df4560909157..fbbec2e562f5 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -876,19 +876,14 @@ static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr) return hw_stats_bf.value; } -static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS; -static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY; - static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, - [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32, - .validation_data = &tca_act_flags_allowed }, - [TCA_ACT_HW_STATS] = { .type = NLA_BITFIELD32, - .validation_data = &tca_act_hw_stats_allowed }, + [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS), + [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -1454,10 +1449,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return ret; } -static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { - [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32, - .validation_data = &tcaa_root_flags_allowed }, + [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON), [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 1a766393be62..9adff83b523b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -30,6 +30,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <uapi/linux/netfilter/nf_nat.h> @@ -536,6 +537,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, flow_offload_refresh(nf_ft, flow); nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); + nf_ct_acct_update(ct, dir, skb->len); return true; } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c new file mode 100644 index 
000000000000..35fc48795541 --- /dev/null +++ b/net/sched/act_gate.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright 2020 NXP */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <net/act_api.h> +#include <net/netlink.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gate.h> + +static unsigned int gate_net_id; +static struct tc_action_ops act_gate_ops; + +static ktime_t gate_get_time(struct tcf_gate *gact) +{ + ktime_t mono = ktime_get(); + + switch (gact->tk_offset) { + case TK_OFFS_MAX: + return mono; + default: + return ktime_mono_to_any(mono, gact->tk_offset); + } + + return KTIME_MAX; +} + +static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +{ + struct tcf_gate_params *param = &gact->param; + ktime_t now, base, cycle; + u64 n; + + base = ns_to_ktime(param->tcfg_basetime); + now = gate_get_time(gact); + + if (ktime_after(base, now)) { + *start = base; + return 0; + } + + cycle = param->tcfg_cycletime; + + /* cycle time should not be zero */ + if (!cycle) + return -EFAULT; + + n = div64_u64(ktime_sub_ns(now, base), cycle); + *start = ktime_add_ns(base, (n + 1) * cycle); + return 0; +} + +static void gate_start_timer(struct tcf_gate *gact, ktime_t start) +{ + ktime_t expires; + + expires = hrtimer_get_expires(&gact->hitimer); + if (expires == 0) + expires = KTIME_MAX; + + start = min_t(ktime_t, start, expires); + + hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT); +} + +static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) +{ + struct tcf_gate *gact = container_of(timer, struct tcf_gate, + hitimer); + struct tcf_gate_params *p = &gact->param; + struct tcfg_gate_entry *next; + ktime_t close_time, now; + + spin_lock(&gact->tcf_lock); + + next = gact->next_entry; + + /* cycle start, clear pending bit, clear total octets */ + gact->current_gate_status = next->gate_state ? 
GATE_ACT_GATE_OPEN : 0; + gact->current_entry_octets = 0; + gact->current_max_octets = next->maxoctets; + + gact->current_close_time = ktime_add_ns(gact->current_close_time, + next->interval); + + close_time = gact->current_close_time; + + if (list_is_last(&next->list, &p->entries)) + next = list_first_entry(&p->entries, + struct tcfg_gate_entry, list); + else + next = list_next_entry(next, list); + + now = gate_get_time(gact); + + if (ktime_after(now, close_time)) { + ktime_t cycle, base; + u64 n; + + cycle = p->tcfg_cycletime; + base = ns_to_ktime(p->tcfg_basetime); + n = div64_u64(ktime_sub_ns(now, base), cycle); + close_time = ktime_add_ns(base, (n + 1) * cycle); + } + + gact->next_entry = next; + + hrtimer_set_expires(&gact->hitimer, close_time); + + spin_unlock(&gact->tcf_lock); + + return HRTIMER_RESTART; +} + +static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_gate *gact = to_gate(a); + + spin_lock(&gact->tcf_lock); + + tcf_lastuse_update(&gact->tcf_tm); + bstats_update(&gact->tcf_bstats, skb); + + if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) { + spin_unlock(&gact->tcf_lock); + return gact->tcf_action; + } + + if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) + goto drop; + + if (gact->current_max_octets >= 0) { + gact->current_entry_octets += qdisc_pkt_len(skb); + if (gact->current_entry_octets > gact->current_max_octets) { + gact->tcf_qstats.overlimits++; + goto drop; + } + } + + spin_unlock(&gact->tcf_lock); + + return gact->tcf_action; +drop: + gact->tcf_qstats.drops++; + spin_unlock(&gact->tcf_lock); + + return TC_ACT_SHOT; +} + +static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = { + [TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG }, + [TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 }, + [TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 }, + [TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 }, +}; + +static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = { + [TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate), + .type = NLA_EXACT_LEN }, + [TCA_GATE_PRIORITY] = { .type = NLA_S32 }, + [TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_GATE_BASE_TIME] = { .type = NLA_U64 }, + [TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 }, + [TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 }, + [TCA_GATE_FLAGS] = { .type = NLA_U32 }, + [TCA_GATE_CLOCKID] = { .type = NLA_S32 }, +}; + +static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry, + struct netlink_ext_ack *extack) +{ + u32 interval = 0; + + entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]); + + if (tb[TCA_GATE_ENTRY_INTERVAL]) + interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]); + + if (interval == 0) { + NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); + return -EINVAL; + } + + entry->interval = interval; + + if (tb[TCA_GATE_ENTRY_IPV]) + entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]); + else + entry->ipv = -1; + + if (tb[TCA_GATE_ENTRY_MAX_OCTETS]) + entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]); + else + entry->maxoctets = -1; + + return 0; +} + +static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry, + int index, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { }; + int err; + + err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + entry->index = index; + + return 
fill_gate_entry(tb, entry, extack); +} + +static void release_entry_list(struct list_head *entries) +{ + struct tcfg_gate_entry *entry, *e; + + list_for_each_entry_safe(entry, e, entries, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int parse_gate_list(struct nlattr *list_attr, + struct tcf_gate_params *sched, + struct netlink_ext_ack *extack) +{ + struct tcfg_gate_entry *entry; + struct nlattr *n; + int err, rem; + int i = 0; + + if (!list_attr) + return -EINVAL; + + nla_for_each_nested(n, list_attr, rem) { + if (nla_type(n) != TCA_GATE_ONE_ENTRY) { + NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'"); + continue; + } + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + err = -ENOMEM; + goto release_list; + } + + err = parse_gate_entry(n, entry, i, extack); + if (err < 0) { + kfree(entry); + goto release_list; + } + + list_add_tail(&entry->list, &sched->entries); + i++; + } + + sched->num_entries = i; + + return i; + +release_list: + release_entry_list(&sched->entries); + + return err; +} + +static int tcf_gate_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, u32 flags, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, gate_net_id); + enum tk_offsets tk_offset = TK_OFFS_TAI; + struct nlattr *tb[TCA_GATE_MAX + 1]; + struct tcf_chain *goto_ch = NULL; + struct tcf_gate_params *p; + s32 clockid = CLOCK_TAI; + struct tcf_gate *gact; + struct tc_gate *parm; + int ret = 0, err; + u64 basetime = 0; + u32 gflags = 0; + s32 prio = -1; + ktime_t start; + u32 index; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_GATE_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_GATE_PARMS]); + index = parm->index; + + err = tcf_idr_check_alloc(tn, &index, a, bind); + if (err < 0) + return err; + + if (err && bind) + return 0; + + if (!err) { + ret = tcf_idr_create(tn, index, est, a, + &act_gate_ops, bind, false, 0); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; + } + + ret = ACT_P_CREATED; + } else if (!ovr) { + tcf_idr_release(*a, bind); + return -EEXIST; + } + + if (tb[TCA_GATE_PRIORITY]) + prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); + + if (tb[TCA_GATE_BASE_TIME]) + basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]); + + if (tb[TCA_GATE_FLAGS]) + gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); + + if (tb[TCA_GATE_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); + switch (clockid) { + case CLOCK_REALTIME: + tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + goto release_idr; + } + } + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + gact = to_gate(*a); + + spin_lock_bh(&gact->tcf_lock); + p = &gact->param; + + if (tb[TCA_GATE_CYCLE_TIME]) { + p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); + if (!p->tcfg_cycletime_ext) + goto chain_put; + } + + INIT_LIST_HEAD(&p->entries); + if (tb[TCA_GATE_ENTRY_LIST]) { + err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); + if (err < 0) + goto chain_put; + } + + if (!p->tcfg_cycletime) { + struct tcfg_gate_entry *entry; + ktime_t cycle = 0; + + 
list_for_each_entry(entry, &p->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + p->tcfg_cycletime = cycle; + } + + if (tb[TCA_GATE_CYCLE_TIME_EXT]) + p->tcfg_cycletime_ext = + nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + + p->tcfg_priority = prio; + p->tcfg_basetime = basetime; + p->tcfg_clockid = clockid; + p->tcfg_flags = gflags; + + gact->tk_offset = tk_offset; + hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); + gact->hitimer.function = gate_timer_func; + + err = gate_get_start_time(gact, &start); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Internal error: failed get start time"); + release_entry_list(&p->entries); + goto chain_put; + } + + gact->current_close_time = start; + gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; + + gact->next_entry = list_first_entry(&p->entries, + struct tcfg_gate_entry, list); + + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + + gate_start_timer(gact, start); + + spin_unlock_bh(&gact->tcf_lock); + + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + + if (ret == ACT_P_CREATED) + tcf_idr_insert(tn, *a); + + return ret; + +chain_put: + spin_unlock_bh(&gact->tcf_lock); + + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; +} + +static void tcf_gate_cleanup(struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; + + hrtimer_cancel(&gact->hitimer); + + p = &gact->param; + + release_entry_list(&p->entries); +} + +static int dumping_entry(struct sk_buff *skb, + struct tcfg_gate_entry *entry) +{ + struct nlattr *item; + + item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY); + if (!item) + return -ENOSPC; + + if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index)) + goto nla_put_failure; + + if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval)) + goto nla_put_failure; + + if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets)) + goto nla_put_failure; + + if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv)) + goto nla_put_failure; + + return nla_nest_end(skb, item); + +nla_put_failure: + nla_nest_cancel(skb, item); + return -1; +} + +static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_gate *gact = to_gate(a); + struct tc_gate opt = { + .index = gact->tcf_index, + .refcnt = refcount_read(&gact->tcf_refcnt) - ref, + .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind, + }; + struct tcfg_gate_entry *entry; + struct tcf_gate_params *p; + struct nlattr *entry_list; + struct tcf_t t; + + spin_lock_bh(&gact->tcf_lock); + opt.action = gact->tcf_action; + + p = &gact->param; + + if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME, + p->tcfg_basetime, TCA_GATE_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME, + p->tcfg_cycletime, TCA_GATE_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT, + p->tcfg_cycletime_ext, TCA_GATE_PAD)) + goto nla_put_failure; + + if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags)) + goto nla_put_failure; + + if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority)) + goto nla_put_failure; + + entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST); + if 
(!entry_list) + goto nla_put_failure; + + list_for_each_entry(entry, &p->entries, list) { + if (dumping_entry(skb, entry) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, entry_list); + + tcf_tm_dump(&t, &gact->tcf_tm); + if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD)) + goto nla_put_failure; + spin_unlock_bh(&gact->tcf_lock); + + return skb->len; + +nla_put_failure: + spin_unlock_bh(&gact->tcf_lock); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_gate_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, gate_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops, extack); +} + +static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_gate *gact = to_gate(a); + struct tcf_t *tm = &gact->tcf_tm; + + tcf_action_update_stats(a, bytes, packets, false, hw); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + +static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, gate_net_id); + + return tcf_idr_search(tn, a, index); +} + +static size_t tcf_gate_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_gate)); +} + +static struct tc_action_ops act_gate_ops = { + .kind = "gate", + .id = TCA_ID_GATE, + .owner = THIS_MODULE, + .act = tcf_gate_act, + .dump = tcf_gate_dump, + .init = tcf_gate_init, + .cleanup = tcf_gate_cleanup, + .walk = tcf_gate_walker, + .stats_update = tcf_gate_stats_update, + .get_fill_size = tcf_gate_get_fill_size, + .lookup = tcf_gate_search, + .size = sizeof(struct tcf_gate), +}; + +static __net_init int gate_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, gate_net_id); + + return tc_action_net_init(net, tn, &act_gate_ops); +} + +static void __net_exit gate_exit_net(struct list_head *net_list) +{ + tc_action_net_exit(net_list, gate_net_id); +} + +static struct pernet_operations gate_net_ops = { + .init = gate_init_net, + .exit_batch = gate_exit_net, + .id = &gate_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init gate_init_module(void) +{ + return tcf_register_action(&act_gate_ops, &gate_net_ops); +} + +static void __exit gate_cleanup_module(void) +{ + tcf_unregister_action(&act_gate_ops, &gate_net_ops); +} + +module_init(gate_init_module); +module_exit(gate_cleanup_module); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0a7ecc292bd3..299b963c796e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -39,6 +39,7 @@ #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> #include <net/tc_act/tc_mpls.h> +#include <net/tc_act/tc_gate.h> #include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -735,8 +736,11 @@ static int tcf_block_offload_cmd(struct tcf_block *block, INIT_LIST_HEAD(&bo.cb_list); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); - if (err < 0) + if (err < 0) { + if (err != -EOPNOTSUPP) + NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); return err; + } return tcf_block_setup(block, &bo); } @@ -3523,6 +3527,27 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, #endif } +static void tcf_gate_entry_destructor(void *priv) +{ + struct action_gate_entry *oe = priv; + + kfree(oe); +} + +static int tcf_gate_get_entries(struct flow_action_entry 
*entry, + const struct tc_action *act) +{ + entry->gate.entries = tcf_gate_get_list(act); + + if (!entry->gate.entries) + return -EINVAL; + + entry->destructor = tcf_gate_entry_destructor; + entry->destructor_priv = entry->gate.entries; + + return 0; +} + static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) { if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) @@ -3679,6 +3704,17 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); + } else if (is_tcf_gate(act)) { + entry->id = FLOW_ACTION_GATE; + entry->gate.index = tcf_gate_index(act); + entry->gate.prio = tcf_gate_prio(act); + entry->gate.basetime = tcf_gate_basetime(act); + entry->gate.cycletime = tcf_gate_cycletime(act); + entry->gate.cycletimeext = tcf_gate_cycletimeext(act); + entry->gate.num_entries = tcf_gate_num_entries(act); + err = tcf_gate_get_entries(entry, act); + if (err) + goto err_out; } else { err = -EOPNOTSUPP; goto err_out_locked; diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index eecfe072c508..18755d29fd15 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -199,7 +199,7 @@ static void em_ipt_destroy(struct tcf_ematch *em) im->match->destroy(&par); } module_put(im->match->me); - kfree((void *)im); + kfree(im); } static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 1bcf8fbfd40e..bd618b00d319 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -131,7 +131,6 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx, } struct choke_skb_cb { - u16 classid; u8 keys_valid; struct flow_keys_digest keys; }; @@ -142,11 +141,6 @@ static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } -static inline void choke_set_classid(struct sk_buff *skb, u16 classid) -{ - choke_skb_cb(skb)->classid = classid; -} - /* * Compare flow of two packets * Returns true only if source and destination address and port match. diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4c060134c736..8f06a808c59a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -66,22 +66,27 @@ static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) * in linear list (head,tail), otherwise are placed in a rbtree (t_root). 
*/ struct fq_flow { +/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */ struct rb_root t_root; struct sk_buff *head; /* list of skbs for this flow : first skb */ union { struct sk_buff *tail; /* last skb in the list */ - unsigned long age; /* jiffies when flow was emptied, for gc */ + unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; struct rb_node fq_node; /* anchor in fq_root[] trees */ struct sock *sk; + u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ + +/* Second cache line, used in fq_dequeue() */ int credit; - u32 socket_hash; /* sk_hash */ - struct fq_flow *next; /* next pointer in RR lists, or &detached */ + /* 32bit hole on 64bit arches */ + + struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; -}; +} ____cacheline_aligned_in_smp; struct fq_flow_head { struct fq_flow *first; @@ -95,6 +100,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + u64 ktime_cache; /* copy of last ktime_get_ns() */ unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ @@ -104,12 +110,13 @@ struct fq_sched_data { u32 flow_plimit; /* max packets per flow */ unsigned long flow_max_rate; /* optional max rate per flow */ u64 ce_threshold; + u64 horizon; /* horizon in ns */ u32 orphan_mask; /* mask for orphaned skb */ u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; - + u8 horizon_drop; u32 flows; u32 inactive_flows; u32 throttled_flows; @@ -118,6 +125,8 @@ struct fq_sched_data { u64 stat_internal_packets; u64 stat_throttled; u64 stat_ce_mark; + u64 stat_horizon_drops; + u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; @@ -126,20 +135,25 @@ struct fq_sched_data { struct qdisc_watchdog watchdog; }; -/* special value to mark a detached flow (not on old/new list) */ -static struct fq_flow detached, throttled; - +/* + * f->tail and f->age share the same location. + * We can use the low order bit to differentiate if this location points + * to a sk_buff or contains a jiffies value, if we force this value to be odd. 
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2 + */ static void fq_flow_set_detached(struct fq_flow *f) { - f->next = &detached; - f->age = jiffies; + f->age = jiffies | 1UL; } static bool fq_flow_is_detached(const struct fq_flow *f) { - return f->next == &detached; + return !!(f->age & 1UL); } +/* special value to mark a throttled flow (not on old/new list) */ +static struct fq_flow throttled; + static bool fq_flow_is_throttled(const struct fq_flow *f) { return f->next == &throttled; @@ -204,9 +218,10 @@ static void fq_gc(struct fq_sched_data *q, struct rb_root *root, struct sock *sk) { - struct fq_flow *f, *tofree[FQ_GC_MAX]; struct rb_node **p, *parent; - int fcnt = 0; + void *tofree[FQ_GC_MAX]; + struct fq_flow *f; + int i, fcnt = 0; p = &root->rb_node; parent = NULL; @@ -229,15 +244,18 @@ static void fq_gc(struct fq_sched_data *q, p = &parent->rb_left; } + if (!fcnt) + return; + + for (i = fcnt; i > 0; ) { + f = tofree[--i]; + rb_erase(&f->fq_node, root); + } q->flows -= fcnt; q->inactive_flows -= fcnt; q->stat_gc_flows += fcnt; - while (fcnt) { - struct fq_flow *f = tofree[--fcnt]; - rb_erase(&f->fq_node, root); - kmem_cache_free(fq_flow_cachep, f); - } + kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree); } static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) @@ -370,19 +388,17 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, } } -/* remove one skb from head of flow queue */ -static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) +/* Remove one skb from flow queue. + * This skb must be the return value of prior fq_peek(). + */ +static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, + struct sk_buff *skb) { - struct sk_buff *skb = fq_peek(flow); - - if (skb) { - fq_erase_head(sch, flow, skb); - skb_mark_not_on_list(skb); - flow->qlen--; - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - } - return skb; + fq_erase_head(sch, flow, skb); + skb_mark_not_on_list(skb); + flow->qlen--; + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; } static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) @@ -390,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) struct rb_node **p, *parent; struct sk_buff *head, *aux; - fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns(); - head = flow->head; if (!head || fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { @@ -419,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) rb_insert_color(&skb->rbnode, &flow->t_root); } +static bool fq_packet_beyond_horizon(const struct sk_buff *skb, + const struct fq_sched_data *q) +{ + return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); +} + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -428,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); + if (!skb->tstamp) { + fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); + } else { + /* Check if packet timestamp is too far in the future. + * Try first if our cached value, to avoid ktime_get_ns() + * cost in most cases. 
+ */ + if (fq_packet_beyond_horizon(skb, q)) { + /* Refresh our cache and check another time */ + q->ktime_cache = ktime_get_ns(); + if (fq_packet_beyond_horizon(skb, q)) { + if (q->horizon_drop) { + q->stat_horizon_drops++; + return qdisc_drop(skb, sch, to_free); + } + q->stat_horizon_caps++; + skb->tstamp = q->ktime_cache + q->horizon; + } + } + fq_skb_cb(skb)->time_to_send = skb->tstamp; + } + f = fq_classify(skb, q); if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { q->stat_flows_plimit++; @@ -494,11 +536,13 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) if (!sch->q.qlen) return NULL; - skb = fq_dequeue_head(sch, &q->internal); - if (skb) + skb = fq_peek(&q->internal); + if (unlikely(skb)) { + fq_dequeue_skb(sch, &q->internal, skb); goto out; + } - now = ktime_get_ns(); + q->ktime_cache = now = ktime_get_ns(); fq_check_throttled(q, now); begin: head = &q->new_flows; @@ -532,14 +576,13 @@ begin: fq_flow_set_throttled(q, f); goto begin; } + prefetch(&skb->end); if ((s64)(now - time_next_packet - q->ce_threshold) > 0) { INET_ECN_set_ce(skb); q->stat_ce_mark++; } - } - - skb = fq_dequeue_head(sch, f); - if (!skb) { + fq_dequeue_skb(sch, f, skb); + } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ if ((head == &q->new_flows) && q->old_flows.first) { @@ -550,7 +593,6 @@ begin: } goto begin; } - prefetch(&skb->end); plen = qdisc_pkt_len(skb); f->credit -= plen; @@ -753,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, + [TCA_FQ_HORIZON] = { .type = NLA_U32 }, + [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt, @@ -842,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_TIMER_SLACK]) q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]); + if (tb[TCA_FQ_HORIZON]) + q->horizon = (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_HORIZON]); + + if (tb[TCA_FQ_HORIZON_DROP]) + q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]); + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); sch_tree_lock(sch); @@ -895,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */ + q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */ + q->horizon_drop = 1; /* by default, drop packets beyond horizon */ + /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; @@ -912,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); u64 ce_threshold = q->ce_threshold; + u64 horizon = q->horizon; struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -921,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ do_div(ce_threshold, NSEC_PER_USEC); + do_div(horizon, NSEC_PER_USEC); if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || @@ -936,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) || - nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack)) + nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) || + nla_put_u32(skb, TCA_FQ_HORIZON, 
(u32)horizon) || + nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -967,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.unthrottle_latency_ns = min_t(unsigned long, q->unthrottle_latency_ns, ~0U); st.ce_mark = q->stat_ce_mark; + st.horizon_drops = q->stat_horizon_drops; + st.horizon_caps = q->stat_horizon_caps; sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2efd5b61acef..ebc55d884247 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -794,6 +794,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); +static struct lock_class_key qdisc_tx_busylock; +static struct lock_class_key qdisc_running_key; + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack) @@ -846,9 +849,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } spin_lock_init(&sch->busylock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); sch->ops = ops; sch->flags = ops->static_flags; @@ -859,12 +870,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, dev_hold(dev); refcount_set(&sch->refcnt, 1); - if (sch != &noop_qdisc) { - lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->running, &dev->qdisc_running_key); - } - return sch; errout1: kfree(p); @@ -1037,10 +1042,9 @@ static void attach_one_default_qdisc(struct net_device *dev, ops = &pfifo_fast_ops; qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); - if (!qdisc) { - netdev_info(dev, "activation failed\n"); + if (!qdisc) return; - } + if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; dev_queue->qdisc_sleeping = qdisc; @@ -1065,6 +1069,18 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } + + /* Detect default qdisc setup/init failed and fallback to "noqueue" */ + if (dev->qdisc == &noop_qdisc) { + netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", + default_qdisc_ops->id, noqueue_qdisc_ops.id); + dev->priv_flags |= IFF_NO_QUEUE; + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + dev->qdisc = txq->qdisc_sleeping; + qdisc_refcount_inc(dev->qdisc); + dev->priv_flags ^= IFF_NO_QUEUE; + } + #ifdef CONFIG_NET_SCHED if (dev->qdisc != &noop_qdisc) qdisc_hash_add(dev->qdisc, false); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index c7de47c942e3..555a1b9e467f 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -48,7 +48,7 @@ struct red_sched_data { struct Qdisc *qdisc; }; -static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP; +#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP) static inline int red_use_ecn(struct red_sched_data *q) { @@ -212,8 +212,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, - 
[TCA_RED_FLAGS] = { .type = NLA_BITFIELD32, - .validation_data = &red_supported_flags }, + [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS), }; static int red_change(struct Qdisc *sch, struct nlattr *opt, @@ -248,7 +247,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, - tb[TCA_RED_FLAGS], red_supported_flags, + tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS, &flags_bf, &userbits, extack); if (err) return err; @@ -372,7 +371,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || nla_put_bitfield32(skb, TCA_RED_FLAGS, - q->flags, red_supported_flags)) + q->flags, TC_RED_SUPPORTED_FLAGS)) goto nla_put_failure; return nla_nest_end(skb, opts); |
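
The Kconfig help and the act_gate.c addition above implement a cyclic gate control list: a base time anchors a schedule of open/closed intervals, an hrtimer walks the list, and packets that arrive while the gate is closed (or after a per-entry octet budget is exceeded) are dropped. The stand-alone C sketch below models only that scheduling decision; it is an illustration of the semantics, not the kernel code, and the entry values in main() are made up.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* One schedule entry: the gate holds this state for interval_ns ns. */
	struct gate_entry {
		bool open;
		uint64_t interval_ns;
	};

	/* Gate state at absolute time now_ns for a schedule anchored at base_ns
	 * and repeating every cycle_ns: the offset into the current cycle picks
	 * an entry.  Before the base time act_gate is in its "pending" state and
	 * lets traffic through, so this sketch does the same.
	 */
	static bool gate_is_open(const struct gate_entry *sched, size_t n,
				 uint64_t base_ns, uint64_t cycle_ns,
				 uint64_t now_ns)
	{
		uint64_t offset, acc = 0;
		size_t i;

		if (now_ns < base_ns)
			return true;

		offset = (now_ns - base_ns) % cycle_ns;
		for (i = 0; i < n; i++) {
			acc += sched[i].interval_ns;
			if (offset < acc)
				return sched[i].open;
		}
		return false;	/* offset past the listed entries in the cycle */
	}

	int main(void)
	{
		/* Hypothetical schedule: pass for 200 us, drop for 100 us. */
		const struct gate_entry sched[] = {
			{ true,  200000 },
			{ false, 100000 },
		};
		const uint64_t base = 0, cycle = 300000;

		for (uint64_t t = 0; t < 1000000; t += 250000)
			printf("t=%7llu ns -> %s\n", (unsigned long long)t,
			       gate_is_open(sched, 2, base, cycle, t) ?
			       "pass" : "drop");
		return 0;
	}

Note that in the kernel the per-packet path never scans the list: gate_timer_func() advances next_entry and current_close_time, so tcf_gate_act() only checks the cached gate state and the remaining octet budget.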
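
The reworked struct fq_flow above overlays tail (a pointer to the last queued skb) and age (a jiffies stamp used by the garbage collector) in one union, telling them apart by forcing the low bit of age to 1 and relying on sk_buff alignment keeping pointer values even; this replaces the old &detached sentinel. A stand-alone sketch of the same low-bit tagging trick, with simplified stand-in types:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct item { int v; };			/* stand-in for struct sk_buff */

	struct flow {
		union {
			struct item *tail;	/* valid while packets are queued */
			uintptr_t age;		/* (timestamp | 1) once empty     */
		};
	};

	static void flow_set_detached(struct flow *f, uintptr_t now)
	{
		f->age = now | 1;	/* an odd value can never be a valid pointer */
	}

	static bool flow_is_detached(const struct flow *f)
	{
		return f->age & 1;
	}

	static void flow_attach(struct flow *f, struct item *last)
	{
		assert(((uintptr_t)last & 1) == 0);	/* the alignment assumption */
		f->tail = last;
	}

	int main(void)
	{
		struct item obj = { 0 };
		struct flow f;

		flow_set_detached(&f, 123456);
		printf("detached: %d\n", flow_is_detached(&f));	/* prints 1 */

		flow_attach(&f, &obj);
		printf("detached: %d\n", flow_is_detached(&f));	/* prints 0 */
		return 0;
	}

The payoff in the hunk is that fq_flow_is_detached() now touches only the flow's first cache line (the next pointer moved to the second), and the static `detached` sentinel object goes away.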
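
The sch_fq changes above also add a packet "horizon": on enqueue, a packet whose skb->tstamp points further than q->horizon into the future is either dropped (horizon_drop) or has its timestamp capped, and the qdisc first compares against a cached clock sample (ktime_cache) so ktime_get_ns() is only called when the cheap test trips. Below is a minimal userspace sketch of that two-step check, assuming POSIX clock_gettime(); the struct and function names are illustrative, not the kernel's, and the signed-vs-unsigned subtleties of the real code are glossed over.

	#include <stdbool.h>
	#include <stdint.h>
	#include <time.h>

	struct fq_state {
		uint64_t ktime_cache;	/* last clock sample, refreshed lazily */
		uint64_t horizon_ns;	/* how far ahead a tstamp may point    */
		bool horizon_drop;	/* drop vs. cap packets beyond horizon */
		uint64_t stat_horizon_drops;
		uint64_t stat_horizon_caps;
	};

	static uint64_t getnow(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
	}

	static bool beyond_horizon(const struct fq_state *q, uint64_t tstamp)
	{
		return tstamp > q->ktime_cache + q->horizon_ns;
	}

	/* Returns the effective send time, or 0 if the packet should be dropped.
	 * Mirrors the fq_enqueue() flow: test against the cached clock first,
	 * refresh the cache only when the cheap test fails, then cap or drop.
	 */
	static uint64_t horizon_filter(struct fq_state *q, uint64_t tstamp)
	{
		if (!tstamp)
			return q->ktime_cache = getnow();  /* send immediately */

		if (beyond_horizon(q, tstamp)) {
			q->ktime_cache = getnow();	   /* refresh and retest */
			if (beyond_horizon(q, tstamp)) {
				if (q->horizon_drop) {
					q->stat_horizon_drops++;
					return 0;
				}
				q->stat_horizon_caps++;
				tstamp = q->ktime_cache + q->horizon_ns;
			}
		}
		return tstamp;
	}

In the hunk, fq_dequeue() refreshes the same cache once per dequeue pass, which is why the enqueue path can usually trust it without taking another clock sample.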