diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_api.c | 221 | ||||
-rw-r--r-- | net/sched/act_tunnel_key.c | 25 | ||||
-rw-r--r-- | net/sched/act_vlan.c | 2 | ||||
-rw-r--r-- | net/sched/cls_api.c | 337 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 4 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 170 | ||||
-rw-r--r-- | net/sched/cls_matchall.c | 5 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 10 | ||||
-rw-r--r-- | net/sched/sch_api.c | 99 | ||||
-rw-r--r-- | net/sched/sch_etf.c | 79 | ||||
-rw-r--r-- | net/sched/sch_fq.c | 28 | ||||
-rw-r--r-- | net/sched/sch_gred.c | 375 | ||||
-rw-r--r-- | net/sched/sch_mq.c | 18 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 89 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 47 | ||||
-rw-r--r-- | net/sched/sch_red.c | 48 |
16 files changed, 1074 insertions, 483 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9c1b0729aebf..d4b8355737d8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -21,8 +21,6 @@ #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> -#include <linux/rhashtable.h> -#include <linux/list.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> @@ -1522,227 +1520,8 @@ out_module_put: return skb->len; } -struct tcf_action_net { - struct rhashtable egdev_ht; -}; - -static unsigned int tcf_action_net_id; - -struct tcf_action_egdev_cb { - struct list_head list; - tc_setup_cb_t *cb; - void *cb_priv; -}; - -struct tcf_action_egdev { - struct rhash_head ht_node; - const struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; -}; - -static const struct rhashtable_params tcf_action_egdev_ht_params = { - .key_offset = offsetof(struct tcf_action_egdev, dev), - .head_offset = offsetof(struct tcf_action_egdev, ht_node), - .key_len = sizeof(const struct net_device *), -}; - -static struct tcf_action_egdev * -tcf_action_egdev_lookup(const struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - return rhashtable_lookup_fast(&tan->egdev_ht, &dev, - tcf_action_egdev_ht_params); -} - -static struct tcf_action_egdev * -tcf_action_egdev_get(const struct net_device *dev) -{ - struct tcf_action_egdev *egdev; - struct tcf_action_net *tan; - - egdev = tcf_action_egdev_lookup(dev); - if (egdev) - goto inc_ref; - - egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); - if (!egdev) - return NULL; - INIT_LIST_HEAD(&egdev->cb_list); - egdev->dev = dev; - tan = net_generic(dev_net(dev), tcf_action_net_id); - rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, - tcf_action_egdev_ht_params); - -inc_ref: - egdev->refcnt++; - return egdev; -} - -static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) -{ - struct tcf_action_net *tan; - - if (--egdev->refcnt) - return; - tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); - rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, - tcf_action_egdev_ht_params); - kfree(egdev); -} - -static struct tcf_action_egdev_cb * -tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev_cb *egdev_cb; - - list_for_each_entry(egdev_cb, &egdev->cb_list, list) - if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) - return egdev_cb; - return NULL; -} - -static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, - enum tc_setup_type type, - void *type_data, bool err_stop) -{ - struct tcf_action_egdev_cb *egdev_cb; - int ok_count = 0; - int err; - - list_for_each_entry(egdev_cb, &egdev->cb_list, list) { - err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); - if (err) { - if (err_stop) - return err; - } else { - ok_count++; - } - } - return ok_count; -} - -static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev_cb *egdev_cb; - - egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); - if (WARN_ON(egdev_cb)) - return -EEXIST; - egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); - if (!egdev_cb) - return -ENOMEM; - egdev_cb->cb = cb; - egdev_cb->cb_priv = cb_priv; - list_add(&egdev_cb->list, &egdev->cb_list); - return 0; -} - -static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev_cb *egdev_cb; - - egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); - if (WARN_ON(!egdev_cb)) - return; - list_del(&egdev_cb->list); - kfree(egdev_cb); -} - -static int __tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); - int err; - - if (!egdev) - return -ENOMEM; - err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); - if (err) - goto err_cb_add; - return 0; - -err_cb_add: - tcf_action_egdev_put(egdev); - return err; -} -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - int err; - - rtnl_lock(); - err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); - rtnl_unlock(); - return err; -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); - -static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); - - if (WARN_ON(!egdev)) - return; - tcf_action_egdev_cb_del(egdev, cb, cb_priv); - tcf_action_egdev_put(egdev); -} -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - rtnl_lock(); - __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); - rtnl_unlock(); -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); - -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); - - if (!egdev) - return 0; - return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); - -static __net_init int tcf_action_net_init(struct net *net) -{ - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params); -} - -static void __net_exit tcf_action_net_exit(struct net *net) -{ - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - rhashtable_destroy(&tan->egdev_ht); -} - -static struct pernet_operations tcf_action_net_ops = { - .init = tcf_action_net_init, - .exit = tcf_action_net_exit, - .id = &tcf_action_net_id, - .size = sizeof(struct tcf_action_net), -}; - static int __init tc_action_init(void) { - int err; - - err = register_pernet_subsys(&tcf_action_net_ops); - if (err) - return err; - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 4cca8f274662..c3b90fadaff6 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -210,9 +210,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tcf_tunnel_key *t; bool exists = false; __be16 dst_port = 0; + __be64 key_id = 0; int opts_len = 0; - __be64 key_id; - __be16 flags; + __be16 flags = 0; u8 tos, ttl; int ret = 0; int err; @@ -246,15 +246,15 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, case TCA_TUNNEL_KEY_ACT_RELEASE: break; case TCA_TUNNEL_KEY_ACT_SET: - if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { - NL_SET_ERR_MSG(extack, "Missing tunnel key id"); - ret = -EINVAL; - goto err_out; - } + if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + __be32 key32; - key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); + key_id = key32_to_tunnel_id(key32); + flags = TUNNEL_KEY; + } - flags = TUNNEL_KEY | TUNNEL_CSUM; + flags |= TUNNEL_CSUM; if (tb[TCA_TUNNEL_KEY_NO_CSUM] && nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) flags &= ~TUNNEL_CSUM; @@ -508,10 +508,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); - if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + if (((key->tun_flags & TUNNEL_KEY) && + nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) || tunnel_key_dump_addresses(skb, ¶ms->tcft_enc_metadata->u.tun_info) || - nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) || + (key->tp_dst && + nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, + key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, !(key->tun_flags & TUNNEL_CSUM)) || tunnel_key_opts_dump(skb, info)) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ba677d54a7af..93fdaf707313 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -63,7 +63,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, /* extract existing tag (and guarantee no hw-accel tag) */ if (skb_vlan_tag_present(skb)) { tci = skb_vlan_tag_get(skb); - skb->vlan_tci = 0; + __vlan_hwaccel_clear_tag(skb); } else { /* in-payload vlan tag, pop it */ err = __skb_vlan_pop(skb, &tci); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f427a1e00e7e..8ce2a0507970 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -25,6 +25,7 @@ #include <linux/kmod.h> #include <linux/slab.h> #include <linux/idr.h> +#include <linux/rhashtable.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -365,6 +366,245 @@ static void tcf_chain_flush(struct tcf_chain *chain) } } +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) +{ + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; + + if (!dev_ingress_queue(dev)) + return NULL; + + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; + + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; + + if (!cops->tcf_block) + return NULL; + + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); +} + +static struct rhashtable indr_setup_block_ht; + +struct tc_indr_block_dev { + struct rhash_head ht_node; + struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; + struct tcf_block *block; +}; + +struct tc_indr_block_cb { + struct list_head list; + void *cb_priv; + tc_indr_block_bind_cb_t *cb; + void *cb_ident; +}; + +static const struct rhashtable_params tc_indr_setup_block_ht_params = { + .key_offset = offsetof(struct tc_indr_block_dev, dev), + .head_offset = offsetof(struct tc_indr_block_dev, ht_node), + .key_len = sizeof(struct net_device *), +}; + +static struct tc_indr_block_dev * +tc_indr_block_dev_lookup(struct net_device *dev) +{ + return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, + tc_indr_setup_block_ht_params); +} + +static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) +{ + struct tc_indr_block_dev *indr_dev; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (indr_dev) + goto inc_ref; + + indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); + if (!indr_dev) + return NULL; + + INIT_LIST_HEAD(&indr_dev->cb_list); + indr_dev->dev = dev; + indr_dev->block = tc_dev_ingress_block(dev); + if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, + tc_indr_setup_block_ht_params)) { + kfree(indr_dev); + return NULL; + } + +inc_ref: + indr_dev->refcnt++; + return indr_dev; +} + +static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) +{ + if (--indr_dev->refcnt) + return; + + rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, + tc_indr_setup_block_ht_params); + kfree(indr_dev); +} + +static struct tc_indr_block_cb * +tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + if (indr_block_cb->cb == cb && + indr_block_cb->cb_ident == cb_ident) + return indr_block_cb; + return NULL; +} + +static struct tc_indr_block_cb * +tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + + indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (indr_block_cb) + return ERR_PTR(-EEXIST); + + indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); + if (!indr_block_cb) + return ERR_PTR(-ENOMEM); + + indr_block_cb->cb_priv = cb_priv; + indr_block_cb->cb = cb; + indr_block_cb->cb_ident = cb_ident; + list_add(&indr_block_cb->list, &indr_dev->cb_list); + + return indr_block_cb; +} + +static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) +{ + list_del(&indr_block_cb->list); + kfree(indr_block_cb); +} + +static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, + struct tc_indr_block_cb *indr_block_cb, + enum tc_block_command command) +{ + struct tc_block_offload bo = { + .command = command, + .binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + .block = indr_dev->block, + }; + + if (!indr_dev->block) + return; + + indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + &bo); +} + +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + int err; + + indr_dev = tc_indr_block_dev_get(dev); + if (!indr_dev) + return -ENOMEM; + + indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); + err = PTR_ERR_OR_ZERO(indr_block_cb); + if (err) + goto err_dev_put; + + tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND); + return 0; + +err_dev_put: + tc_indr_block_dev_put(indr_dev); + return err; +} +EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); + +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + int err; + + rtnl_lock(); + err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (!indr_block_cb) + return; + + /* Send unbind message if required to free any block cbs. */ + tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND); + tc_indr_block_cb_del(indr_block_cb); + tc_indr_block_dev_put(indr_dev); +} +EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); + +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + rtnl_lock(); + __tc_indr_block_cb_unregister(dev, cb, cb_ident); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); + +static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, + struct tcf_block_ext_info *ei, + enum tc_block_command command, + struct netlink_ext_ack *extack) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + struct tc_block_offload bo = { + .command = command, + .binder_type = ei->binder_type, + .block = block, + .extack = extack, + }; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_dev->block = command == TC_BLOCK_BIND ? block : NULL; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + &bo); +} + static bool tcf_block_offload_in_use(struct tcf_block *block) { return block->offloadcnt; @@ -406,12 +646,17 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; - return err; + if (err) + return err; + + tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack); + return 0; no_offload_dev_inc: if (tcf_block_offload_in_use(block)) return -EOPNOTSUPP; block->nooffloaddevcnt++; + tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack); return 0; } @@ -421,6 +666,8 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL); + if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_dec; err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL); @@ -1023,29 +1270,6 @@ void tcf_block_cb_unregister(struct tcf_block *block, } EXPORT_SYMBOL(tcf_block_cb_unregister); -static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) -{ - struct tcf_block_cb *block_cb; - int ok_count = 0; - int err; - - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - - list_for_each_entry(block_cb, &block->cb_list, list) { - err = block_cb->cb(type, type_data, block_cb->cb_priv); - if (err) { - if (err_stop) - return err; - } else { - ok_count++; - } - } - return ok_count; -} - /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. @@ -2268,54 +2492,26 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, - enum tc_setup_type type, - void *type_data, bool err_stop) +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { + struct tcf_block_cb *block_cb; int ok_count = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tc_action *a; - struct net_device *dev; - int i, ret; + int err; - if (!tcf_exts_has_actions(exts)) - return 0; + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) + return -EOPNOTSUPP; - for (i = 0; i < exts->nr_actions; i++) { - a = exts->actions[i]; - if (!a->ops->get_dev) - continue; - dev = a->ops->get_dev(a); - if (!dev) - continue; - ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); - a->ops->put_dev(dev); - if (ret < 0) - return ret; - ok_count += ret; + list_for_each_entry(block_cb, &block->cb_list, list) { + err = block_cb->cb(type, type_data, block_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } } -#endif - return ok_count; -} - -int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, - enum tc_setup_type type, void *type_data, bool err_stop) -{ - int ok_count; - int ret; - - ret = tcf_block_cb_call(block, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count = ret; - - if (!exts || ok_count) - return ok_count; - ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count += ret; - return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); @@ -2355,6 +2551,11 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; + err = rhashtable_init(&indr_setup_block_ht, + &tc_indr_setup_block_ht_params); + if (err) + goto err_rhash_setup_block_ht; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, @@ -2366,6 +2567,8 @@ static int __init tc_filter_init(void) return 0; +err_rhash_setup_block_ht: + unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fa6fe2fe0f32..a95cb240a606 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -169,7 +169,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, if (oldprog) tcf_block_offload_dec(block, &oldprog->gen_flags); - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); if (prog) { if (err < 0) { cls_bpf_offload_cmd(tp, oldprog, prog, extack); @@ -234,7 +234,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); } static int cls_bpf_init(struct tcf_proto *tp) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 208d940464d7..dad04e710493 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -55,6 +55,8 @@ struct fl_flow_key { struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; struct flow_dissector_key_enc_opts enc_opts; + struct flow_dissector_key_ports tp_min; + struct flow_dissector_key_ports tp_max; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -65,6 +67,7 @@ struct fl_flow_mask_range { struct fl_flow_mask { struct fl_flow_key key; struct fl_flow_mask_range range; + u32 flags; struct rhash_head ht_node; struct rhashtable ht; struct rhashtable_params filter_ht_params; @@ -179,13 +182,89 @@ static void fl_clear_masked_range(struct fl_flow_key *key, memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); } -static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, - struct fl_flow_key *mkey) +static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter, + struct fl_flow_key *key, + struct fl_flow_key *mkey) +{ + __be16 min_mask, max_mask, min_val, max_val; + + min_mask = htons(filter->mask->key.tp_min.dst); + max_mask = htons(filter->mask->key.tp_max.dst); + min_val = htons(filter->key.tp_min.dst); + max_val = htons(filter->key.tp_max.dst); + + if (min_mask && max_mask) { + if (htons(key->tp.dst) < min_val || + htons(key->tp.dst) > max_val) + return false; + + /* skb does not have min and max values */ + mkey->tp_min.dst = filter->mkey.tp_min.dst; + mkey->tp_max.dst = filter->mkey.tp_max.dst; + } + return true; +} + +static bool fl_range_port_src_cmp(struct cls_fl_filter *filter, + struct fl_flow_key *key, + struct fl_flow_key *mkey) +{ + __be16 min_mask, max_mask, min_val, max_val; + + min_mask = htons(filter->mask->key.tp_min.src); + max_mask = htons(filter->mask->key.tp_max.src); + min_val = htons(filter->key.tp_min.src); + max_val = htons(filter->key.tp_max.src); + + if (min_mask && max_mask) { + if (htons(key->tp.src) < min_val || + htons(key->tp.src) > max_val) + return false; + + /* skb does not have min and max values */ + mkey->tp_min.src = filter->mkey.tp_min.src; + mkey->tp_max.src = filter->mkey.tp_max.src; + } + return true; +} + +static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask, + struct fl_flow_key *mkey) { return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), mask->filter_ht_params); } +static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask, + struct fl_flow_key *mkey, + struct fl_flow_key *key) +{ + struct cls_fl_filter *filter, *f; + + list_for_each_entry_rcu(filter, &mask->filters, list) { + if (!fl_range_port_dst_cmp(filter, key, mkey)) + continue; + + if (!fl_range_port_src_cmp(filter, key, mkey)) + continue; + + f = __fl_lookup(mask, mkey); + if (f) + return f; + } + return NULL; +} + +static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, + struct fl_flow_key *mkey, + struct fl_flow_key *key) +{ + if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE)) + return fl_lookup_range(mask, mkey, key); + + return __fl_lookup(mask, mkey); +} + static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -208,7 +287,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, fl_set_masked_key(&skb_mkey, &skb_key, mask); - f = fl_lookup(mask, &skb_mkey); + f = fl_lookup(mask, &skb_mkey, &skb_key); if (f && !tc_skip_sw(f->flags)) { *res = f->res; return tcf_exts_exec(skb, &f->exts, res); @@ -289,8 +368,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, - &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); tcf_block_offload_dec(block, &f->flags); } @@ -312,8 +390,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; - err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, - &cls_flower, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { fl_hw_destroy_filter(tp, f, NULL); return err; @@ -339,8 +416,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, - &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); } static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -514,6 +590,31 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask) +{ + fl_set_key_val(tb, &key->tp_min.dst, + TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst, + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst)); + fl_set_key_val(tb, &key->tp_max.dst, + TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst, + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst)); + fl_set_key_val(tb, &key->tp_min.src, + TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src, + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src)); + fl_set_key_val(tb, &key->tp_max.src, + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src, + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src)); + + if ((mask->tp_min.dst && mask->tp_max.dst && + htons(key->tp_max.dst) <= htons(key->tp_min.dst)) || + (mask->tp_min.src && mask->tp_max.src && + htons(key->tp_max.src) <= htons(key->tp_min.src))) + return -EINVAL; + + return 0; +} + static int fl_set_key_mpls(struct nlattr **tb, struct flow_dissector_key_mpls *key_val, struct flow_dissector_key_mpls *key_mask) @@ -921,6 +1022,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, sizeof(key->arp.tha)); } + if (key->basic.ip_proto == IPPROTO_TCP || + key->basic.ip_proto == IPPROTO_UDP || + key->basic.ip_proto == IPPROTO_SCTP) { + ret = fl_set_key_port_range(tb, key, mask); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -1038,8 +1147,9 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - FL_KEY_SET_IF_MASKED(mask, keys, cnt, - FLOW_DISSECTOR_KEY_PORTS, tp); + if (FL_KEY_IS_MASKED(mask, tp) || + FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max)) + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IP, ip); FL_KEY_SET_IF_MASKED(mask, keys, cnt, @@ -1086,6 +1196,10 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, fl_mask_copy(newmask, mask); + if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) || + (newmask->key.tp_min.src && newmask->key.tp_max.src)) + newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE; + err = fl_init_mask_hashtable(newmask); if (err) goto errout_free; @@ -1238,7 +1352,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout_idr; - if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { + if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { err = -EEXIST; goto errout_mask; } @@ -1384,8 +1498,7 @@ static void fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. */ - tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER, - &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); } static void fl_hw_destroy_tmplt(struct tcf_chain *chain, @@ -1398,8 +1511,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER, - &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, @@ -1472,6 +1584,26 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key, + struct fl_flow_key *mask) +{ + if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN, + &mask->tp_min.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp_min.dst)) || + fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX, + &mask->tp_max.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp_max.dst)) || + fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN, + &mask->tp_min.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp_min.src)) || + fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX, + &mask->tp_max.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp_max.src))) + return -1; + + return 0; +} + static int fl_dump_key_mpls(struct sk_buff *skb, struct flow_dissector_key_mpls *mpls_key, struct flow_dissector_key_mpls *mpls_mask) @@ -1808,6 +1940,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->arp.tha)))) goto nla_put_failure; + if ((key->basic.ip_proto == IPPROTO_TCP || + key->basic.ip_proto == IPPROTO_UDP || + key->basic.ip_proto == IPPROTO_SCTP) && + fl_dump_key_port_range(skb, key, mask)) + goto nla_put_failure; + if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 856fa79d4ffd..0e408ee9dcec 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -71,7 +71,7 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); tcf_block_offload_dec(block, &head->flags); } @@ -90,8 +90,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.exts = &head->exts; cls_mall.cookie = cookie; - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, - &cls_mall, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); if (err < 0) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4b28fd44576d..dcea21004604 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -491,7 +491,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -509,7 +509,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -533,7 +533,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); tcf_block_offload_dec(block, &n->flags); } @@ -558,11 +558,12 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.mask = 0; #endif cls_u32.knode.sel = &n->sel; + cls_u32.knode.res = &n->res; cls_u32.knode.exts = &n->exts; if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { u32_remove_hw_knode(tp, n, NULL); return err; @@ -1206,6 +1207,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.mask = 0; #endif cls_u32.knode.sel = &n->sel; + cls_u32.knode.res = &n->res; cls_u32.knode.exts = &n->exts; if (n->ht_down) cls_u32.knode.link_handle = ht->handle; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 016e628c6ac9..7e4d1ccf4c87 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -335,7 +335,6 @@ out: static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; - struct Qdisc *leaf; const struct Qdisc_class_ops *cops = p->ops->cl_ops; if (cops == NULL) @@ -344,8 +343,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) if (cl == 0) return NULL; - leaf = cops->leaf(p, cl); - return leaf; + return cops->leaf(p, cl); } /* Find queueing discipline by name */ @@ -810,6 +808,71 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, } EXPORT_SYMBOL(qdisc_tree_reduce_backlog); +int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, + void *type_data) +{ + struct net_device *dev = qdisc_dev(sch); + int err; + + sch->flags &= ~TCQ_F_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + sch->flags |= TCQ_F_OFFLOADED; + + return err; +} +EXPORT_SYMBOL(qdisc_offload_dump_helper); + +void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + enum tc_setup_type type, void *type_data, + struct netlink_ext_ack *extack) +{ + bool any_qdisc_is_offloaded; + int err; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); + + /* Don't report error if the graft is part of destroy operation. */ + if (!err || !new || new == &noop_qdisc) + return; + + /* Don't report error if the parent, the old child and the new + * one are not offloaded. + */ + any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; + + if (any_qdisc_is_offloaded) + NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); +} +EXPORT_SYMBOL(qdisc_offload_graft_helper); + +static void qdisc_offload_graft_root(struct net_device *dev, + struct Qdisc *new, struct Qdisc *old, + struct netlink_ext_ack *extack) +{ + struct tc_root_qopt_offload graft_offload = { + .command = TC_ROOT_GRAFT, + .handle = new ? new->handle : 0, + .ingress = (new && new->flags & TCQ_F_INGRESS) || + (old && old->flags & TCQ_F_INGRESS), + }; + + qdisc_offload_graft_helper(dev, NULL, new, old, + TC_SETUP_ROOT_QDISC, &graft_offload, extack); +} + static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { @@ -957,7 +1020,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, { struct Qdisc *q = old; struct net *net = dev_net(dev); - int err = 0; if (parent == NULL) { unsigned int i, num_q, ingress; @@ -977,6 +1039,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); + qdisc_offload_graft_root(dev, new, old, extack); + if (new && new->ops->attach) goto skip; @@ -1012,28 +1076,29 @@ skip: dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; + unsigned long cl; + int err; /* Only support running class lockless if parent is lockless */ if (new && (new->flags & TCQ_F_NOLOCK) && parent && !(parent->flags & TCQ_F_NOLOCK)) new->flags &= ~TCQ_F_NOLOCK; - err = -EOPNOTSUPP; - if (cops && cops->graft) { - unsigned long cl = cops->find(parent, classid); + if (!cops || !cops->graft) + return -EOPNOTSUPP; - if (cl) { - err = cops->graft(parent, cl, new, &old, - extack); - } else { - NL_SET_ERR_MSG(extack, "Specified class not found"); - err = -ENOENT; - } + cl = cops->find(parent, classid); + if (!cl) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return -ENOENT; } - if (!err) - notify_and_destroy(net, skb, n, classid, old, new); + + err = cops->graft(parent, cl, new, &old, extack); + if (err) + return err; + notify_and_destroy(net, skb, n, classid, old, new); } - return err; + return 0; } static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 1538d6fa8165..1150f22983df 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -30,7 +30,7 @@ struct etf_sched_data { int queue; s32 delta; /* in ns */ ktime_t last; /* The txtime of the last skb sent to the netdevice. */ - struct rb_root head; + struct rb_root_cached head; struct qdisc_watchdog watchdog; ktime_t (*get_time)(void); }; @@ -104,7 +104,7 @@ static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) struct etf_sched_data *q = qdisc_priv(sch); struct rb_node *p; - p = rb_first(&q->head); + p = rb_first_cached(&q->head); if (!p) return NULL; @@ -117,8 +117,10 @@ static void reset_watchdog(struct Qdisc *sch) struct sk_buff *skb = etf_peek_timesortedlist(sch); ktime_t next; - if (!skb) + if (!skb) { + qdisc_watchdog_cancel(&q->watchdog); return; + } next = ktime_sub_ns(skb->tstamp, q->delta); qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); @@ -154,8 +156,9 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, struct sk_buff **to_free) { struct etf_sched_data *q = qdisc_priv(sch); - struct rb_node **p = &q->head.rb_node, *parent = NULL; + struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; ktime_t txtime = nskb->tstamp; + bool leftmost = true; if (!is_packet_valid(sch, nskb)) { report_sock_error(nskb, EINVAL, @@ -168,13 +171,15 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, parent = *p; skb = rb_to_skb(parent); - if (ktime_after(txtime, skb->tstamp)) + if (ktime_after(txtime, skb->tstamp)) { p = &parent->rb_right; - else + leftmost = false; + } else { p = &parent->rb_left; + } } rb_link_node(&nskb->rbnode, parent, p); - rb_insert_color(&nskb->rbnode, &q->head); + rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost); qdisc_qstats_backlog_inc(sch, nskb); sch->q.qlen++; @@ -185,12 +190,42 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb, - bool drop) +static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb, + ktime_t now) +{ + struct etf_sched_data *q = qdisc_priv(sch); + struct sk_buff *to_free = NULL; + struct sk_buff *tmp = NULL; + + skb_rbtree_walk_from_safe(skb, tmp) { + if (ktime_after(skb->tstamp, now)) + break; + + rb_erase_cached(&skb->rbnode, &q->head); + + /* The rbnode field in the skb re-uses these fields, now that + * we are done with the rbnode, reset them. + */ + skb->next = NULL; + skb->prev = NULL; + skb->dev = qdisc_dev(sch); + + report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); + + qdisc_qstats_backlog_dec(sch, skb); + qdisc_drop(skb, sch, &to_free); + qdisc_qstats_overlimit(sch); + sch->q.qlen--; + } + + kfree_skb_list(to_free); +} + +static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb) { struct etf_sched_data *q = qdisc_priv(sch); - rb_erase(&skb->rbnode, &q->head); + rb_erase_cached(&skb->rbnode, &q->head); /* The rbnode field in the skb re-uses these fields, now that * we are done with the rbnode, reset them. @@ -201,19 +236,9 @@ static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb, qdisc_qstats_backlog_dec(sch, skb); - if (drop) { - struct sk_buff *to_free = NULL; + qdisc_bstats_update(sch, skb); - report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); - - qdisc_drop(skb, sch, &to_free); - kfree_skb_list(to_free); - qdisc_qstats_overlimit(sch); - } else { - qdisc_bstats_update(sch, skb); - - q->last = skb->tstamp; - } + q->last = skb->tstamp; sch->q.qlen--; } @@ -232,7 +257,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) /* Drop if packet has expired while in queue. */ if (ktime_before(skb->tstamp, now)) { - timesortedlist_erase(sch, skb, true); + timesortedlist_drop(sch, skb, now); skb = NULL; goto out; } @@ -241,7 +266,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) * txtime from deadline to (now + delta). */ if (q->deadline_mode) { - timesortedlist_erase(sch, skb, false); + timesortedlist_remove(sch, skb); skb->tstamp = now; goto out; } @@ -250,7 +275,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) /* Dequeue only if now is within the [txtime - delta, txtime] range. */ if (ktime_after(now, next)) - timesortedlist_erase(sch, skb, false); + timesortedlist_remove(sch, skb); else skb = NULL; @@ -386,14 +411,14 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt, static void timesortedlist_clear(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); - struct rb_node *p = rb_first(&q->head); + struct rb_node *p = rb_first_cached(&q->head); while (p) { struct sk_buff *skb = rb_to_skb(p); p = rb_next(p); - rb_erase(&skb->rbnode, &q->head); + rb_erase_cached(&skb->rbnode, &q->head); rtnl_kfree_skbs(skb, skb); sch->q.qlen--; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 25a7cf6d380f..1a662f2bb7bb 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -94,6 +94,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_plimit; /* max packets per flow */ unsigned long flow_max_rate; /* optional max rate per flow */ + u64 ce_threshold; u32 orphan_mask; /* mask for orphaned skb */ u32 low_rate_threshold; struct rb_root *fq_root; @@ -107,6 +108,7 @@ struct fq_sched_data { u64 stat_gc_flows; u64 stat_internal_packets; u64 stat_throttled; + u64 stat_ce_mark; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; @@ -412,16 +414,21 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_get_ns(); struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; unsigned long rate; u32 plen; + u64 now; + + if (!sch->q.qlen) + return NULL; skb = fq_dequeue_head(sch, &q->internal); if (skb) goto out; + + now = ktime_get_ns(); fq_check_throttled(q, now); begin: head = &q->new_flows; @@ -454,6 +461,11 @@ begin: fq_flow_set_throttled(q, f); goto begin; } + if (time_next_packet && + (s64)(now - time_next_packet - q->ce_threshold) > 0) { + INET_ECN_set_ce(skb); + q->stat_ce_mark++; + } } skb = fq_dequeue_head(sch, f); @@ -657,6 +669,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, + [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt, @@ -736,6 +749,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_ORPHAN_MASK]) q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (tb[TCA_FQ_CE_THRESHOLD]) + q->ce_threshold = (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -786,6 +803,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; q->low_rate_threshold = 550000 / 8; + + /* Default ce_threshold of 4294 seconds */ + q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; + qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) @@ -799,6 +820,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); + u64 ce_threshold = q->ce_threshold; struct nlattr *opts; opts = nla_nest_start(skb, TCA_OPTIONS); @@ -807,6 +829,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ + do_div(ce_threshold, NSEC_PER_USEC); + if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || @@ -819,6 +843,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, q->low_rate_threshold) || + nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; @@ -848,6 +873,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.throttled_flows = q->throttled_flows; st.unthrottle_latency_ns = min_t(unsigned long, q->unthrottle_latency_ns, ~0U); + st.ce_mark = q->stat_ce_mark; sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 4a042abf844c..234afbf9115b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -23,19 +23,23 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> +#include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/red.h> #define GRED_DEF_PRIO (MAX_DPs / 2) #define GRED_VQ_MASK (MAX_DPs - 1) +#define GRED_VQ_RED_FLAGS (TC_RED_ECN | TC_RED_HARDDROP) + struct gred_sched_data; struct gred_sched; struct gred_sched_data { u32 limit; /* HARD maximal queue length */ u32 DP; /* the drop parameters */ - u32 bytesin; /* bytes seen on virtualQ so far*/ + u32 red_flags; /* virtualQ version of red_flags */ + u64 bytesin; /* bytes seen on virtualQ so far*/ u32 packetsin; /* packets seen on virtualQ so far*/ u32 backlog; /* bytes on the virtualQ */ u8 prio; /* the prio of this vq */ @@ -139,14 +143,27 @@ static inline void gred_store_wred_set(struct gred_sched *table, table->wred_set.qidlestart = q->vars.qidlestart; } -static inline int gred_use_ecn(struct gred_sched *t) +static int gred_use_ecn(struct gred_sched_data *q) +{ + return q->red_flags & TC_RED_ECN; +} + +static int gred_use_harddrop(struct gred_sched_data *q) { - return t->red_flags & TC_RED_ECN; + return q->red_flags & TC_RED_HARDDROP; } -static inline int gred_use_harddrop(struct gred_sched *t) +static bool gred_per_vq_red_flags_used(struct gred_sched *table) { - return t->red_flags & TC_RED_HARDDROP; + unsigned int i; + + /* Local per-vq flags couldn't have been set unless global are 0 */ + if (table->red_flags) + return false; + for (i = 0; i < MAX_DPs; i++) + if (table->tab[i] && table->tab[i]->red_flags) + return true; + return false; } static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -212,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_PROB_MARK: qdisc_qstats_overlimit(sch); - if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { + if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.prob_drop++; goto congestion_drop; } @@ -222,7 +239,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_HARD_MARK: qdisc_qstats_overlimit(sch); - if (gred_use_harddrop(t) || !gred_use_ecn(t) || + if (gred_use_harddrop(q) || !gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.forced_drop++; goto congestion_drop; @@ -295,15 +312,103 @@ static void gred_reset(struct Qdisc *sch) } } +static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) +{ + struct gred_sched *table = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_gred_qopt_offload opt = { + .command = command, + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + if (command == TC_GRED_REPLACE) { + unsigned int i; + + opt.set.grio_on = gred_rio_mode(table); + opt.set.wred_on = gred_wred_mode(table); + opt.set.dp_cnt = table->DPs; + opt.set.dp_def = table->def; + + for (i = 0; i < table->DPs; i++) { + struct gred_sched_data *q = table->tab[i]; + + if (!q) + continue; + opt.set.tab[i].present = true; + opt.set.tab[i].limit = q->limit; + opt.set.tab[i].prio = q->prio; + opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; + opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; + opt.set.tab[i].is_ecn = gred_use_ecn(q); + opt.set.tab[i].is_harddrop = gred_use_harddrop(q); + opt.set.tab[i].probability = q->parms.max_P; + opt.set.tab[i].backlog = &q->backlog; + } + opt.set.qstats = &sch->qstats; + } + + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt); +} + +static int gred_offload_dump_stats(struct Qdisc *sch) +{ + struct gred_sched *table = qdisc_priv(sch); + struct tc_gred_qopt_offload *hw_stats; + unsigned int i; + int ret; + + hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL); + if (!hw_stats) + return -ENOMEM; + + hw_stats->command = TC_GRED_STATS; + hw_stats->handle = sch->handle; + hw_stats->parent = sch->parent; + + for (i = 0; i < MAX_DPs; i++) + if (table->tab[i]) + hw_stats->stats.xstats[i] = &table->tab[i]->stats; + + ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); + /* Even if driver returns failure adjust the stats - in case offload + * ended but driver still wants to adjust the values. + */ + for (i = 0; i < MAX_DPs; i++) { + if (!table->tab[i]) + continue; + table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets; + table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; + table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; + + _bstats_update(&sch->bstats, + hw_stats->stats.bstats[i].bytes, + hw_stats->stats.bstats[i].packets); + sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; + sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; + sch->qstats.drops += hw_stats->stats.qstats[i].drops; + sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; + sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; + } + + kfree(hw_stats); + return ret; +} + static inline void gred_destroy_vq(struct gred_sched_data *q) { kfree(q); } -static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) +static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps, + struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_sopt *sopt; + bool red_flags_changed; int i; if (!dps) @@ -311,13 +416,28 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) sopt = nla_data(dps); - if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || - sopt->def_DP >= sopt->DPs) + if (sopt->DPs > MAX_DPs) { + NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high"); + return -EINVAL; + } + if (sopt->DPs == 0) { + NL_SET_ERR_MSG_MOD(extack, + "number of virtual queues can't be 0"); + return -EINVAL; + } + if (sopt->def_DP >= sopt->DPs) { + NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count"); return -EINVAL; + } + if (sopt->flags && gred_per_vq_red_flags_used(table)) { + NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used"); + return -EINVAL; + } sch_tree_lock(sch); table->DPs = sopt->DPs; table->def = sopt->def_DP; + red_flags_changed = table->red_flags != sopt->flags; table->red_flags = sopt->flags; /* @@ -337,6 +457,12 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) gred_disable_wred_mode(table); } + if (red_flags_changed) + for (i = 0; i < table->DPs; i++) + if (table->tab[i]) + table->tab[i]->red_flags = + table->red_flags & GRED_VQ_RED_FLAGS; + for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", @@ -346,25 +472,30 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) } } + gred_offload(sch, TC_GRED_REPLACE); return 0; } static inline int gred_change_vq(struct Qdisc *sch, int dp, struct tc_gred_qopt *ctl, int prio, u8 *stab, u32 max_P, - struct gred_sched_data **prealloc) + struct gred_sched_data **prealloc, + struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) { + NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; + } if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; if (!q) return -ENOMEM; + q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; } q->DP = dp; @@ -384,14 +515,127 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, return 0; } +static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = { + [TCA_GRED_VQ_DP] = { .type = NLA_U32 }, + [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = { + [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED }, +}; + static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, [TCA_GRED_LIMIT] = { .type = NLA_U32 }, + [TCA_GRED_VQ_LIST] = { .type = NLA_NESTED }, }; +static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) +{ + struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; + u32 dp; + + nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); + + dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); + + if (tb[TCA_GRED_VQ_FLAGS]) + table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); +} + +static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs) +{ + const struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, vqs, rem) { + switch (nla_type(attr)) { + case TCA_GRED_VQ_ENTRY: + gred_vq_apply(table, attr); + break; + } + } +} + +static int gred_vq_validate(struct gred_sched *table, u32 cdp, + const struct nlattr *entry, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; + int err; + u32 dp; + + err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, + extack); + if (err < 0) + return err; + + if (!tb[TCA_GRED_VQ_DP]) { + NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified"); + return -EINVAL; + } + dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); + if (dp >= table->DPs) { + NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds"); + return -EINVAL; + } + if (dp != cdp && !table->tab[dp]) { + NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated"); + return -EINVAL; + } + + if (tb[TCA_GRED_VQ_FLAGS]) { + u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); + + if (table->red_flags && table->red_flags != red_flags) { + NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used"); + return -EINVAL; + } + if (red_flags & ~GRED_VQ_RED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, + "invalid RED flags specified"); + return -EINVAL; + } + } + + return 0; +} + +static int gred_vqs_validate(struct gred_sched *table, u32 cdp, + struct nlattr *vqs, struct netlink_ext_ack *extack) +{ + const struct nlattr *attr; + int rem, err; + + err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX, + gred_vqe_policy, extack); + if (err < 0) + return err; + + nla_for_each_nested(attr, vqs, rem) { + switch (nla_type(attr)) { + case TCA_GRED_VQ_ENTRY: + err = gred_vq_validate(table, cdp, attr, extack); + if (err) + return err; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes"); + return -EINVAL; + } + } + + if (rem > 0) { + NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list"); + return -EINVAL; + } + + return 0; +} + static int gred_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -406,29 +650,39 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, tb[TCA_GRED_DPS]); + return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } if (tb[TCA_GRED_PARMS] == NULL || tb[TCA_GRED_STAB] == NULL || - tb[TCA_GRED_LIMIT] != NULL) + tb[TCA_GRED_LIMIT] != NULL) { + NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time"); return -EINVAL; + } max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; - err = -EINVAL; ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); - if (ctl->DP >= table->DPs) - goto errout; + if (ctl->DP >= table->DPs) { + NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count"); + return -EINVAL; + } + + if (tb[TCA_GRED_VQ_LIST]) { + err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST], + extack); + if (err) + return err; + } if (gred_rio_mode(table)) { if (ctl->prio == 0) { @@ -448,9 +702,13 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); sch_tree_lock(sch); - err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc); + err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc, + extack); if (err < 0) - goto errout_locked; + goto err_unlock_free; + + if (tb[TCA_GRED_VQ_LIST]) + gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]); if (gred_rio_mode(table)) { gred_disable_wred_mode(table); @@ -458,12 +716,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, gred_enable_wred_mode(table); } - err = 0; + sch_tree_unlock(sch); + kfree(prealloc); + + gred_offload(sch, TC_GRED_REPLACE); + return 0; -errout_locked: +err_unlock_free: sch_tree_unlock(sch); kfree(prealloc); -errout: return err; } @@ -476,12 +737,15 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; - if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) + if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) { + NL_SET_ERR_MSG_MOD(extack, + "virtual queue configuration can't be specified at initialization time"); return -EINVAL; + } if (tb[TCA_GRED_LIMIT]) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); @@ -489,13 +753,13 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, sch->limit = qdisc_dev(sch)->tx_queue_len * psched_mtu(qdisc_dev(sch)); - return gred_change_table_def(sch, tb[TCA_GRED_DPS]); + return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) { struct gred_sched *table = qdisc_priv(sch); - struct nlattr *parms, *opts = NULL; + struct nlattr *parms, *vqs, *opts = NULL; int i; u32 max_p[MAX_DPs]; struct tc_gred_sopt sopt = { @@ -505,6 +769,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) .flags = table->red_flags, }; + if (gred_offload_dump_stats(sch)) + goto nla_put_failure; + opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; @@ -522,6 +789,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) goto nla_put_failure; + /* Old style all-in-one dump of VQs */ parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; @@ -572,6 +840,58 @@ append_opt: nla_nest_end(skb, parms); + /* Dump the VQs again, in more structured way */ + vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST); + if (!vqs) + goto nla_put_failure; + + for (i = 0; i < MAX_DPs; i++) { + struct gred_sched_data *q = table->tab[i]; + struct nlattr *vq; + + if (!q) + continue; + + vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY); + if (!vq) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags)) + goto nla_put_failure; + + /* Stats */ + if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin, + TCA_GRED_VQ_PAD)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG, + gred_backlog(table, q, sch))) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP, + q->stats.prob_drop)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK, + q->stats.prob_mark)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP, + q->stats.forced_drop)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK, + q->stats.forced_mark)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other)) + goto nla_put_failure; + + nla_nest_end(skb, vq); + } + nla_nest_end(skb, vqs); + return nla_nest_end(skb, opts); nla_put_failure: @@ -588,6 +908,7 @@ static void gred_destroy(struct Qdisc *sch) if (table->tab[i]) gred_destroy_vq(table->tab[i]); } + gred_offload(sch, TC_GRED_DESTROY); } static struct Qdisc_ops gred_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f20f3a0f8424..203659bc3906 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -38,9 +38,8 @@ static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd) return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); } -static void mq_offload_stats(struct Qdisc *sch) +static int mq_offload_stats(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); struct tc_mq_qopt_offload opt = { .command = TC_MQ_STATS, .handle = sch->handle, @@ -50,8 +49,7 @@ static void mq_offload_stats(struct Qdisc *sch) }, }; - if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); + return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt); } static void mq_destroy(struct Qdisc *sch) @@ -171,9 +169,8 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) spin_unlock_bh(qdisc_lock(qdisc)); } - mq_offload_stats(sch); - return 0; + return mq_offload_stats(sch); } static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) @@ -196,6 +193,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + struct tc_mq_qopt_offload graft_offload; struct net_device *dev = qdisc_dev(sch); if (dev->flags & IFF_UP) @@ -206,6 +204,14 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); + + graft_offload.handle = sch->handle; + graft_offload.graft_params.queue = cl - 1; + graft_offload.graft_params.child_handle = new ? new->handle : 0; + graft_offload.command = TC_MQ_GRAFT; + + qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old, + TC_SETUP_QDISC_MQ, &graft_offload, extack); return 0; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 22cd46a60057..75046ec72144 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,6 +77,10 @@ struct netem_sched_data { /* internal t(ime)fifo qdisc uses t_root and sch->limit */ struct rb_root t_root; + /* a linear queue; reduces rbtree rebalancing when jitter is low */ + struct sk_buff *t_head; + struct sk_buff *t_tail; + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch) rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } + + rtnl_kfree_skbs(q->t_head, q->t_tail); + q->t_head = NULL; + q->t_tail = NULL; } static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); u64 tnext = netem_skb_cb(nskb)->time_to_send; - struct rb_node **p = &q->t_root.rb_node, *parent = NULL; - while (*p) { - struct sk_buff *skb; - - parent = *p; - skb = rb_to_skb(parent); - if (tnext >= netem_skb_cb(skb)->time_to_send) - p = &parent->rb_right; + if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { + if (q->t_tail) + q->t_tail->next = nskb; else - p = &parent->rb_left; + q->t_head = nskb; + q->t_tail = nskb; + } else { + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; + + while (*p) { + struct sk_buff *skb; + + parent = *p; + skb = rb_to_skb(parent); + if (tnext >= netem_skb_cb(skb)->time_to_send) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&nskb->rbnode, parent, p); + rb_insert_color(&nskb->rbnode, &q->t_root); } - rb_link_node(&nskb->rbnode, parent, p); - rb_insert_color(&nskb->rbnode, &q->t_root); sch->q.qlen++; } @@ -533,9 +550,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || - t_last->time_to_send > last->time_to_send) { + t_last->time_to_send > last->time_to_send) + last = t_last; + } + if (q->t_tail) { + struct netem_skb_cb *t_last = + netem_skb_cb(q->t_tail); + + if (!last || + t_last->time_to_send > last->time_to_send) last = t_last; - } } if (last) { @@ -614,11 +638,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) q->slot.bytes_left = q->slot_config.max_bytes; } +static struct sk_buff *netem_peek(struct netem_sched_data *q) +{ + struct sk_buff *skb = skb_rb_first(&q->t_root); + u64 t1, t2; + + if (!skb) + return q->t_head; + if (!q->t_head) + return skb; + + t1 = netem_skb_cb(skb)->time_to_send; + t2 = netem_skb_cb(q->t_head)->time_to_send; + if (t1 < t2) + return skb; + return q->t_head; +} + +static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb) +{ + if (skb == q->t_head) { + q->t_head = skb->next; + if (!q->t_head) + q->t_tail = NULL; + } else { + rb_erase(&skb->rbnode, &q->t_root); + } +} + static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - struct rb_node *p; tfifo_dequeue: skb = __qdisc_dequeue_head(&sch->q); @@ -628,20 +679,18 @@ deliver: qdisc_bstats_update(sch, skb); return skb; } - p = rb_first(&q->t_root); - if (p) { + skb = netem_peek(q); + if (skb) { u64 time_to_send; u64 now = ktime_get_ns(); - skb = rb_to_skb(p); - /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; if (q->slot.slot_next && q->slot.slot_next < time_to_send) get_slot_next(q, now); - if (time_to_send <= now && q->slot.slot_next <= now) { - rb_erase(p, &q->t_root); + if (time_to_send <= now && q->slot.slot_next <= now) { + netem_erase_head(q, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index f8af98621179..cdf68706e40f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -220,7 +220,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_put(child); } for (i = oldbands; i < q->bands; i++) { @@ -230,6 +229,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); + + for (i = q->bands; i < oldbands; i++) + qdisc_put(q->queues[i]); return 0; } @@ -251,7 +253,6 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt, static int prio_dump_offload(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload hw_stats = { .command = TC_PRIO_STATS, .handle = sch->handle, @@ -263,21 +264,8 @@ static int prio_dump_offload(struct Qdisc *sch) }, }, }; - int err; - - sch->flags &= ~TCQ_F_OFFLOADED; - if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) - return 0; - - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, - &hw_stats); - if (err == -EOPNOTSUPP) - return 0; - if (!err) - sch->flags |= TCQ_F_OFFLOADED; - - return err; + return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_PRIO, &hw_stats); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -309,43 +297,22 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt_offload graft_offload; - struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; - bool any_qdisc_is_offloaded; - int err; if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->queues[band]); - if (!tc_can_offload(dev)) - return 0; - graft_offload.handle = sch->handle; graft_offload.parent = sch->parent; graft_offload.graft_params.band = band; graft_offload.graft_params.child_handle = new->handle; graft_offload.command = TC_PRIO_GRAFT; - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, - &graft_offload); - - /* Don't report error if the graft is part of destroy operation. */ - if (err && new != &noop_qdisc) { - /* Don't report error if the parent, the old child and the new - * one are not offloaded. - */ - any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; - any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED; - if (*old) - any_qdisc_is_offloaded |= (*old)->flags & - TCQ_F_OFFLOADED; - - if (any_qdisc_is_offloaded) - NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); - } - + qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old, + TC_SETUP_QDISC_PRIO, &graft_offload, + extack); return 0; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3ce6c0a2c493..9df9942340ea 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -166,7 +166,9 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.set.min = q->parms.qth_min >> q->parms.Wlog; opt.set.max = q->parms.qth_max >> q->parms.Wlog; opt.set.probability = q->parms.max_P; + opt.set.limit = q->limit; opt.set.is_ecn = red_use_ecn(q); + opt.set.is_harddrop = red_use_harddrop(q); opt.set.qstats = &sch->qstats; } else { opt.command = TC_RED_DESTROY; @@ -193,10 +195,10 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { static int red_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; struct tc_red_qopt *ctl; - struct Qdisc *child = NULL; int err; u32 max_P; @@ -233,7 +235,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_put(q->qdisc); + old_child = q->qdisc; q->qdisc = child; } @@ -252,7 +254,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); + red_offload(sch, true); + + if (old_child) + qdisc_put(old_child); return 0; } @@ -279,9 +285,8 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, return red_change(sch, opt, extack); } -static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) +static int red_dump_offload_stats(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); struct tc_red_qopt_offload hw_stats = { .command = TC_RED_STATS, .handle = sch->handle, @@ -291,22 +296,8 @@ static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) .stats.qstats = &sch->qstats, }, }; - int err; - - sch->flags &= ~TCQ_F_OFFLOADED; - - if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) - return 0; - - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, - &hw_stats); - if (err == -EOPNOTSUPP) - return 0; - if (!err) - sch->flags |= TCQ_F_OFFLOADED; - - return err; + return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats); } static int red_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -324,7 +315,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) }; int err; - err = red_dump_offload_stats(sch, &opt); + err = red_dump_offload_stats(sch); if (err) goto nla_put_failure; @@ -377,6 +368,21 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl, return 0; } +static void red_graft_offload(struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + struct netlink_ext_ack *extack) +{ + struct tc_red_qopt_offload graft_offload = { + .handle = sch->handle, + .parent = sch->parent, + .child_handle = new->handle, + .command = TC_RED_GRAFT, + }; + + qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, + TC_SETUP_QDISC_RED, &graft_offload, extack); +} + static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { @@ -386,6 +392,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); + + red_graft_offload(sch, new, *old, extack); return 0; } |