diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 40 | ||||
-rw-r--r-- | net/core/devlink.c | 71 | ||||
-rw-r--r-- | net/core/fib_rules.c | 2 | ||||
-rw-r--r-- | net/core/netclassid_cgroup.c | 47 | ||||
-rw-r--r-- | net/core/page_pool.c | 22 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 26 | ||||
-rw-r--r-- | net/core/skbuff.c | 6 | ||||
-rw-r--r-- | net/core/sock.c | 5 |
8 files changed, 123 insertions, 96 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index a69e8bd7ed74..c6c985fe7b1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,7 +146,6 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 -#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) name_node = netdev_name_node_lookup(net, name); if (!name_node) return -ENOENT; + /* lookup might have found our primary name or a name belonging + * to another device. + */ + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + __netdev_name_node_alt_destroy(name_node); return 0; @@ -3071,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev, if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); + if (hash >= qoffset) + hash -= qoffset; while (unlikely(hash >= qcount)) hash -= qcount; return hash + qoffset; @@ -3657,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && - qdisc_run_begin(q)) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, - &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - goto end_run; - } - qdisc_bstats_cpu_update(q, skb); - - rc = NET_XMIT_SUCCESS; - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) - __qdisc_run(q); - -end_run: - qdisc_run_end(q); - } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); - } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); @@ -4527,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ - if (skb_cloned(skb) || skb_is_tc_redirected(skb)) + if (skb_is_tc_redirected(skb)) return XDP_PASS; /* XDP packets must be linear and must have sufficient headroom * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also * native XDP provides, thus we need to do it here as well. */ - if (skb_is_nonlinear(skb) || + if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); int troom = skb->tail + skb->data_len - skb->end; @@ -7201,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, return 0; } -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; @@ -7214,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { diff --git a/net/core/devlink.c b/net/core/devlink.c index 549ee56b7a21..b831c5545d6a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2103,11 +2103,11 @@ err_action_values_put: static struct devlink_dpipe_table * devlink_dpipe_table_find(struct list_head *dpipe_tables, - const char *table_name) + const char *table_name, struct devlink *devlink) { struct devlink_dpipe_table *table; - - list_for_each_entry_rcu(table, dpipe_tables, list) { + list_for_each_entry_rcu(table, dpipe_tables, list, + lockdep_is_held(&devlink->lock)) { if (!strcmp(table->name, table_name)) return table; } @@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink, struct devlink_dpipe_table *table; table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + struct nlattr *param_data; int len; - if (param->type != DEVLINK_PARAM_TYPE_BOOL && - !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u8)) + return -EINVAL; + value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u16)) + return -EINVAL; + value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u32)) + return -EINVAL; + value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), - nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); - if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len = strnlen(nla_data(param_data), nla_len(param_data)); + if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - strcpy(value->vstr, - nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? - true : false; + if (param_data && nla_len(param_data)) + return -EINVAL; + value->vbool = nla_get_flag(param_data); break; } return 0; @@ -5951,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, @@ -6854,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, rcu_read_lock(); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); enabled = false; if (table) enabled = table->counters_enabled; @@ -6878,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; - - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) - return -EEXIST; + int err = 0; if (WARN_ON(!table_ops->size_get)) return -EINVAL; + mutex_lock(&devlink->lock); + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, + devlink)) { + err = -EEXIST; + goto unlock; + } + table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; + if (!table) { + err = -ENOMEM; + goto unlock; + } table->name = table_name; table->table_ops = table_ops; table->priv = priv; table->counter_control_extern = counter_control_extern; - mutex_lock(&devlink->lock); list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); +unlock: mutex_unlock(&devlink->lock); - return 0; + return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); @@ -6914,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) goto unlock; list_del_rcu(&table->list); @@ -7071,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) { err = -EINVAL; goto out; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3e7e15278c46..bd7eba9066f8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0642f91c4038..b4c87fe31be2 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9b7cbe35df37..10d2b255df5e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -99,8 +99,7 @@ EXPORT_SYMBOL(page_pool_create); static void __page_pool_return_page(struct page_pool *pool, struct page *page); noinline -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, - bool refill) +static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; struct page *page; @@ -141,8 +140,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, page = NULL; break; } - } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL && - refill); + } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) @@ -155,20 +153,16 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, /* fast path */ static struct page *__page_pool_get_cached(struct page_pool *pool) { - bool refill = false; struct page *page; - /* Test for safe-context, caller should provide this guarantee */ - if (likely(in_serving_softirq())) { - if (likely(pool->alloc.count)) { - /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; - return page; - } - refill = true; + /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ + if (likely(pool->alloc.count)) { + /* Fast-path */ + page = pool->alloc.cache[--pool->alloc.count]; + } else { + page = page_pool_refill_alloc_cache(pool); } - page = page_pool_refill_alloc_cache(pool, refill); return page; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09c44bf2e1d2..e1152f4ffe33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, if (err) return err; - alt_ifname = nla_data(attr); + alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + if (cmd == RTM_NEWLINKPROP) { - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); - if (!alt_ifname) - return -ENOMEM; err = netdev_name_node_alt_create(dev, alt_ifname); - if (err) { - kfree(alt_ifname); - return err; - } + if (!err) + alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); - if (err) - return err; } else { - WARN_ON(1); - return 0; + WARN_ON_ONCE(1); + err = -EINVAL; } - *changed = true; - return 0; + kfree(alt_ifname); + if (!err) + *changed = true; + return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 864cb9e9622f..e1101a4f90a6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -4805,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { - switch (proto) { - int err; + int err; + switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); diff --git a/net/core/sock.c b/net/core/sock.c index a4c8fac781ff..8f71684305c3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); |