summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 10:05:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 10:05:22 -0700
commit86e98ed15b3e34460d1b3095bd119b6fac11841c (patch)
tree1e65d1844af224948c04fe0abaf9362ad071ee2b /kernel
parentcd546fa325161fbe374480a5081b6ebb7d1bec95 (diff)
parent9403d9cb564b6a3af86cb18fe722097ed7620f6f (diff)
Merge tag 'cgroup-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - cpuset changes including the fix for an incorrect interaction with CPU hotplug and an optimization - Other doc and cosmetic changes * tag 'cgroup-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: docs: cgroup-v1/cpusets: update libcgroup project link cgroup/cpuset: Minor updates to test_cpuset_prs.sh cgroup/cpuset: Include offline CPUs when tasks' cpumasks in top_cpuset are updated cgroup/cpuset: Skip task update if hotplug doesn't affect current cpuset cpuset: Clean up cpuset_node_allowed cgroup: bpf: use cgroup_lock()/cgroup_unlock() wrappers
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cgroup.c38
-rw-r--r--kernel/bpf/cgroup_iter.c4
-rw-r--r--kernel/bpf/local_storage.c4
-rw-r--r--kernel/cgroup/cgroup-v1.c16
-rw-r--r--kernel/cgroup/cgroup.c60
-rw-r--r--kernel/cgroup/cpuset.c30
6 files changed, 80 insertions, 72 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a06e118a9be5..517b6a5928cc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -173,11 +173,11 @@ void bpf_cgroup_atype_put(int cgroup_atype)
{
int i = cgroup_atype - CGROUP_LSM_START;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
if (--cgroup_lsm_atype[i].refcnt <= 0)
cgroup_lsm_atype[i].attach_btf_id = 0;
WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
#else
static enum cgroup_bpf_attach_type
@@ -282,7 +282,7 @@ static void cgroup_bpf_release(struct work_struct *work)
unsigned int atype;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
struct hlist_head *progs = &cgrp->bpf.progs[atype];
@@ -315,7 +315,7 @@ static void cgroup_bpf_release(struct work_struct *work)
bpf_cgroup_storage_free(storage);
}
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
@@ -729,9 +729,9 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
{
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -831,7 +831,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
cg_link = container_of(link, struct bpf_cgroup_link, link);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
/* link might have been auto-released by dying cgroup, so fail */
if (!cg_link->cgroup) {
ret = -ENOLINK;
@@ -843,7 +843,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
}
ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
out_unlock:
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -1009,9 +1009,9 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
{
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -1120,9 +1120,9 @@ static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
{
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = __cgroup_bpf_query(cgrp, attr, uattr);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -1189,11 +1189,11 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
if (!cg_link->cgroup)
return;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
/* re-check cgroup under lock again */
if (!cg_link->cgroup) {
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return;
}
@@ -1205,7 +1205,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
cg = cg_link->cgroup;
cg_link->cgroup = NULL;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
cgroup_put(cg);
}
@@ -1232,10 +1232,10 @@ static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
container_of(link, struct bpf_cgroup_link, link);
u64 cg_id = 0;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
if (cg_link->cgroup)
cg_id = cgroup_id(cg_link->cgroup);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
seq_printf(seq,
"cgroup_id:\t%llu\n"
@@ -1251,10 +1251,10 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
container_of(link, struct bpf_cgroup_link, link);
u64 cg_id = 0;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
if (cg_link->cgroup)
cg_id = cgroup_id(cg_link->cgroup);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
info->cgroup.cgroup_id = cg_id;
info->cgroup.attach_type = cg_link->type;
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index 06989d278846..810378f04fbc 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -58,7 +58,7 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
struct cgroup_iter_priv *p = seq->private;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
/* cgroup_iter doesn't support read across multiple sessions. */
if (*pos > 0) {
@@ -89,7 +89,7 @@ static void cgroup_iter_seq_stop(struct seq_file *seq, void *v)
{
struct cgroup_iter_priv *p = seq->private;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
/* pass NULL to the prog for post-processing */
if (!v) {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 4c7bbec4a9e4..a04f505aefe9 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -333,14 +333,14 @@ static void cgroup_storage_map_free(struct bpf_map *_map)
struct list_head *storages = &map->list;
struct bpf_cgroup_storage *storage, *stmp;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
list_for_each_entry_safe(storage, stmp, storages, list_map) {
bpf_cgroup_storage_unlink(storage);
bpf_cgroup_storage_free(storage);
}
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list));
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 52bb5a74a23b..aeef06c465ef 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -58,7 +58,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
struct cgroup_root *root;
int retval = 0;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_attach_lock(true);
for_each_root(root) {
struct cgroup *from_cgrp;
@@ -72,7 +72,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
break;
}
cgroup_attach_unlock(true);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return retval;
}
@@ -106,7 +106,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (ret)
return ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
percpu_down_write(&cgroup_threadgroup_rwsem);
@@ -145,7 +145,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
out_err:
cgroup_migrate_finish(&mgctx);
percpu_up_write(&cgroup_threadgroup_rwsem);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -847,13 +847,13 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
TRACE_CGROUP_PATH(rename, cgrp);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent);
@@ -1119,7 +1119,7 @@ int cgroup1_reconfigure(struct fs_context *fc)
trace_cgroup_remount(root);
out_unlock:
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -1246,7 +1246,7 @@ int cgroup1_get_tree(struct fs_context *fc)
if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
ret = 1; /* restart */
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (!ret)
ret = cgroup_do_get_tree(fc);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f2a77b1b45a2..625d7483951c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1391,7 +1391,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_favor_dynmods(root, false);
cgroup_exit_root_id(root);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
cgroup_rstat_exit(cgrp);
kernfs_destroy_root(root->kf_root);
@@ -1635,7 +1635,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn)
else
cgrp = kn->parent->priv;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kernfs_unbreak_active_protection(kn);
cgroup_put(cgrp);
@@ -1680,7 +1680,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
if (drain_offline)
cgroup_lock_and_drain_offline(cgrp);
else
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
if (!cgroup_is_dead(cgrp))
return cgrp;
@@ -2177,13 +2177,13 @@ int cgroup_do_get_tree(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb;
struct cgroup *cgrp;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
@@ -2366,13 +2366,13 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
{
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -2398,7 +2398,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
int hierarchy_id = 1;
int ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
@@ -2412,7 +2412,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
}
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
@@ -3121,7 +3121,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
int ssid;
restart:
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
for_each_subsys(ss, ssid) {
@@ -3135,7 +3135,7 @@ restart:
prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
schedule();
finish_wait(&dsct->offline_waitq, &wait);
@@ -4384,9 +4384,9 @@ int cgroup_rm_cftypes(struct cftype *cfts)
if (!(cfts[0].flags & __CFTYPE_ADDED))
return -ENOENT;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
ret = cgroup_rm_cftypes_locked(cfts);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -4418,14 +4418,14 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
if (ret)
return ret;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
list_add_tail(&cfts->node, &ss->cfts);
ret = cgroup_apply_cftypes(cfts, true);
if (ret)
cgroup_rm_cftypes_locked(cfts);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
return ret;
}
@@ -5395,7 +5395,7 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
css->flags |= CSS_RELEASED;
list_del_rcu(&css->sibling);
@@ -5436,7 +5436,7 @@ static void css_release_work_fn(struct work_struct *work)
NULL);
}
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
@@ -5784,7 +5784,7 @@ static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
do {
offline_css(css);
@@ -5793,7 +5793,7 @@ static void css_killed_work_fn(struct work_struct *work)
css = css->parent;
} while (css && atomic_dec_and_test(&css->online_cnt));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/* css kill confirmation processing requires process context, bounce */
@@ -5977,7 +5977,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
pr_debug("Initializing cgroup subsys %s\n", ss->name);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
idr_init(&ss->css_idr);
INIT_LIST_HEAD(&ss->cfts);
@@ -6021,7 +6021,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
BUG_ON(online_css(css));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/**
@@ -6081,7 +6081,7 @@ int __init cgroup_init(void)
get_user_ns(init_cgroup_ns.user_ns);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
/*
* Add init_css_set to the hash table so that dfl_root can link to
@@ -6092,7 +6092,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
for_each_subsys(ss, ssid) {
if (ss->early_init) {
@@ -6144,9 +6144,9 @@ int __init cgroup_init(void)
if (ss->bind)
ss->bind(init_css_set.subsys[ssid]);
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
css_populate_dir(init_css_set.subsys[ssid]);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
}
/* init_css_set.subsys[] has been updated, re-hash */
@@ -6251,7 +6251,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
spin_lock_irq(&css_set_lock);
for_each_root(root) {
@@ -6306,7 +6306,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0;
out_unlock:
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
kfree(buf);
out:
return retval;
@@ -6390,7 +6390,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
struct file *f;
if (kargs->flags & CLONE_INTO_CGROUP)
- mutex_lock(&cgroup_mutex);
+ cgroup_lock();
cgroup_threadgroup_change_begin(current);
@@ -6465,7 +6465,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
err:
cgroup_threadgroup_change_end(current);
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (f)
fput(f);
if (dst_cgrp)
@@ -6492,7 +6492,7 @@ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
struct cgroup *cgrp = kargs->cgrp;
struct css_set *cset = kargs->cset;
- mutex_unlock(&cgroup_mutex);
+ cgroup_unlock();
if (cset) {
put_css_set(cset);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 505d86b16642..e4ca2dd2b764 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1209,7 +1209,9 @@ void rebuild_sched_domains(void)
*
* Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held,
- * cpuset membership stays stable.
+ * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
+ * is used instead of effective_cpus to make sure all offline CPUs are also
+ * included as hotplug code won't update cpumasks for tasks in top_cpuset.
*/
static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
@@ -1219,15 +1221,18 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it))) {
- /*
- * Percpu kthreads in top_cpuset are ignored
- */
- if (top_cs && (task->flags & PF_KTHREAD) &&
- kthread_is_per_cpu(task))
- continue;
+ const struct cpumask *possible_mask = task_cpu_possible_mask(task);
- cpumask_and(new_cpus, cs->effective_cpus,
- task_cpu_possible_mask(task));
+ if (top_cs) {
+ /*
+ * Percpu kthreads in top_cpuset are ignored
+ */
+ if ((task->flags & PF_KTHREAD) && kthread_is_per_cpu(task))
+ continue;
+ cpumask_andnot(new_cpus, possible_mask, cs->subparts_cpus);
+ } else {
+ cpumask_and(new_cpus, possible_mask, cs->effective_cpus);
+ }
set_cpus_allowed_ptr(task, new_cpus);
}
css_task_iter_end(&it);
@@ -3618,6 +3623,8 @@ retry:
update_tasks:
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
mems_updated = !nodes_equal(new_mems, cs->effective_mems);
+ if (!cpus_updated && !mems_updated)
+ goto unlock; /* Hotplug doesn't affect this cpuset */
if (mems_updated)
check_insane_mems_config(&new_mems);
@@ -3629,6 +3636,7 @@ update_tasks:
hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
+unlock:
percpu_up_write(&cpuset_rwsem);
}
@@ -3941,7 +3949,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
}
/*
- * __cpuset_node_allowed - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node?
* @gfp_mask: memory allocation flags
*
@@ -3980,7 +3988,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
-bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
bool allowed; /* is allocation in zone z allowed? */