summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 15:48:36 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 15:48:36 -0800
commita312a8cc3c7fe96f5e54e69c676f5bd12995f44e (patch)
tree29e44c7ab6c59dd841bc217be0796b4cf26a7f30
parentbf57ae2165bad1cb273095a5c09708ab503cd874 (diff)
parent674b745e22b3caae48ad20422795eefd3f832a7b (diff)
Merge tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "Nothing too interesting: - Add CONFIG_DEBUG_GROUP_REF which makes cgroup refcnt operations kprobable - A couple cpuset optimizations - Other misc changes including doc and test updates" * tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: remove rcu_read_lock()/rcu_read_unlock() in critical section of spin_lock_irq() cgroup/cpuset: Improve cpuset_css_alloc() description kselftest/cgroup: Add cleanup() to test_cpuset_prs.sh cgroup/cpuset: Optimize cpuset_attach() on v2 cgroup/cpuset: Skip spread flags update on v2 kselftest/cgroup: Fix gathering number of CPUs cgroup: cgroup refcnt functions should be exported when CONFIG_DEBUG_CGROUP_REF cgroup: Implement DEBUG_CGROUP_REF
-rw-r--r--include/linux/cgroup.h98
-rw-r--r--include/linux/cgroup_refcnt.h96
-rw-r--r--kernel/cgroup/cgroup.c8
-rw-r--r--kernel/cgroup/cpuset.c48
-rw-r--r--lib/Kconfig.debug10
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh19
6 files changed, 181 insertions, 98 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2b7d077de7ef..3410aecffdb4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -310,72 +310,25 @@ void css_task_iter_end(struct css_task_iter *it);
* Inline functions.
*/
+#ifdef CONFIG_DEBUG_CGROUP_REF
+void css_get(struct cgroup_subsys_state *css);
+void css_get_many(struct cgroup_subsys_state *css, unsigned int n);
+bool css_tryget(struct cgroup_subsys_state *css);
+bool css_tryget_online(struct cgroup_subsys_state *css);
+void css_put(struct cgroup_subsys_state *css);
+void css_put_many(struct cgroup_subsys_state *css, unsigned int n);
+#else
+#define CGROUP_REF_FN_ATTRS static inline
+#define CGROUP_REF_EXPORT(fn)
+#include <linux/cgroup_refcnt.h>
+#endif
+
static inline u64 cgroup_id(const struct cgroup *cgrp)
{
return cgrp->kn->id;
}
/**
- * css_get - obtain a reference on the specified css
- * @css: target css
- *
- * The caller must already have a reference.
- */
-static inline void css_get(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_get(&css->refcnt);
-}
-
-/**
- * css_get_many - obtain references on the specified css
- * @css: target css
- * @n: number of references to get
- *
- * The caller must already have a reference.
- */
-static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_get_many(&css->refcnt, n);
-}
-
-/**
- * css_tryget - try to obtain a reference on the specified css
- * @css: target css
- *
- * Obtain a reference on @css unless it already has reached zero and is
- * being released. This function doesn't care whether @css is on or
- * offline. The caller naturally needs to ensure that @css is accessible
- * but doesn't have to be holding a reference on it - IOW, RCU protected
- * access is good enough for this function. Returns %true if a reference
- * count was successfully obtained; %false otherwise.
- */
-static inline bool css_tryget(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- return percpu_ref_tryget(&css->refcnt);
- return true;
-}
-
-/**
- * css_tryget_online - try to obtain a reference on the specified css if online
- * @css: target css
- *
- * Obtain a reference on @css if it's online. The caller naturally needs
- * to ensure that @css is accessible but doesn't have to be holding a
- * reference on it - IOW, RCU protected access is good enough for this
- * function. Returns %true if a reference count was successfully obtained;
- * %false otherwise.
- */
-static inline bool css_tryget_online(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- return percpu_ref_tryget_live(&css->refcnt);
- return true;
-}
-
-/**
* css_is_dying - test whether the specified css is dying
* @css: target css
*
@@ -395,31 +348,6 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
}
-/**
- * css_put - put a css reference
- * @css: target css
- *
- * Put a reference obtained via css_get() and css_tryget_online().
- */
-static inline void css_put(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_put(&css->refcnt);
-}
-
-/**
- * css_put_many - put css references
- * @css: target css
- * @n: number of references to put
- *
- * Put references obtained via css_get() and css_tryget_online().
- */
-static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_put_many(&css->refcnt, n);
-}
-
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
diff --git a/include/linux/cgroup_refcnt.h b/include/linux/cgroup_refcnt.h
new file mode 100644
index 000000000000..2eea0a69ecfc
--- /dev/null
+++ b/include/linux/cgroup_refcnt.h
@@ -0,0 +1,96 @@
+/**
+ * css_get - obtain a reference on the specified css
+ * @css: target css
+ *
+ * The caller must already have a reference.
+ */
+CGROUP_REF_FN_ATTRS
+void css_get(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_get(&css->refcnt);
+}
+CGROUP_REF_EXPORT(css_get)
+
+/**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+CGROUP_REF_FN_ATTRS
+void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_get_many(&css->refcnt, n);
+}
+CGROUP_REF_EXPORT(css_get_many)
+
+/**
+ * css_tryget - try to obtain a reference on the specified css
+ * @css: target css
+ *
+ * Obtain a reference on @css unless it already has reached zero and is
+ * being released. This function doesn't care whether @css is on or
+ * offline. The caller naturally needs to ensure that @css is accessible
+ * but doesn't have to be holding a reference on it - IOW, RCU protected
+ * access is good enough for this function. Returns %true if a reference
+ * count was successfully obtained; %false otherwise.
+ */
+CGROUP_REF_FN_ATTRS
+bool css_tryget(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ return percpu_ref_tryget(&css->refcnt);
+ return true;
+}
+CGROUP_REF_EXPORT(css_tryget)
+
+/**
+ * css_tryget_online - try to obtain a reference on the specified css if online
+ * @css: target css
+ *
+ * Obtain a reference on @css if it's online. The caller naturally needs
+ * to ensure that @css is accessible but doesn't have to be holding a
+ * reference on it - IOW, RCU protected access is good enough for this
+ * function. Returns %true if a reference count was successfully obtained;
+ * %false otherwise.
+ */
+CGROUP_REF_FN_ATTRS
+bool css_tryget_online(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ return percpu_ref_tryget_live(&css->refcnt);
+ return true;
+}
+CGROUP_REF_EXPORT(css_tryget_online)
+
+/**
+ * css_put - put a css reference
+ * @css: target css
+ *
+ * Put a reference obtained via css_get() and css_tryget_online().
+ */
+CGROUP_REF_FN_ATTRS
+void css_put(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_put(&css->refcnt);
+}
+CGROUP_REF_EXPORT(css_put)
+
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+CGROUP_REF_FN_ATTRS
+void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_put_many(&css->refcnt, n);
+}
+CGROUP_REF_EXPORT(css_put_many)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 2319946715e0..15cc26513596 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -248,6 +248,12 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
+#ifdef CONFIG_DEBUG_CGROUP_REF
+#define CGROUP_REF_FN_ATTRS noinline
+#define CGROUP_REF_EXPORT(fn) EXPORT_SYMBOL_GPL(fn);
+#include <linux/cgroup_refcnt.h>
+#endif
+
/**
* cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
* @ssid: subsys ID of interest
@@ -2860,14 +2866,12 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
* take an rcu_read_lock.
*/
spin_lock_irq(&css_set_lock);
- rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_task(task, mgctx);
if (!threadgroup)
break;
} while_each_thread(leader, task);
- rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
return cgroup_migrate_execute(mgctx);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b474289c15b8..589827ccda8b 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -550,11 +550,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/*
* update task's spread flag if cpuset's page/slab spread flag is set
*
- * Call with callback_lock or cpuset_rwsem held.
+ * Call with callback_lock or cpuset_rwsem held. The check can be skipped
+ * if on default hierarchy.
*/
-static void cpuset_update_task_spread_flag(struct cpuset *cs,
+static void cpuset_update_task_spread_flags(struct cpuset *cs,
struct task_struct *tsk)
{
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+ return;
+
if (is_spread_page(cs))
task_set_spread_page(tsk);
else
@@ -2153,7 +2157,7 @@ static void update_tasks_flags(struct cpuset *cs)
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it)))
- cpuset_update_task_spread_flag(cs, task);
+ cpuset_update_task_spread_flags(cs, task);
css_task_iter_end(&it);
}
@@ -2509,12 +2513,28 @@ static void cpuset_attach(struct cgroup_taskset *tset)
struct cgroup_subsys_state *css;
struct cpuset *cs;
struct cpuset *oldcs = cpuset_attach_old_cs;
+ bool cpus_updated, mems_updated;
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
percpu_down_write(&cpuset_rwsem);
+ cpus_updated = !cpumask_equal(cs->effective_cpus,
+ oldcs->effective_cpus);
+ mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
+
+ /*
+ * In the default hierarchy, enabling cpuset in the child cgroups
+ * will trigger a number of cpuset_attach() calls with no change
+ * in effective cpus and mems. In that case, we can optimize out
+ * by skipping the task iteration and update.
+ */
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ !cpus_updated && !mems_updated) {
+ cpuset_attach_nodemask_to = cs->effective_mems;
+ goto out;
+ }
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@@ -2530,14 +2550,19 @@ static void cpuset_attach(struct cgroup_taskset *tset)
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flag(cs, task);
+ cpuset_update_task_spread_flags(cs, task);
}
/*
* Change mm for all threadgroup leaders. This is expensive and may
- * sleep and should be moved outside migration path proper.
+ * sleep and should be moved outside migration path proper. Skip it
+ * if there is no change in effective_mems and CS_MEMORY_MIGRATE is
+ * not set.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
+ if (!is_memory_migrate(cs) && !mems_updated)
+ goto out;
+
cgroup_taskset_for_each_leader(leader, css, tset) {
struct mm_struct *mm = get_task_mm(leader);
@@ -2560,6 +2585,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
}
}
+out:
cs->old_mems_allowed = cpuset_attach_nodemask_to;
cs->attach_in_progress--;
@@ -3046,11 +3072,15 @@ static struct cftype dfl_files[] = {
};
-/*
- * cpuset_css_alloc - allocate a cpuset css
- * cgrp: control group that the new cpuset will be part of
+/**
+ * cpuset_css_alloc - Allocate a cpuset css
+ * @parent_css: Parent css of the control group that the new cpuset will be
+ * part of
+ * Return: cpuset css on success, -ENOMEM on failure.
+ *
+ * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
+ * top cpuset css otherwise.
*/
-
static struct cgroup_subsys_state *
cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3638b3424be5..13a1046a7d6f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1717,6 +1717,16 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
+config DEBUG_CGROUP_REF
+ bool "Disable inlining of cgroup css reference count functions"
+ depends on DEBUG_KERNEL
+ depends on CGROUPS
+ depends on KPROBES
+ default n
+ help
+ Force cgroup css reference count functions to not be inlined so
+ that they can be kprobed for debugging.
+
source "kernel/trace/Kconfig"
config PROVIDE_OHCI1394_DMA_INIT
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 526d2c42d870..186e1c26867e 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -16,7 +16,12 @@ skip_test() {
[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
# Set sched verbose flag, if available
-[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose
+if [[ -d /sys/kernel/debug/sched ]]
+then
+ # Used to restore the original setting during cleanup
+ SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
+ echo Y > /sys/kernel/debug/sched/verbose
+fi
# Get wait_inotify location
WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
@@ -25,7 +30,7 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
-CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//")
+CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
# Set verbose flag and delay factor
@@ -54,6 +59,15 @@ echo +cpuset > cgroup.subtree_control
[[ -d test ]] || mkdir test
cd test
+cleanup()
+{
+ online_cpus
+ rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
+ cd ..
+ rmdir test > /dev/null 2>&1
+ echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
+}
+
# Pause in ms
pause()
{
@@ -666,6 +680,7 @@ test_inotify()
fi
}
+trap cleanup 0 2 3 6
run_state_test TEST_MATRIX
test_isolated
test_inotify