summaryrefslogtreecommitdiff
path: root/kernel/cgroup
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--kernel/cgroup/Makefile2
-rw-r--r--kernel/cgroup/cgroup.c110
-rw-r--r--kernel/cgroup/freezer.c317
3 files changed, 424 insertions, 5 deletions
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 8d5689ca94b9..5d7a76bfbbb7 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 786ceef2f222..6895464b54c6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2435,8 +2435,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
get_css_set(to_cset);
to_cset->nr_tasks++;
css_set_move_task(task, from_cset, to_cset, true);
- put_css_set_locked(from_cset);
from_cset->nr_tasks--;
+ /*
+ * If the source or destination cgroup is frozen,
+ * the task might require to change its state.
+ */
+ cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+ to_cset->dfl_cgrp);
+ put_css_set_locked(from_cset);
+
}
}
spin_unlock_irq(&css_set_lock);
@@ -3477,8 +3484,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
static int cgroup_events_show(struct seq_file *seq, void *v)
{
- seq_printf(seq, "populated %d\n",
- cgroup_is_populated(seq_css(seq)->cgroup));
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+ seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+ seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
return 0;
}
@@ -3540,6 +3550,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
}
#endif
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+ seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+ return 0;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct cgroup *cgrp;
+ ssize_t ret;
+ int freeze;
+
+ ret = kstrtoint(strstrip(buf), 0, &freeze);
+ if (ret)
+ return ret;
+
+ if (freeze < 0 || freeze > 1)
+ return -ERANGE;
+
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENOENT;
+
+ cgroup_freeze(cgrp, freeze);
+
+ cgroup_kn_unlock(of->kn);
+
+ return nbytes;
+}
+
static int cgroup_file_open(struct kernfs_open_file *of)
{
struct cftype *cft = of->kn->priv;
@@ -4684,6 +4728,12 @@ static struct cftype cgroup_base_files[] = {
.seq_show = cgroup_stat_show,
},
{
+ .name = "cgroup.freeze",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cgroup_freeze_show,
+ .write = cgroup_freeze_write,
+ },
+ {
.name = "cpu.stat",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cpu_stat_show,
@@ -5033,12 +5083,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (ret)
goto out_psi_free;
+ /*
+ * New cgroup inherits effective freeze counter, and
+ * if the parent has to be frozen, the child has too.
+ */
+ cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+ if (cgrp->freezer.e_freeze)
+ set_bit(CGRP_FROZEN, &cgrp->flags);
+
spin_lock_irq(&css_set_lock);
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
- if (tcgrp != cgrp)
+ if (tcgrp != cgrp) {
tcgrp->nr_descendants++;
+
+ /*
+ * If the new cgroup is frozen, all ancestor cgroups
+ * get a new frozen descendant, but their state can't
+ * change because of this.
+ */
+ if (cgrp->freezer.e_freeze)
+ tcgrp->freezer.nr_frozen_descendants++;
+ }
}
spin_unlock_irq(&css_set_lock);
@@ -5329,6 +5396,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
tcgrp->nr_descendants--;
tcgrp->nr_dying_descendants++;
+ /*
+ * If the dying cgroup is frozen, decrease frozen descendants
+ * counters of ancestor cgroups.
+ */
+ if (test_bit(CGRP_FROZEN, &cgrp->flags))
+ tcgrp->freezer.nr_frozen_descendants--;
}
spin_unlock_irq(&css_set_lock);
@@ -5782,6 +5855,29 @@ void cgroup_post_fork(struct task_struct *child)
cset->nr_tasks++;
css_set_move_task(child, NULL, cset, false);
}
+
+ /*
+ * If the cgroup has to be frozen, the new task has too.
+ * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+ * the task into the frozen state.
+ */
+ if (unlikely(cgroup_task_freeze(child))) {
+ struct cgroup *cgrp;
+
+ spin_lock(&child->sighand->siglock);
+ WARN_ON_ONCE(child->frozen);
+ cgrp = cset->dfl_cgrp;
+ child->jobctl |= JOBCTL_TRAP_FREEZE;
+ spin_unlock(&child->sighand->siglock);
+
+ /*
+ * Calling cgroup_update_frozen() isn't required here,
+ * because it will be called anyway a bit later
+ * from do_freezer_trap(). So we avoid cgroup's
+ * transient switch from the frozen state and back.
+ */
+ }
+
spin_unlock_irq(&css_set_lock);
}
@@ -5830,6 +5926,12 @@ void cgroup_exit(struct task_struct *tsk)
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
cset->nr_tasks--;
+
+ if (unlikely(cgroup_task_frozen(tsk)))
+ cgroup_freezer_frozen_exit(tsk);
+ else if (unlikely(cgroup_task_freeze(tsk)))
+ cgroup_update_frozen(task_dfl_cgroup(tsk));
+
spin_unlock_irq(&css_set_lock);
} else {
get_css_set(cset);
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
new file mode 100644
index 000000000000..9d8cda478fc9
--- /dev/null
+++ b/kernel/cgroup/freezer.c
@@ -0,0 +1,317 @@
+//SPDX-License-Identifier: GPL-2.0
+#include <linux/cgroup.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/signal.h>
+
+#include "cgroup-internal.h"
+
+/*
+ * Propagate the cgroup frozen state upwards by the cgroup tree.
+ */
+static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
+{
+ int desc = 1;
+
+ /*
+ * If the new state is frozen, some freezing ancestor cgroups may change
+ * their state too, depending on if all their descendants are frozen.
+ *
+ * Otherwise, all ancestor cgroups are forced into the non-frozen state.
+ */
+ while ((cgrp = cgroup_parent(cgrp))) {
+ if (frozen) {
+ cgrp->freezer.nr_frozen_descendants += desc;
+ if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
+ test_bit(CGRP_FREEZE, &cgrp->flags) &&
+ cgrp->freezer.nr_frozen_descendants ==
+ cgrp->nr_descendants) {
+ set_bit(CGRP_FROZEN, &cgrp->flags);
+ cgroup_file_notify(&cgrp->events_file);
+ desc++;
+ }
+ } else {
+ cgrp->freezer.nr_frozen_descendants -= desc;
+ if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
+ clear_bit(CGRP_FROZEN, &cgrp->flags);
+ cgroup_file_notify(&cgrp->events_file);
+ desc++;
+ }
+ }
+ }
+}
+
+/*
+ * Revisit the cgroup frozen state.
+ * Checks if the cgroup is really frozen and perform all state transitions.
+ */
+void cgroup_update_frozen(struct cgroup *cgrp)
+{
+ bool frozen;
+
+ lockdep_assert_held(&css_set_lock);
+
+ /*
+ * If the cgroup has to be frozen (CGRP_FREEZE bit set),
+ * and all tasks are frozen and/or stopped, let's consider
+ * the cgroup frozen. Otherwise it's not frozen.
+ */
+ frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
+ cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
+
+ if (frozen) {
+ /* Already there? */
+ if (test_bit(CGRP_FROZEN, &cgrp->flags))
+ return;
+
+ set_bit(CGRP_FROZEN, &cgrp->flags);
+ } else {
+ /* Already there? */
+ if (!test_bit(CGRP_FROZEN, &cgrp->flags))
+ return;
+
+ clear_bit(CGRP_FROZEN, &cgrp->flags);
+ }
+ cgroup_file_notify(&cgrp->events_file);
+
+ /* Update the state of ancestor cgroups. */
+ cgroup_propagate_frozen(cgrp, frozen);
+}
+
+/*
+ * Increment cgroup's nr_frozen_tasks.
+ */
+static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
+{
+ cgrp->freezer.nr_frozen_tasks++;
+}
+
+/*
+ * Decrement cgroup's nr_frozen_tasks.
+ */
+static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
+{
+ cgrp->freezer.nr_frozen_tasks--;
+ WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
+}
+
+/*
+ * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ */
+void cgroup_enter_frozen(void)
+{
+ struct cgroup *cgrp;
+
+ if (current->frozen)
+ return;
+
+ spin_lock_irq(&css_set_lock);
+ current->frozen = true;
+ cgrp = task_dfl_cgroup(current);
+ cgroup_inc_frozen_cnt(cgrp);
+ cgroup_update_frozen(cgrp);
+ spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Conditionally leave frozen/stopped state. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ *
+ * If always_leave is not set, and the cgroup is freezing,
+ * we're racing with the cgroup freezing. In this case, we don't
+ * drop the frozen counter to avoid a transient switch to
+ * the unfrozen state.
+ */
+void cgroup_leave_frozen(bool always_leave)
+{
+ struct cgroup *cgrp;
+
+ spin_lock_irq(&css_set_lock);
+ cgrp = task_dfl_cgroup(current);
+ if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
+ cgroup_dec_frozen_cnt(cgrp);
+ cgroup_update_frozen(cgrp);
+ WARN_ON_ONCE(!current->frozen);
+ current->frozen = false;
+ }
+ spin_unlock_irq(&css_set_lock);
+
+ if (unlikely(current->frozen)) {
+ /*
+ * If the task remained in the frozen state,
+ * make sure it won't reach userspace without
+ * entering the signal handling loop.
+ */
+ spin_lock_irq(&current->sighand->siglock);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ }
+}
+
+/*
+ * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
+ * jobctl bit.
+ */
+static void cgroup_freeze_task(struct task_struct *task, bool freeze)
+{
+ unsigned long flags;
+
+ /* If the task is about to die, don't bother with freezing it. */
+ if (!lock_task_sighand(task, &flags))
+ return;
+
+ if (freeze) {
+ task->jobctl |= JOBCTL_TRAP_FREEZE;
+ signal_wake_up(task, false);
+ } else {
+ task->jobctl &= ~JOBCTL_TRAP_FREEZE;
+ wake_up_process(task);
+ }
+
+ unlock_task_sighand(task, &flags);
+}
+
+/*
+ * Freeze or unfreeze all tasks in the given cgroup.
+ */
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
+{
+ struct css_task_iter it;
+ struct task_struct *task;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ spin_lock_irq(&css_set_lock);
+ if (freeze)
+ set_bit(CGRP_FREEZE, &cgrp->flags);
+ else
+ clear_bit(CGRP_FREEZE, &cgrp->flags);
+ spin_unlock_irq(&css_set_lock);
+
+ css_task_iter_start(&cgrp->self, 0, &it);
+ while ((task = css_task_iter_next(&it))) {
+ /*
+ * Ignore kernel threads here. Freezing cgroups containing
+ * kthreads isn't supported.
+ */
+ if (task->flags & PF_KTHREAD)
+ continue;
+ cgroup_freeze_task(task, freeze);
+ }
+ css_task_iter_end(&it);
+
+ /*
+ * Cgroup state should be revisited here to cover empty leaf cgroups
+ * and cgroups which descendants are already in the desired state.
+ */
+ spin_lock_irq(&css_set_lock);
+ if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
+ cgroup_update_frozen(cgrp);
+ spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Adjust the task state (freeze or unfreeze) and revisit the state of
+ * source and destination cgroups.
+ */
+void cgroup_freezer_migrate_task(struct task_struct *task,
+ struct cgroup *src, struct cgroup *dst)
+{
+ lockdep_assert_held(&css_set_lock);
+
+ /*
+ * Kernel threads are not supposed to be frozen at all.
+ */
+ if (task->flags & PF_KTHREAD)
+ return;
+
+ /*
+ * Adjust counters of freezing and frozen tasks.
+ * Note, that if the task is frozen, but the destination cgroup is not
+ * frozen, we bump both counters to keep them balanced.
+ */
+ if (task->frozen) {
+ cgroup_inc_frozen_cnt(dst);
+ cgroup_dec_frozen_cnt(src);
+ }
+ cgroup_update_frozen(dst);
+ cgroup_update_frozen(src);
+
+ /*
+ * Force the task to the desired state.
+ */
+ cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
+}
+
+void cgroup_freezer_frozen_exit(struct task_struct *task)
+{
+ struct cgroup *cgrp = task_dfl_cgroup(task);
+
+ lockdep_assert_held(&css_set_lock);
+
+ cgroup_dec_frozen_cnt(cgrp);
+ cgroup_update_frozen(cgrp);
+}
+
+void cgroup_freeze(struct cgroup *cgrp, bool freeze)
+{
+ struct cgroup_subsys_state *css;
+ struct cgroup *dsct;
+ bool applied = false;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ /*
+ * Nothing changed? Just exit.
+ */
+ if (cgrp->freezer.freeze == freeze)
+ return;
+
+ cgrp->freezer.freeze = freeze;
+
+ /*
+ * Propagate changes downwards the cgroup tree.
+ */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ dsct = css->cgroup;
+
+ if (cgroup_is_dead(dsct))
+ continue;
+
+ if (freeze) {
+ dsct->freezer.e_freeze++;
+ /*
+ * Already frozen because of ancestor's settings?
+ */
+ if (dsct->freezer.e_freeze > 1)
+ continue;
+ } else {
+ dsct->freezer.e_freeze--;
+ /*
+ * Still frozen because of ancestor's settings?
+ */
+ if (dsct->freezer.e_freeze > 0)
+ continue;
+
+ WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
+ }
+
+ /*
+ * Do change actual state: freeze or unfreeze.
+ */
+ cgroup_do_freeze(dsct, freeze);
+ applied = true;
+ }
+
+ /*
+ * Even if the actual state hasn't changed, let's notify a user.
+ * The state can be enforced by an ancestor cgroup: the cgroup
+ * can already be in the desired state or it can be locked in the
+ * opposite state, so that the transition will never happen.
+ * In both cases it's better to notify a user, that there is
+ * nothing to wait for.
+ */
+ if (!applied)
+ cgroup_file_notify(&cgrp->events_file);
+}