summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Burkov <boris@bur.io>2020-05-27 14:43:19 -0700
committerTejun Heo <tj@kernel.org>2020-05-28 10:06:35 -0400
commit936f2a70f2077f64fab1dcb3eca71879e82ecd3f (patch)
tree7a649b1c8e4776ddbada58db00a69b56b9433942
parent6b6ebb34744b21467aa01be7c53cc570fc41f70d (diff)
cgroup: add cpu.stat file to root cgroup
Currently, the root cgroup does not have a cpu.stat file. Add one which is consistent with /proc/stat to capture global cpu statistics that might not fall under cgroup accounting. We haven't done this in the past because the data are already presented in /proc/stat and we didn't want to add overhead from collecting root cgroup stats when cgroups are configured, but no cgroups have been created. By keeping the data consistent with /proc/stat, I think we avoid the first problem, while improving the usability of cgroups stats. We avoid the second problem by computing the contents of cpu.stat from existing data collected for /proc/stat anyway. Signed-off-by: Boris Burkov <boris@bur.io> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst6
-rw-r--r--kernel/cgroup/cgroup.c1
-rw-r--r--kernel/cgroup/rstat.c60
3 files changed, 54 insertions, 13 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bcc80269bb6a..341a6c2340d5 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -714,9 +714,7 @@ Conventions
- Settings for a single feature should be contained in a single file.
- The root cgroup should be exempt from resource control and thus
- shouldn't have resource control interface files. Also,
- informational files on the root cgroup which end up showing global
- information available elsewhere shouldn't exist.
+ shouldn't have resource control interface files.
- The default time unit is microseconds. If a different unit is ever
used, an explicit unit suffix must be present.
@@ -985,7 +983,7 @@ CPU Interface Files
All time durations are in microseconds.
cpu.stat
- A read-only flat-keyed file which exists on non-root cgroups.
+ A read-only flat-keyed file.
This file exists whether the controller is enabled or not.
It always reports the following three stats:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7a016749de21..51924ebdff51 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4874,7 +4874,6 @@ static struct cftype cgroup_base_files[] = {
},
{
.name = "cpu.stat",
- .flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cpu_stat_show,
},
#ifdef CONFIG_PSI
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 41ca996568df..b6397a186ce9 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -389,18 +389,62 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
cgroup_base_stat_cputime_account_end(cgrp, rstatc);
}
+/*
+ * compute the cputime for the root cgroup by getting the per cpu data
+ * at a global level, then categorizing the fields in a manner consistent
+ * with how it is done by __cgroup_account_cputime_field for each bit of
+ * cpu time attributed to a cgroup.
+ */
+static void root_cgroup_cputime(struct task_cputime *cputime)
+{
+ int i;
+
+ cputime->stime = 0;
+ cputime->utime = 0;
+ cputime->sum_exec_runtime = 0;
+ for_each_possible_cpu(i) {
+ struct kernel_cpustat kcpustat;
+ u64 *cpustat = kcpustat.cpustat;
+ u64 user = 0;
+ u64 sys = 0;
+
+ kcpustat_cpu_fetch(&kcpustat, i);
+
+ user += cpustat[CPUTIME_USER];
+ user += cpustat[CPUTIME_NICE];
+ cputime->utime += user;
+
+ sys += cpustat[CPUTIME_SYSTEM];
+ sys += cpustat[CPUTIME_IRQ];
+ sys += cpustat[CPUTIME_SOFTIRQ];
+ cputime->stime += sys;
+
+ cputime->sum_exec_runtime += user;
+ cputime->sum_exec_runtime += sys;
+ cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+ cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST];
+ cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE];
+ }
+}
+
void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
u64 usage, utime, stime;
-
- if (!cgroup_parent(cgrp))
- return;
-
- cgroup_rstat_flush_hold(cgrp);
- usage = cgrp->bstat.cputime.sum_exec_runtime;
- cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
- cgroup_rstat_flush_release();
+ struct task_cputime cputime;
+
+ if (cgroup_parent(cgrp)) {
+ cgroup_rstat_flush_hold(cgrp);
+ usage = cgrp->bstat.cputime.sum_exec_runtime;
+ cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
+ &utime, &stime);
+ cgroup_rstat_flush_release();
+ } else {
+ root_cgroup_cputime(&cputime);
+ usage = cputime.sum_exec_runtime;
+ utime = cputime.utime;
+ stime = cputime.stime;
+ }
do_div(usage, NSEC_PER_USEC);
do_div(utime, NSEC_PER_USEC);