diff options
author | Boris Burkov <boris@bur.io> | 2020-05-27 14:43:19 -0700 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2020-05-28 10:06:35 -0400 |
commit | 936f2a70f2077f64fab1dcb3eca71879e82ecd3f (patch) | |
tree | 7a649b1c8e4776ddbada58db00a69b56b9433942 | |
parent | 6b6ebb34744b21467aa01be7c53cc570fc41f70d (diff) |
cgroup: add cpu.stat file to root cgroup
Currently, the root cgroup does not have a cpu.stat file. Add one which
is consistent with /proc/stat to capture global cpu statistics that
might not fall under cgroup accounting.
We haven't done this in the past because the data are already presented
in /proc/stat and we didn't want to add overhead from collecting root
cgroup stats when cgroups are configured, but no cgroups have been
created.
By keeping the data consistent with /proc/stat, I think we avoid the
first problem, while improving the usability of cgroups stats.
We avoid the second problem by computing the contents of cpu.stat from
existing data collected for /proc/stat anyway.
Signed-off-by: Boris Burkov <boris@bur.io>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | Documentation/admin-guide/cgroup-v2.rst | 6 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 1 | ||||
-rw-r--r-- | kernel/cgroup/rstat.c | 60 |
3 files changed, 54 insertions, 13 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index bcc80269bb6a..341a6c2340d5 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -714,9 +714,7 @@ Conventions - Settings for a single feature should be contained in a single file. - The root cgroup should be exempt from resource control and thus - shouldn't have resource control interface files. Also, - informational files on the root cgroup which end up showing global - information available elsewhere shouldn't exist. + shouldn't have resource control interface files. - The default time unit is microseconds. If a different unit is ever used, an explicit unit suffix must be present. @@ -985,7 +983,7 @@ CPU Interface Files All time durations are in microseconds. cpu.stat - A read-only flat-keyed file which exists on non-root cgroups. + A read-only flat-keyed file. This file exists whether the controller is enabled or not. It always reports the following three stats: diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7a016749de21..51924ebdff51 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4874,7 +4874,6 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cpu.stat", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cpu_stat_show, }, #ifdef CONFIG_PSI diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 41ca996568df..b6397a186ce9 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -389,18 +389,62 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, cgroup_base_stat_cputime_account_end(cgrp, rstatc); } +/* + * compute the cputime for the root cgroup by getting the per cpu data + * at a global level, then categorizing the fields in a manner consistent + * with how it is done by __cgroup_account_cputime_field for each bit of + * cpu time attributed to a cgroup. + */ +static void root_cgroup_cputime(struct task_cputime *cputime) +{ + int i; + + cputime->stime = 0; + cputime->utime = 0; + cputime->sum_exec_runtime = 0; + for_each_possible_cpu(i) { + struct kernel_cpustat kcpustat; + u64 *cpustat = kcpustat.cpustat; + u64 user = 0; + u64 sys = 0; + + kcpustat_cpu_fetch(&kcpustat, i); + + user += cpustat[CPUTIME_USER]; + user += cpustat[CPUTIME_NICE]; + cputime->utime += user; + + sys += cpustat[CPUTIME_SYSTEM]; + sys += cpustat[CPUTIME_IRQ]; + sys += cpustat[CPUTIME_SOFTIRQ]; + cputime->stime += sys; + + cputime->sum_exec_runtime += user; + cputime->sum_exec_runtime += sys; + cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; + cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST]; + cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE]; + } +} + void cgroup_base_stat_cputime_show(struct seq_file *seq) { struct cgroup *cgrp = seq_css(seq)->cgroup; u64 usage, utime, stime; - - if (!cgroup_parent(cgrp)) - return; - - cgroup_rstat_flush_hold(cgrp); - usage = cgrp->bstat.cputime.sum_exec_runtime; - cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime); - cgroup_rstat_flush_release(); + struct task_cputime cputime; + + if (cgroup_parent(cgrp)) { + cgroup_rstat_flush_hold(cgrp); + usage = cgrp->bstat.cputime.sum_exec_runtime; + cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, + &utime, &stime); + cgroup_rstat_flush_release(); + } else { + root_cgroup_cputime(&cputime); + usage = cputime.sum_exec_runtime; + utime = cputime.utime; + stime = cputime.stime; + } do_div(usage, NSEC_PER_USEC); do_div(utime, NSEC_PER_USEC); |