diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-05 17:08:45 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-05 17:08:45 -0700 |
commit | 9f25a8da423226d7797e35a132535186c531228b (patch) | |
tree | f35f830ac31fa31f5591eb96eea650ae5d20cc04 /kernel/cgroup/cgroup.c | |
parent | 0bbddb8cbe7a8765e9c6ef598a33b50461934f88 (diff) | |
parent | d8742e22902186e30c346b1ba881cb52942ae3e4 (diff) |
Merge branch 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- For cpustat, cgroup has a percpu hierarchical stat mechanism which
propagates up the hierarchy lazily.
This contains commits to factor out and generalize the mechanism so
that it can be used for other cgroup stats too.
The original intention was to update memcg stats to use it but memcg
went for a different approach, so still the only user is cpustat. The
factoring out and generalization still make sense and it's likely
that this can be used for other purposes in the future.
- cgroup uses kernfs_notify() (which uses fsnotify()) to inform user
space of certain events. A rate limiting mechanism is added.
- Other misc changes.
* 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: css_set_lock should nest inside tasklist_lock
rdmacg: Convert to use match_string() helper
cgroup: Make cgroup_rstat_updated() ready for root cgroup usage
cgroup: Add memory barriers to plug cgroup_rstat_updated() race window
cgroup: Add cgroup_subsys->css_rstat_flush()
cgroup: Replace cgroup_rstat_mutex with a spinlock
cgroup: Factor out and expose cgroup_rstat_*() interface functions
cgroup: Reorganize kernel/cgroup/rstat.c
cgroup: Distinguish base resource stat implementation from rstat
cgroup: Rename stat to rstat
cgroup: Rename kernel/cgroup/stat.c to kernel/cgroup/rstat.c
cgroup: Limit event generation frequency
cgroup: Explicitly remove core interface files
Diffstat (limited to 'kernel/cgroup/cgroup.c')
-rw-r--r-- | kernel/cgroup/cgroup.c | 105 |
1 files changed, 74 insertions, 31 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 12883656e63e..acb66713f9b6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -54,6 +54,7 @@ #include <linux/proc_ns.h> #include <linux/nsproxy.h> #include <linux/file.h> +#include <linux/sched/cputime.h> #include <net/sock.h> #define CREATE_TRACE_POINTS @@ -61,6 +62,8 @@ #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ MAX_CFTYPE_NAME + 2) +/* let's not notify more than 100 times per second */ +#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100) /* * cgroup_mutex is the master lock. Any modification to cgroup or its @@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = { }; #undef SUBSYS -static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat); +static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu); /* * The default hierarchy, reserved for the subsystems that are otherwise * unattached - it never has more than a single cgroup, and all tasks are * part of that cgroup. */ -struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat }; +struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu }; EXPORT_SYMBOL_GPL(cgrp_dfl_root); /* @@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) spin_lock_irq(&cgroup_file_kn_lock); cfile->kn = NULL; spin_unlock_irq(&cgroup_file_kn_lock); + + del_timer_sync(&cfile->notify_timer); } kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); @@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css) css->flags &= ~CSS_VISIBLE; - list_for_each_entry(cfts, &css->ss->cfts, node) + if (!css->ss) { + if (cgroup_on_dfl(cgrp)) + cfts = cgroup_base_files; + else + cfts = cgroup1_base_files; + cgroup_addrm_files(css, cgrp, cfts, false); + } else { + list_for_each_entry(cfts, &css->ss->cfts, node) + cgroup_addrm_files(css, cgrp, cfts, false); + } } /** @@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css) else cfts = cgroup1_base_files; - return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); - } - - list_for_each_entry(cfts, &css->ss->cfts, node) { - ret = cgroup_addrm_files(css, cgrp, cfts, true); - if (ret < 0) { - failed_cfts = cfts; - goto err; + ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); + if (ret < 0) + return ret; + } else { + list_for_each_entry(cfts, &css->ss->cfts, node) { + ret = cgroup_addrm_files(css, cgrp, cfts, true); + if (ret < 0) { + failed_cfts = cfts; + goto err; + } } } @@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void) { struct task_struct *p, *g; - spin_lock_irq(&css_set_lock); - - if (use_task_css_set_links) - goto out_unlock; - - use_task_css_set_links = true; - /* * We need tasklist_lock because RCU is not safe against * while_each_thread(). Besides, a forking task that has passed @@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void) * tasklist if we walk through it with RCU. */ read_lock(&tasklist_lock); + spin_lock_irq(&css_set_lock); + + if (use_task_css_set_links) + goto out_unlock; + + use_task_css_set_links = true; + do_each_thread(g, p) { WARN_ON_ONCE(!list_empty(&p->cg_list) || task_css_set(p) != &init_css_set); @@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void) } spin_unlock(&p->sighand->siglock); } while_each_thread(g, p); - read_unlock(&tasklist_lock); out_unlock: spin_unlock_irq(&css_set_lock); + read_unlock(&tasklist_lock); } static void init_cgroup_housekeeping(struct cgroup *cgrp) @@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) cgrp->dom_cgrp = cgrp; cgrp->max_descendants = INT_MAX; cgrp->max_depth = INT_MAX; + INIT_LIST_HEAD(&cgrp->rstat_css_list); + prev_cputime_init(&cgrp->prev_cputime); for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); @@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v) struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup; int ret = 0; - cgroup_stat_show_cputime(seq); + cgroup_base_stat_cputime_show(seq); #ifdef CONFIG_CGROUP_SCHED ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id); #endif @@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn) return kernfs_setattr(kn, &iattr); } +static void cgroup_file_notify_timer(struct timer_list *timer) +{ + cgroup_file_notify(container_of(timer, struct cgroup_file, + notify_timer)); +} + static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype *cft) { @@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, if (cft->file_offset) { struct cgroup_file *cfile = (void *)css + cft->file_offset; + timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0); + spin_lock_irq(&cgroup_file_kn_lock); cfile->kn = kn; spin_unlock_irq(&cgroup_file_kn_lock); @@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile) unsigned long flags; spin_lock_irqsave(&cgroup_file_kn_lock, flags); - if (cfile->kn) - kernfs_notify(cfile->kn); + if (cfile->kn) { + unsigned long last = cfile->notified_at; + unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV; + + if (time_in_range(jiffies, last, next)) { + timer_reduce(&cfile->notify_timer, next); + } else { + kernfs_notify(cfile->kn); + cfile->notified_at = jiffies; + } + } spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); } @@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work) cgroup_put(cgroup_parent(cgrp)); kernfs_put(cgrp->kn); if (cgroup_on_dfl(cgrp)) - cgroup_stat_exit(cgrp); + cgroup_rstat_exit(cgrp); kfree(cgrp); } else { /* @@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work) if (ss) { /* css release path */ + if (!list_empty(&css->rstat_css_node)) { + cgroup_rstat_flush(cgrp); + list_del_rcu(&css->rstat_css_node); + } + cgroup_idr_replace(&ss->css_idr, NULL, css->id); if (ss->css_released) ss->css_released(css); @@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work) trace_cgroup_release(cgrp); if (cgroup_on_dfl(cgrp)) - cgroup_stat_flush(cgrp); + cgroup_rstat_flush(cgrp); for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) @@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->id = -1; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); + INIT_LIST_HEAD(&css->rstat_css_node); css->serial_nr = css_serial_nr_next++; atomic_set(&css->online_cnt, 0); @@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css_get(css->parent); } + if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush) + list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list); + BUG_ON(cgroup_css(cgrp, ss)); } @@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, err_list_del: list_del_rcu(&css->sibling); err_free_css: + list_del_rcu(&css->rstat_css_node); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); @@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) goto out_free_cgrp; if (cgroup_on_dfl(parent)) { - ret = cgroup_stat_init(cgrp); + ret = cgroup_rstat_init(cgrp); if (ret) goto out_cancel_ref; } @@ -4850,7 +4895,7 @@ out_idr_free: cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_stat_exit: if (cgroup_on_dfl(parent)) - cgroup_stat_exit(cgrp); + cgroup_rstat_exit(cgrp); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: @@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) for_each_css(css, ssid, cgrp) kill_css(css); - /* - * Remove @cgrp directory along with the base files. @cgrp has an - * extra ref on its kn. - */ + /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */ + css_clear_dir(&cgrp->self); kernfs_remove(cgrp->kn); if (parent && cgroup_is_threaded(cgrp)) @@ -5245,7 +5288,7 @@ int __init cgroup_init(void) BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); - cgroup_stat_boot(); + cgroup_rstat_boot(); /* * The latency of the synchronize_sched() is too high for cgroups, |