From 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Nov 2015 11:13:34 -0500 Subject: cgroup: replace __DEVEL__sane_behavior with cgroup2 fs type With major controllers - cpu, memory and io - shaping up for the unified hierarchy, cgroup2 is about ready to be, gradually, released into the wild. Replace __DEVEL__sane_behavior flag which was used to select the unified hierarchy with a separate filesystem type "cgroup2" so that unified hierarchy can be mounted as follows. mount -t cgroup2 none $MOUNT_POINT The cgroup2 fs has its own magic number - 0x63677270 ("cgrp"). v2: Assign a different magic number to cgroup2 fs. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- kernel/cgroup.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b316debadeb3..af0886262f58 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -211,6 +211,7 @@ static unsigned long have_free_callback __read_mostly; /* Ditto for the can_fork callback. */ static unsigned long have_canfork_callback __read_mostly; +static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; @@ -1641,10 +1642,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) all_ss = true; continue; } - if (!strcmp(token, "__DEVEL__sane_behavior")) { - opts->flags |= CGRP_ROOT_SANE_BEHAVIOR; - continue; - } if (!strcmp(token, "noprefix")) { opts->flags |= CGRP_ROOT_NOPREFIX; continue; @@ -1711,15 +1708,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return -ENOENT; } - if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n"); - if (nr_opts != 1) { - pr_err("sane_behavior: no other mount options allowed\n"); - return -EINVAL; - } - return 0; - } - /* * If the 'all' option was specified select all the subsystems, * otherwise if 'none', 'name=' and a subsystem name options were @@ -1998,6 +1986,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, int flags, const char *unused_dev_name, void *data) { + bool is_v2 = fs_type == &cgroup2_fs_type; struct super_block *pinned_sb = NULL; struct cgroup_subsys *ss; struct cgroup_root *root; @@ -2014,6 +2003,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); + if (is_v2) { + if (data) { + pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); + return ERR_PTR(-EINVAL); + } + cgrp_dfl_root_visible = true; + root = &cgrp_dfl_root; + cgroup_get(&root->cgrp); + goto out_mount; + } + mutex_lock(&cgroup_mutex); /* First find the desired set of subsystems */ @@ -2021,15 +2021,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (ret) goto out_unlock; - /* look for a matching existing root */ - if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) { - cgrp_dfl_root_visible = true; - root = &cgrp_dfl_root; - cgroup_get(&root->cgrp); - ret = 0; - goto out_unlock; - } - /* * Destruction of cgroup root is asynchronous, so subsystems may * still be dying after the previous unmount. Let's drain the @@ -2140,9 +2131,10 @@ out_free: if (ret) return ERR_PTR(ret); - +out_mount: dentry = kernfs_mount(fs_type, flags, root->kf_root, - CGROUP_SUPER_MAGIC, &new_sb); + is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC, + &new_sb); if (IS_ERR(dentry) || !new_sb) cgroup_put(&root->cgrp); @@ -2185,6 +2177,12 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; +static struct file_system_type cgroup2_fs_type = { + .name = "cgroup2", + .mount = cgroup_mount, + .kill_sb = cgroup_kill_sb, +}; + /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5315,6 +5313,7 @@ int __init cgroup_init(void) WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); + WARN_ON(register_filesystem(&cgroup2_fs_type)); WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations)); return 0; -- cgit v1.2.3-58-ga151 From d2b4365809060b256330a99289de9797a5dd6967 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 25 Nov 2015 16:16:55 +0100 Subject: cpuset: Replace all instances of time_t with time64_t The following patch replaces all instances of time_t with time64_t i.e. change the type used for representing time from 32-bit to 64-bit. All 32-bit kernels to date use a signed 32-bit time_t type, which can only represent time until January 2038. Since embedded systems running 32-bit Linux are going to survive beyond that date, we have to change all current uses, in a backwards compatible way. The patch also changes the function get_seconds() that returns a 32-bit integer to ktime_get_seconds() that returns seconds as 64-bit integer. The patch changes the type of ticks from time_t to u32. We keep ticks as 32-bits as the function uses 32-bit arithmetic which would prove less expensive than 64-bit arithmetic and the function is expected to be called atleast once every 32 seconds. Signed-off-by: Heena Sirwani Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo --- kernel/cpuset.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10ae73611d80..c9ea63ff70a7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,7 @@ struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; struct fmeter { int cnt; /* unprocessed events count */ int val; /* most recent output value */ - time_t time; /* clock (secs) when val computed */ + time64_t time; /* clock (secs) when val computed */ spinlock_t lock; /* guards read or write of above */ }; @@ -1374,7 +1375,7 @@ out: */ #define FM_COEF 933 /* coefficient for half-life of 10 secs */ -#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */ +#define FM_MAXTICKS ((u32)99) /* useless computing more ticks than this */ #define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */ #define FM_SCALE 1000 /* faux fixed point scale */ @@ -1390,8 +1391,11 @@ static void fmeter_init(struct fmeter *fmp) /* Internal meter update - process cnt events and update value */ static void fmeter_update(struct fmeter *fmp) { - time_t now = get_seconds(); - time_t ticks = now - fmp->time; + time64_t now; + u32 ticks; + + now = ktime_get_seconds(); + ticks = now - fmp->time; if (ticks == 0) return; -- cgit v1.2.3-58-ga151 From b53202e6308939d33ba0c78712e850f891b4e76f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 3 Dec 2015 10:24:08 -0500 Subject: cgroup: kill cgrp_ss_priv[CGROUP_CANFORK_COUNT] and friends Now that nobody use the "priv" arg passed to can_fork/cancel_fork/fork we can kill CGROUP_CANFORK_COUNT/SUBSYS_TAG/etc and cgrp_ss_priv[] in copy_process(). Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 12 +++--------- include/linux/cgroup.h | 19 ++++++------------- include/linux/cgroup_subsys.h | 18 ------------------ kernel/cgroup.c | 30 +++++++----------------------- kernel/cgroup_freezer.c | 2 +- kernel/cgroup_pids.c | 4 ++-- kernel/fork.c | 7 +++---- kernel/sched/core.c | 2 +- 8 files changed, 23 insertions(+), 71 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 678cd5e4e881..8cfbc9dfd650 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -34,17 +34,12 @@ struct seq_file; /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, -#define SUBSYS_TAG(_t) CGROUP_ ## _t, \ - __unused_tag_ ## _t = CGROUP_ ## _t - 1, enum cgroup_subsys_id { #include CGROUP_SUBSYS_COUNT, }; -#undef SUBSYS_TAG #undef SUBSYS -#define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) - /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ @@ -424,9 +419,9 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); - int (*can_fork)(struct task_struct *task, void **priv_p); - void (*cancel_fork)(struct task_struct *task, void *priv); - void (*fork)(struct task_struct *task, void *priv); + int (*can_fork)(struct task_struct *task); + void (*cancel_fork)(struct task_struct *task); + void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); @@ -512,7 +507,6 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ -#define CGROUP_CANFORK_COUNT 0 #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cb91b44f5f78..2b3e231448ca 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -96,12 +96,9 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -extern int cgroup_can_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]); -extern void cgroup_cancel_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]); -extern void cgroup_post_fork(struct task_struct *p, - void *old_ss_priv[CGROUP_CANFORK_COUNT]); +extern int cgroup_can_fork(struct task_struct *p); +extern void cgroup_cancel_fork(struct task_struct *p); +extern void cgroup_post_fork(struct task_struct *p); void cgroup_exit(struct task_struct *p); void cgroup_free(struct task_struct *p); @@ -539,13 +536,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline int cgroup_can_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) -{ return 0; } -static inline void cgroup_cancel_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) {} -static inline void cgroup_post_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) {} +static inline int cgroup_can_fork(struct task_struct *p) { return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p) {} +static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 1a96fdaa33d5..0df0336acee9 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -6,14 +6,8 @@ /* * This file *must* be included with SUBSYS() defined. - * SUBSYS_TAG() is a noop if undefined. */ -#ifndef SUBSYS_TAG -#define __TMP_SUBSYS_TAG -#define SUBSYS_TAG(_x) -#endif - #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -58,17 +52,10 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif -/* - * Subsystems that implement the can_fork() family of callbacks. - */ -SUBSYS_TAG(CANFORK_START) - #if IS_ENABLED(CONFIG_CGROUP_PIDS) SUBSYS(pids) #endif -SUBSYS_TAG(CANFORK_END) - /* * The following subsystems are not supported on the default hierarchy. */ @@ -76,11 +63,6 @@ SUBSYS_TAG(CANFORK_END) SUBSYS(debug) #endif -#ifdef __TMP_SUBSYS_TAG -#undef __TMP_SUBSYS_TAG -#undef SUBSYS_TAG -#endif - /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ad35ac033d9b..7f2f007397fe 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5488,19 +5488,6 @@ static const struct file_operations proc_cgroupstats_operations = { .release = single_release, }; -static void **subsys_canfork_priv_p(void *ss_priv[CGROUP_CANFORK_COUNT], int i) -{ - if (CGROUP_CANFORK_START <= i && i < CGROUP_CANFORK_END) - return &ss_priv[i - CGROUP_CANFORK_START]; - return NULL; -} - -static void *subsys_canfork_priv(void *ss_priv[CGROUP_CANFORK_COUNT], int i) -{ - void **private = subsys_canfork_priv_p(ss_priv, i); - return private ? *private : NULL; -} - /** * cgroup_fork - initialize cgroup related fields during copy_process() * @child: pointer to task_struct of forking parent process. @@ -5523,14 +5510,13 @@ void cgroup_fork(struct task_struct *child) * returns an error, the fork aborts with that error code. This allows for * a cgroup subsystem to conditionally allow or deny new forks. */ -int cgroup_can_fork(struct task_struct *child, - void *ss_priv[CGROUP_CANFORK_COUNT]) +int cgroup_can_fork(struct task_struct *child) { struct cgroup_subsys *ss; int i, j, ret; for_each_subsys_which(ss, i, &have_canfork_callback) { - ret = ss->can_fork(child, subsys_canfork_priv_p(ss_priv, i)); + ret = ss->can_fork(child); if (ret) goto out_revert; } @@ -5542,7 +5528,7 @@ out_revert: if (j >= i) break; if (ss->cancel_fork) - ss->cancel_fork(child, subsys_canfork_priv(ss_priv, j)); + ss->cancel_fork(child); } return ret; @@ -5555,15 +5541,14 @@ out_revert: * This calls the cancel_fork() callbacks if a fork failed *after* * cgroup_can_fork() succeded. */ -void cgroup_cancel_fork(struct task_struct *child, - void *ss_priv[CGROUP_CANFORK_COUNT]) +void cgroup_cancel_fork(struct task_struct *child) { struct cgroup_subsys *ss; int i; for_each_subsys(ss, i) if (ss->cancel_fork) - ss->cancel_fork(child, subsys_canfork_priv(ss_priv, i)); + ss->cancel_fork(child); } /** @@ -5576,8 +5561,7 @@ void cgroup_cancel_fork(struct task_struct *child, * cgroup_task_iter_start() - to guarantee that the new task ends up on its * list. */ -void cgroup_post_fork(struct task_struct *child, - void *old_ss_priv[CGROUP_CANFORK_COUNT]) +void cgroup_post_fork(struct task_struct *child) { struct cgroup_subsys *ss; int i; @@ -5621,7 +5605,7 @@ void cgroup_post_fork(struct task_struct *child, * and addition to css_set. */ for_each_subsys_which(ss, i, &have_fork_callback) - ss->fork(child, subsys_canfork_priv(old_ss_priv, i)); + ss->fork(child); } /** diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2d3df82c54f2..1b72d56edce5 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -200,7 +200,7 @@ static void freezer_attach(struct cgroup_taskset *tset) * to do anything as freezer_attach() will put @task into the appropriate * state. */ -static void freezer_fork(struct task_struct *task, void *private) +static void freezer_fork(struct task_struct *task) { struct freezer *freezer; diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c index b50d5a167fda..18107aea2895 100644 --- a/kernel/cgroup_pids.c +++ b/kernel/cgroup_pids.c @@ -209,7 +209,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies * on threadgroup_change_begin() held by the copy_process(). */ -static int pids_can_fork(struct task_struct *task, void **priv_p) +static int pids_can_fork(struct task_struct *task) { struct cgroup_subsys_state *css; struct pids_cgroup *pids; @@ -219,7 +219,7 @@ static int pids_can_fork(struct task_struct *task, void **priv_p) return pids_try_charge(pids, 1); } -static void pids_cancel_fork(struct task_struct *task, void *priv) +static void pids_cancel_fork(struct task_struct *task) { struct cgroup_subsys_state *css; struct pids_cgroup *pids; diff --git a/kernel/fork.c b/kernel/fork.c index fce002ee3ddf..ba7d1c037490 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1249,7 +1249,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, { int retval; struct task_struct *p; - void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {}; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -1526,7 +1525,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * between here and cgroup_post_fork() if an organisation operation is in * progress. */ - retval = cgroup_can_fork(p, cgrp_ss_priv); + retval = cgroup_can_fork(p); if (retval) goto bad_fork_free_pid; @@ -1608,7 +1607,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - cgroup_post_fork(p, cgrp_ss_priv); + cgroup_post_fork(p); threadgroup_change_end(current); perf_event_fork(p); @@ -1618,7 +1617,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, return p; bad_fork_cancel_cgroup: - cgroup_cancel_fork(p, cgrp_ss_priv); + cgroup_cancel_fork(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a9db4819e586..b7d2271cd948 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8212,7 +8212,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) sched_offline_group(tg); } -static void cpu_cgroup_fork(struct task_struct *task, void *private) +static void cpu_cgroup_fork(struct task_struct *task) { sched_move_task(task); } -- cgit v1.2.3-58-ga151 From fccd3af57100027e5330079819987aade07631ad Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sun, 13 Dec 2015 22:13:08 +0200 Subject: cgroup_pids: fix a typo. This patch fixes a typo in pids_charge() method. Signed-off-by: Rami Rosen Signed-off-by: Tejun Heo --- kernel/cgroup_pids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c index 18107aea2895..303097b37429 100644 --- a/kernel/cgroup_pids.c +++ b/kernel/cgroup_pids.c @@ -134,7 +134,7 @@ static void pids_charge(struct pids_cgroup *pids, int num) * * This function follows the set limit. It will fail if the charge would cause * the new value to exceed the hierarchical limit. Returns 0 if the charge - * succeded, otherwise -EAGAIN. + * succeeded, otherwise -EAGAIN. */ static int pids_try_charge(struct pids_cgroup *pids, int num) { -- cgit v1.2.3-58-ga151 From a5ae989957cbc3f3b7bc40677bd9e459c0917528 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Dec 2015 14:53:56 -0500 Subject: cgroup: demote subsystem init messages to KERN_DEBUG These are noisy during boot and not all that interesting. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6b33631251b9..122ec5543335 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5169,7 +5169,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) { struct cgroup_subsys_state *css; - printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); + pr_debug("Initializing cgroup subsys %s\n", ss->name); mutex_lock(&cgroup_mutex); -- cgit v1.2.3-58-ga151 From 2cfa2b191e56fd4c5c6ffb72e1d1e82ed225676d Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sat, 9 Jan 2016 23:33:06 +0200 Subject: cgroup: fix a typo. This patch fixes a typo in a comment in cgroup.c. Signed-off-by: Rami Rosen Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 122ec5543335..effb6366fd08 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4037,7 +4037,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) goto out_err; /* - * Migrate tasks one-by-one until @form is empty. This fails iff + * Migrate tasks one-by-one until @from is empty. This fails iff * ->can_attach() fails. */ do { -- cgit v1.2.3-58-ga151