diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 14:08:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-12 14:08:19 -0700 |
commit | 7b47a9e7c8f672b6fb0b77fca11a63a8a77f5a91 (patch) | |
tree | cf05645120ba2323c36acefdea6e62addf320f8c /fs | |
parent | dbc2fba3fc46084f502aec53183995a632998dcd (diff) | |
parent | c99c2171fc61476afac0dfb59fb2c447a01fb1e0 (diff) |
Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs mount infrastructure updates from Al Viro:
"The rest of core infrastructure; no new syscalls in that pile, but the
old parts are switched to new infrastructure. At that point
conversions of individual filesystems can happen independently; some
are done here (afs, cgroup, procfs, etc.), there's also a large series
outside of that pile dealing with NFS (quite a bit of option-parsing
stuff is getting used there - it's one of the most convoluted
filesystems in terms of mount-related logics), but NFS bits are the
next cycle fodder.
It got seriously simplified since the last cycle; documentation is
probably the weakest bit at the moment - I considered dropping the
commit introducing Documentation/filesystems/mount_api.txt (cutting
the size increase by quarter ;-), but decided that it would be better
to fix it up after -rc1 instead.
That pile allows to do followup work in independent branches, which
should make life much easier for the next cycle. fs/super.c size
increase is unpleasant; there's a followup series that allows to
shrink it considerably, but I decided to leave that until the next
cycle"
* 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits)
afs: Use fs_context to pass parameters over automount
afs: Add fs_context support
vfs: Add some logging to the core users of the fs_context log
vfs: Implement logging through fs_context
vfs: Provide documentation for new mount API
vfs: Remove kern_mount_data()
hugetlbfs: Convert to fs_context
cpuset: Use fs_context
kernfs, sysfs, cgroup, intel_rdt: Support fs_context
cgroup: store a reference to cgroup_ns into cgroup_fs_context
cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper
cgroup_do_mount(): massage calling conventions
cgroup: stash cgroup_root reference into cgroup_fs_context
cgroup2: switch to option-by-option parsing
cgroup1: switch to option-by-option parsing
cgroup: take options parsing into ->parse_monolithic()
cgroup: fold cgroup1_mount() into cgroup1_get_tree()
cgroup: start switching to fs_context
ipc: Convert mqueue fs to fs_context
proc: Add fs_context support to procfs
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 7 | ||||
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/afs/internal.h | 9 | ||||
-rw-r--r-- | fs/afs/mntpt.c | 149 | ||||
-rw-r--r-- | fs/afs/super.c | 430 | ||||
-rw-r--r-- | fs/afs/volume.c | 4 | ||||
-rw-r--r-- | fs/filesystems.c | 4 | ||||
-rw-r--r-- | fs/fs_context.c | 642 | ||||
-rw-r--r-- | fs/fs_parser.c | 447 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 358 | ||||
-rw-r--r-- | fs/internal.h | 13 | ||||
-rw-r--r-- | fs/kernfs/kernfs-internal.h | 1 | ||||
-rw-r--r-- | fs/kernfs/mount.c | 119 | ||||
-rw-r--r-- | fs/mount.h | 5 | ||||
-rw-r--r-- | fs/namei.c | 4 | ||||
-rw-r--r-- | fs/namespace.c | 395 | ||||
-rw-r--r-- | fs/pnode.c | 5 | ||||
-rw-r--r-- | fs/pnode.h | 3 | ||||
-rw-r--r-- | fs/proc/inode.c | 52 | ||||
-rw-r--r-- | fs/proc/internal.h | 5 | ||||
-rw-r--r-- | fs/proc/root.c | 236 | ||||
-rw-r--r-- | fs/super.c | 344 | ||||
-rw-r--r-- | fs/sysfs/mount.c | 73 |
23 files changed, 2421 insertions, 886 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 2557506051a3..3e6d3101f3ff 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -8,6 +8,13 @@ menu "File systems" config DCACHE_WORD_ACCESS bool +config VALIDATE_FS_PARSER + bool "Validate filesystem parameter description" + default y + help + Enable this to perform validation of the parameter description for a + filesystem when it is registered. + if BLOCK config FS_IOMAP diff --git a/fs/Makefile b/fs/Makefile index 7bff9abecfa4..427fec226fae 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ - fs_types.o + fs_types.o fs_context.o fs_parser.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8871b9e8645f..bb1f244b2b3a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -36,15 +36,14 @@ struct pagevec; struct afs_call; -struct afs_mount_params { - bool rwpath; /* T if the parent should be considered R/W */ +struct afs_fs_context { bool force; /* T to force cell type */ bool autocell; /* T if set auto mount operation */ bool dyn_root; /* T if dynamic root */ + bool no_cell; /* T if the source is "none" (for dynroot) */ afs_voltype_t type; /* type of volume requested */ - int volnamesz; /* size of volume name */ + unsigned int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ - struct net *net_ns; /* Network namespace in effect */ struct afs_net *net; /* the AFS net namespace stuff */ struct afs_cell *cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ @@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) return volume; } -extern struct afs_volume *afs_create_volume(struct afs_mount_params *); +extern struct afs_volume *afs_create_volume(struct afs_fs_context *); extern void afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2e51c6994148..eecd8b699186 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -17,6 +17,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/gfp.h> +#include <linux/fs_context.h> #include "internal.h" @@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; +static const char afs_root_volume[] = "root.cell"; + /* * no valid lookup procedure on this sort of dir */ @@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file) } /* - * create a vfsmount to be automounted + * Set the parameters for the proposed superblock. */ -static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) +static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) { - struct afs_super_info *as; - struct vfsmount *mnt; - struct afs_vnode *vnode; - struct page *page; - char *devname, *options; - bool rwpath = false; + struct afs_fs_context *ctx = fc->fs_private; + struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb); + struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt)); + struct afs_cell *cell; + const char *p; int ret; - _enter("{%pd}", mntpt); - - BUG_ON(!d_inode(mntpt)); - - ret = -ENOMEM; - devname = (char *) get_zeroed_page(GFP_KERNEL); - if (!devname) - goto error_no_devname; - - options = (char *) get_zeroed_page(GFP_KERNEL); - if (!options) - goto error_no_options; + if (fc->net_ns != src_as->net_ns) { + put_net(fc->net_ns); + fc->net_ns = get_net(src_as->net_ns); + } - vnode = AFS_FS_I(d_inode(mntpt)); + if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) { + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + if (ctx->cell) { + afs_put_cell(ctx->net, ctx->cell); + ctx->cell = NULL; + } if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { /* if the directory is a pseudo directory, use the d_name */ - static const char afs_root_cell[] = ":root.cell."; unsigned size = mntpt->d_name.len; - ret = -ENOENT; - if (size < 2 || size > AFS_MAXCELLNAME) - goto error_no_page; + if (size < 2) + return -ENOENT; + p = mntpt->d_name.name; if (mntpt->d_name.name[0] == '.') { - devname[0] = '%'; - memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); - memcpy(devname + size, afs_root_cell, - sizeof(afs_root_cell)); - rwpath = true; - } else { - devname[0] = '#'; - memcpy(devname + 1, mntpt->d_name.name, size); - memcpy(devname + size + 1, afs_root_cell, - sizeof(afs_root_cell)); + size--; + p++; + ctx->type = AFSVL_RWVOL; + ctx->force = true; } + if (size > AFS_MAXCELLNAME) + return -ENAMETOOLONG; + + cell = afs_lookup_cell(ctx->net, p, size, NULL, false); + if (IS_ERR(cell)) { + pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); + return PTR_ERR(cell); + } + ctx->cell = cell; + + ctx->volname = afs_root_volume; + ctx->volnamesz = sizeof(afs_root_volume) - 1; } else { /* read the contents of the AFS special symlink */ + struct page *page; loff_t size = i_size_read(d_inode(mntpt)); char *buf; - ret = -EINVAL; + if (src_as->cell) + ctx->cell = afs_get_cell(src_as->cell); + if (size > PAGE_SIZE - 1) - goto error_no_page; + return -EINVAL; page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto error_no_page; - } + if (IS_ERR(page)) + return PTR_ERR(page); if (PageError(page)) { ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); - goto error; + put_page(page); + return ret; } - buf = kmap_atomic(page); - memcpy(devname, buf, size); - kunmap_atomic(buf); + buf = kmap(page); + ret = vfs_parse_fs_string(fc, "source", buf, size); + kunmap(page); put_page(page); - page = NULL; + if (ret < 0) + return ret; } - /* work out what options we want */ - as = AFS_FS_S(mntpt->d_sb); - if (as->cell) { - memcpy(options, "cell=", 5); - strcpy(options + 5, as->cell->name); - if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath) - strcat(options, ",rwpath"); - } + return 0; +} - /* try and do the mount */ - _debug("--- attempting mount %s -o %s ---", devname, options); - mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); - _debug("--- mount result %p ---", mnt); +/* + * create a vfsmount to be automounted + */ +static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) +{ + struct fs_context *fc; + struct vfsmount *mnt; + int ret; - free_page((unsigned long) devname); - free_page((unsigned long) options); - _leave(" = %p", mnt); - return mnt; + BUG_ON(!d_inode(mntpt)); -error: - put_page(page); -error_no_page: - free_page((unsigned long) options); -error_no_options: - free_page((unsigned long) devname); -error_no_devname: - _leave(" = %d", ret); - return ERR_PTR(ret); + fc = fs_context_for_submount(&afs_fs_type, mntpt); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ret = afs_mntpt_set_params(fc, mntpt); + if (!ret) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); + + put_fs_context(fc); + return mnt; } /* diff --git a/fs/afs/super.c b/fs/afs/super.c index e684f6769b15..5adf012b8e27 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -1,6 +1,6 @@ /* AFS superblock handling * - * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved. + * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. * * This software may be freely redistributed under the terms of the * GNU General Public License. @@ -21,7 +21,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/sched.h> #include <linux/nsproxy.h> @@ -30,21 +30,22 @@ #include "internal.h" static void afs_i_init_once(void *foo); -static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data); static void afs_kill_super(struct super_block *sb); static struct inode *afs_alloc_inode(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); static int afs_show_devname(struct seq_file *m, struct dentry *root); static int afs_show_options(struct seq_file *m, struct dentry *root); +static int afs_init_fs_context(struct fs_context *fc); +static const struct fs_parameter_description afs_fs_parameters; struct file_system_type afs_fs_type = { - .owner = THIS_MODULE, - .name = "afs", - .mount = afs_mount, - .kill_sb = afs_kill_super, - .fs_flags = 0, + .owner = THIS_MODULE, + .name = "afs", + .init_fs_context = afs_init_fs_context, + .parameters = &afs_fs_parameters, + .kill_sb = afs_kill_super, + .fs_flags = 0, }; MODULE_ALIAS_FS("afs"); @@ -63,22 +64,22 @@ static const struct super_operations afs_super_ops = { static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; -enum { - afs_no_opt, - afs_opt_cell, - afs_opt_dyn, - afs_opt_rwpath, - afs_opt_vol, - afs_opt_autocell, +enum afs_param { + Opt_autocell, + Opt_dyn, + Opt_source, }; -static const match_table_t afs_options_list = { - { afs_opt_cell, "cell=%s" }, - { afs_opt_dyn, "dyn" }, - { afs_opt_rwpath, "rwpath" }, - { afs_opt_vol, "vol=%s" }, - { afs_opt_autocell, "autocell" }, - { afs_no_opt, NULL }, +static const struct fs_parameter_spec afs_param_specs[] = { + fsparam_flag ("autocell", Opt_autocell), + fsparam_flag ("dyn", Opt_dyn), + fsparam_string("source", Opt_source), + {} +}; + +static const struct fs_parameter_description afs_fs_parameters = { + .name = "kAFS", + .specs = afs_param_specs, }; /* @@ -190,84 +191,23 @@ static int afs_show_options(struct seq_file *m, struct dentry *root) } /* - * parse the mount options - * - this function has been shamelessly adapted from the ext3 fs which - * shamelessly adapted it from the msdos fs - */ -static int afs_parse_options(struct afs_mount_params *params, - char *options, const char **devname) -{ - struct afs_cell *cell; - substring_t args[MAX_OPT_ARGS]; - char *p; - int token; - - _enter("%s", options); - - options[PAGE_SIZE - 1] = 0; - - while ((p = strsep(&options, ","))) { - if (!*p) - continue; - - token = match_token(p, afs_options_list, args); - switch (token) { - case afs_opt_cell: - rcu_read_lock(); - cell = afs_lookup_cell_rcu(params->net, - args[0].from, - args[0].to - args[0].from); - rcu_read_unlock(); - if (IS_ERR(cell)) - return PTR_ERR(cell); - afs_put_cell(params->net, params->cell); - params->cell = cell; - break; - - case afs_opt_rwpath: - params->rwpath = true; - break; - - case afs_opt_vol: - *devname = args[0].from; - break; - - case afs_opt_autocell: - params->autocell = true; - break; - - case afs_opt_dyn: - params->dyn_root = true; - break; - - default: - printk(KERN_ERR "kAFS:" - " Unknown or invalid mount option: '%s'\n", p); - return -EINVAL; - } - } - - _leave(" = 0"); - return 0; -} - -/* - * parse a device name to get cell name, volume name, volume type and R/W - * selector - * - this can be one of the following: + * Parse the source name to get cell name, volume name, volume type and R/W + * selector. + * + * This can be one of the following: * "%[cell:]volume[.]" R/W volume - * "#[cell:]volume[.]" R/O or R/W volume (rwpath=0), - * or R/W (rwpath=1) volume + * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), + * or R/W (R/W parent) volume * "%[cell:]volume.readonly" R/O volume * "#[cell:]volume.readonly" R/O volume * "%[cell:]volume.backup" Backup volume * "#[cell:]volume.backup" Backup volume */ -static int afs_parse_device_name(struct afs_mount_params *params, - const char *name) +static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) { + struct afs_fs_context *ctx = fc->fs_private; struct afs_cell *cell; - const char *cellname, *suffix; + const char *cellname, *suffix, *name = param->string; int cellnamesz; _enter(",%s", name); @@ -278,69 +218,149 @@ static int afs_parse_device_name(struct afs_mount_params *params, } if ((name[0] != '%' && name[0] != '#') || !name[1]) { + /* To use dynroot, we don't want to have to provide a source */ + if (strcmp(name, "none") == 0) { + ctx->no_cell = true; + return 0; + } printk(KERN_ERR "kAFS: unparsable volume name\n"); return -EINVAL; } /* determine the type of volume we're looking for */ - params->type = AFSVL_ROVOL; - params->force = false; - if (params->rwpath || name[0] == '%') { - params->type = AFSVL_RWVOL; - params->force = true; + if (name[0] == '%') { + ctx->type = AFSVL_RWVOL; + ctx->force = true; } name++; /* split the cell name out if there is one */ - params->volname = strchr(name, ':'); - if (params->volname) { + ctx->volname = strchr(name, ':'); + if (ctx->volname) { cellname = name; - cellnamesz = params->volname - name; - params->volname++; + cellnamesz = ctx->volname - name; + ctx->volname++; } else { - params->volname = name; + ctx->volname = name; cellname = NULL; cellnamesz = 0; } /* the volume type is further affected by a possible suffix */ - suffix = strrchr(params->volname, '.'); + suffix = strrchr(ctx->volname, '.'); if (suffix) { if (strcmp(suffix, ".readonly") == 0) { - params->type = AFSVL_ROVOL; - params->force = true; + ctx->type = AFSVL_ROVOL; + ctx->force = true; } else if (strcmp(suffix, ".backup") == 0) { - params->type = AFSVL_BACKVOL; - params->force = true; + ctx->type = AFSVL_BACKVOL; + ctx->force = true; } else if (suffix[1] == 0) { } else { suffix = NULL; } } - params->volnamesz = suffix ? - suffix - params->volname : strlen(params->volname); + ctx->volnamesz = suffix ? + suffix - ctx->volname : strlen(ctx->volname); _debug("cell %*.*s [%p]", - cellnamesz, cellnamesz, cellname ?: "", params->cell); + cellnamesz, cellnamesz, cellname ?: "", ctx->cell); /* lookup the cell record */ - if (cellname || !params->cell) { - cell = afs_lookup_cell(params->net, cellname, cellnamesz, + if (cellname) { + cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, NULL, false); if (IS_ERR(cell)) { - printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", + pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(params->net, params->cell); - params->cell = cell; + afs_put_cell(ctx->net, ctx->cell); + ctx->cell = cell; } _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", - params->cell->name, params->cell, - params->volnamesz, params->volnamesz, params->volname, - suffix ?: "-", params->type, params->force ? " FORCE" : ""); + ctx->cell->name, ctx->cell, + ctx->volnamesz, ctx->volnamesz, ctx->volname, + suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); + + fc->source = param->string; + param->string = NULL; + return 0; +} + +/* + * Parse a single mount parameter. + */ +static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + struct afs_fs_context *ctx = fc->fs_private; + int opt; + + opt = fs_parse(fc, &afs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_source: + return afs_parse_source(fc, param); + + case Opt_autocell: + ctx->autocell = true; + break; + + case Opt_dyn: + ctx->dyn_root = true; + break; + + default: + return -EINVAL; + } + + _leave(" = 0"); + return 0; +} + +/* + * Validate the options, get the cell key and look up the volume. + */ +static int afs_validate_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_volume *volume; + struct key *key; + + if (!ctx->dyn_root) { + if (ctx->no_cell) { + pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); + return -EINVAL; + } + + if (!ctx->cell) { + pr_warn("kAFS: No cell specified\n"); + return -EDESTADDRREQ; + } + + /* We try to do the mount securely. */ + key = afs_request_key(ctx->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + + ctx->key = key; + + if (ctx->volume) { + afs_put_volume(ctx->cell, ctx->volume); + ctx->volume = NULL; + } + + volume = afs_create_volume(ctx); + if (IS_ERR(volume)) + return PTR_ERR(volume); + + ctx->volume = volume; + } return 0; } @@ -348,39 +368,34 @@ static int afs_parse_device_name(struct afs_mount_params *params, /* * check a superblock to see if it's the one we're looking for */ -static int afs_test_super(struct super_block *sb, void *data) +static int afs_test_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as1 = data; + struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net_ns == as1->net_ns && + return (as->net_ns == fc->net_ns && as->volume && - as->volume->vid == as1->volume->vid && + as->volume->vid == ctx->volume->vid && !as->dyn_root); } -static int afs_dynroot_test_super(struct super_block *sb, void *data) +static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net_ns == as1->net_ns && + return (as->net_ns == fc->net_ns && as->dyn_root); } -static int afs_set_super(struct super_block *sb, void *data) +static int afs_set_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as = data; - - sb->s_fs_info = as; return set_anon_super(sb, NULL); } /* * fill in the superblock */ -static int afs_fill_super(struct super_block *sb, - struct afs_mount_params *params) +static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); struct afs_fid fid; @@ -412,13 +427,13 @@ static int afs_fill_super(struct super_block *sb, fid.vnode = 1; fid.vnode_hi = 0; fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL); } if (IS_ERR(inode)) return PTR_ERR(inode); - if (params->autocell || params->dyn_root) + if (ctx->autocell || as->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; @@ -443,17 +458,20 @@ error: return ret; } -static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) +static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) { + struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { - as->net_ns = get_net(params->net_ns); - if (params->dyn_root) + as->net_ns = get_net(fc->net_ns); + if (ctx->dyn_root) { as->dyn_root = true; - else - as->cell = afs_get_cell(params->cell); + } else { + as->cell = afs_get_cell(ctx->cell); + as->volume = __afs_get_volume(ctx->volume); + } } return as; } @@ -475,7 +493,7 @@ static void afs_kill_super(struct super_block *sb) if (as->dyn_root) afs_dynroot_depopulate(sb); - + /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ @@ -488,111 +506,103 @@ static void afs_kill_super(struct super_block *sb) } /* - * get an AFS superblock + * Get an AFS superblock and root directory. */ -static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *options) +static int afs_get_tree(struct fs_context *fc) { - struct afs_mount_params params; + struct afs_fs_context *ctx = fc->fs_private; struct super_block *sb; - struct afs_volume *candidate; - struct key *key; struct afs_super_info *as; int ret; - _enter(",,%s,%p", dev_name, options); - - memset(¶ms, 0, sizeof(params)); - - ret = -EINVAL; - if (current->nsproxy->net_ns != &init_net) + ret = afs_validate_fc(fc); + if (ret) goto error; - params.net_ns = current->nsproxy->net_ns; - params.net = afs_net(params.net_ns); - - /* parse the options and device name */ - if (options) { - ret = afs_parse_options(¶ms, options, &dev_name); - if (ret < 0) - goto error; - } - - if (!params.dyn_root) { - ret = afs_parse_device_name(¶ms, dev_name); - if (ret < 0) - goto error; - /* try and do the mount securely */ - key = afs_request_key(params.cell); - if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - ret = PTR_ERR(key); - goto error; - } - params.key = key; - } + _enter(""); /* allocate a superblock info record */ ret = -ENOMEM; - as = afs_alloc_sbi(¶ms); + as = afs_alloc_sbi(fc); if (!as) - goto error_key; - - if (!params.dyn_root) { - /* Assume we're going to need a volume record; at the very - * least we can use it to update the volume record if we have - * one already. This checks that the volume exists within the - * cell. - */ - candidate = afs_create_volume(¶ms); - if (IS_ERR(candidate)) { - ret = PTR_ERR(candidate); - goto error_as; - } - - as->volume = candidate; - } + goto error; + fc->s_fs_info = as; /* allocate a deviceless superblock */ - sb = sget(fs_type, - as->dyn_root ? afs_dynroot_test_super : afs_test_super, - afs_set_super, flags, as); + sb = sget_fc(fc, + as->dyn_root ? afs_dynroot_test_super : afs_test_super, + afs_set_super); if (IS_ERR(sb)) { ret = PTR_ERR(sb); - goto error_as; + goto error; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); - ret = afs_fill_super(sb, ¶ms); + ret = afs_fill_super(sb, ctx); if (ret < 0) goto error_sb; - as = NULL; sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, SB_ACTIVE); - afs_destroy_sbi(as); - as = NULL; } - afs_put_cell(params.net, params.cell); - key_put(params.key); + fc->root = dget(sb->s_root); _leave(" = 0 [%p]", sb); - return dget(sb->s_root); + return 0; error_sb: deactivate_locked_super(sb); - goto error_key; -error_as: - afs_destroy_sbi(as); -error_key: - key_put(params.key); error: - afs_put_cell(params.net, params.cell); _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; +} + +static void afs_free_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + + afs_destroy_sbi(fc->s_fs_info); + afs_put_volume(ctx->cell, ctx->volume); + afs_put_cell(ctx->net, ctx->cell); + key_put(ctx->key); + kfree(ctx); +} + +static const struct fs_context_operations afs_context_ops = { + .free = afs_free_fc, + .parse_param = afs_parse_param, + .get_tree = afs_get_tree, +}; + +/* + * Set up the filesystem mount context. + */ +static int afs_init_fs_context(struct fs_context *fc) +{ + struct afs_fs_context *ctx; + struct afs_cell *cell; + + ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->type = AFSVL_ROVOL; + ctx->net = afs_net(fc->net_ns); + + /* Default to the workstation cell. */ + rcu_read_lock(); + cell = afs_lookup_cell_rcu(ctx->net, NULL, 0); + rcu_read_unlock(); + if (IS_ERR(cell)) + cell = NULL; + ctx->cell = cell; + + fc->fs_private = ctx; + fc->ops = &afs_context_ops; + return 0; } /* diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 00975ed3640f..f6eba2def0a1 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; /* * Allocate a volume record and load it up from a vldb record. */ -static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, +static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, struct afs_vldb_entry *vldb, unsigned long type_mask) { @@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, * - Rule 3: If parent volume is R/W, then only mount R/W volume unless * explicitly told otherwise */ -struct afs_volume *afs_create_volume(struct afs_mount_params *params) +struct afs_volume *afs_create_volume(struct afs_fs_context *params) { struct afs_vldb_entry *vldb; struct afs_volume *volume; diff --git a/fs/filesystems.c b/fs/filesystems.c index b03f57b1105b..9135646e41ac 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/fs_parser.h> /* * Handling of filesystem drivers list. @@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; + if (fs->parameters && !fs_validate_description(fs->parameters)) + return -EINVAL; + BUG_ON(strchr(fs->name, '.')); if (fs->next) return -EBUSY; diff --git a/fs/fs_context.c b/fs/fs_context.c new file mode 100644 index 000000000000..87e3546b9a52 --- /dev/null +++ b/fs/fs_context.c @@ -0,0 +1,642 @@ +/* Provide a way to create a superblock configuration context within the kernel + * that allows a superblock to be set up prior to mounting. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/nsproxy.h> +#include <linux/slab.h> +#include <linux/magic.h> +#include <linux/security.h> +#include <linux/mnt_namespace.h> +#include <linux/pid_namespace.h> +#include <linux/user_namespace.h> +#include <net/net_namespace.h> +#include "mount.h" +#include "internal.h" + +enum legacy_fs_param { + LEGACY_FS_UNSET_PARAMS, + LEGACY_FS_MONOLITHIC_PARAMS, + LEGACY_FS_INDIVIDUAL_PARAMS, +}; + +struct legacy_fs_context { + char *legacy_data; /* Data page for legacy filesystems */ + size_t data_size; + enum legacy_fs_param param_type; +}; + +static int legacy_init_fs_context(struct fs_context *fc); + +static const struct constant_table common_set_sb_flag[] = { + { "dirsync", SB_DIRSYNC }, + { "lazytime", SB_LAZYTIME }, + { "mand", SB_MANDLOCK }, + { "posixacl", SB_POSIXACL }, + { "ro", SB_RDONLY }, + { "sync", SB_SYNCHRONOUS }, +}; + +static const struct constant_table common_clear_sb_flag[] = { + { "async", SB_SYNCHRONOUS }, + { "nolazytime", SB_LAZYTIME }, + { "nomand", SB_MANDLOCK }, + { "rw", SB_RDONLY }, + { "silent", SB_SILENT }, +}; + +static const char *const forbidden_sb_flag[] = { + "bind", + "dev", + "exec", + "move", + "noatime", + "nodev", + "nodiratime", + "noexec", + "norelatime", + "nostrictatime", + "nosuid", + "private", + "rec", + "relatime", + "remount", + "shared", + "slave", + "strictatime", + "suid", + "unbindable", +}; + +/* + * Check for a common mount option that manipulates s_flags. + */ +static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) +{ + unsigned int token; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++) + if (strcmp(key, forbidden_sb_flag[i]) == 0) + return -EINVAL; + + token = lookup_constant(common_set_sb_flag, key, 0); + if (token) { + fc->sb_flags |= token; + fc->sb_flags_mask |= token; + return 0; + } + + token = lookup_constant(common_clear_sb_flag, key, 0); + if (token) { + fc->sb_flags &= ~token; + fc->sb_flags_mask |= token; + return 0; + } + + return -ENOPARAM; +} + +/** + * vfs_parse_fs_param - Add a single parameter to a superblock config + * @fc: The filesystem context to modify + * @param: The parameter + * + * A single mount option in string form is applied to the filesystem context + * being set up. Certain standard options (for example "ro") are translated + * into flag bits without going to the filesystem. The active security module + * is allowed to observe and poach options. Any other options are passed over + * to the filesystem to parse. + * + * This may be called multiple times for a context. + * + * Returns 0 on success and a negative error code on failure. In the event of + * failure, supplementary error information may have been set. + */ +int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) +{ + int ret; + + if (!param->key) + return invalf(fc, "Unnamed parameter\n"); + + ret = vfs_parse_sb_flag(fc, param->key); + if (ret != -ENOPARAM) + return ret; + + ret = security_fs_context_parse_param(fc, param); + if (ret != -ENOPARAM) + /* Param belongs to the LSM or is disallowed by the LSM; so + * don't pass to the FS. + */ + return ret; + + if (fc->ops->parse_param) { + ret = fc->ops->parse_param(fc, param); + if (ret != -ENOPARAM) + return ret; + } + + /* If the filesystem doesn't take any arguments, give it the + * default handling of source. + */ + if (strcmp(param->key, "source") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Non-string source"); + if (fc->source) + return invalf(fc, "VFS: Multiple sources"); + fc->source = param->string; + param->string = NULL; + return 0; + } + + return invalf(fc, "%s: Unknown parameter '%s'", + fc->fs_type->name, param->key); +} +EXPORT_SYMBOL(vfs_parse_fs_param); + +/** + * vfs_parse_fs_string - Convenience function to just parse a string. + */ +int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size) +{ + int ret; + + struct fs_parameter param = { + .key = key, + .type = fs_value_is_string, + .size = v_size, + }; + + if (v_size > 0) { + param.string = kmemdup_nul(value, v_size, GFP_KERNEL); + if (!param.string) + return -ENOMEM; + } + + ret = vfs_parse_fs_param(fc, ¶m); + kfree(param.string); + return ret; +} +EXPORT_SYMBOL(vfs_parse_fs_string); + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @ctx: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +int generic_parse_monolithic(struct fs_context *fc, void *data) +{ + char *options = data, *key; + int ret = 0; + + if (!options) + return 0; + + ret = security_sb_eat_lsm_opts(options, &fc->security); + if (ret) + return ret; + + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + if (value) { + if (value == key) + continue; + *value++ = 0; + v_len = strlen(value); + } + ret = vfs_parse_fs_string(fc, key, value, v_len); + if (ret < 0) + break; + } + } + + return ret; +} +EXPORT_SYMBOL(generic_parse_monolithic); + +/** + * alloc_fs_context - Create a filesystem context. + * @fs_type: The filesystem type. + * @reference: The dentry from which this one derives (or NULL) + * @sb_flags: Filesystem/superblock flags (SB_*) + * @sb_flags_mask: Applicable members of @sb_flags + * @purpose: The purpose that this configuration shall be used for. + * + * Open a filesystem and create a mount context. The mount context is + * initialised with the supplied flags and, if a submount/automount from + * another superblock (referred to by @reference) is supplied, may have + * parameters such as namespaces copied across from that superblock. + */ +static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose) +{ + int (*init_fs_context)(struct fs_context *); + struct fs_context *fc; + int ret = -ENOMEM; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->purpose = purpose; + fc->sb_flags = sb_flags; + fc->sb_flags_mask = sb_flags_mask; + fc->fs_type = get_filesystem(fs_type); + fc->cred = get_current_cred(); + fc->net_ns = get_net(current->nsproxy->net_ns); + + switch (purpose) { + case FS_CONTEXT_FOR_MOUNT: + fc->user_ns = get_user_ns(fc->cred->user_ns); + break; + case FS_CONTEXT_FOR_SUBMOUNT: + fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); + break; + case FS_CONTEXT_FOR_RECONFIGURE: + /* We don't pin any namespaces as the superblock's + * subscriptions cannot be changed at this point. + */ + atomic_inc(&reference->d_sb->s_active); + fc->root = dget(reference); + break; + } + + /* TODO: Make all filesystems support this unconditionally */ + init_fs_context = fc->fs_type->init_fs_context; + if (!init_fs_context) + init_fs_context = legacy_init_fs_context; + + ret = init_fs_context(fc); + if (ret < 0) + goto err_fc; + fc->need_free = true; + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} + +struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags) +{ + return alloc_fs_context(fs_type, NULL, sb_flags, 0, + FS_CONTEXT_FOR_MOUNT); +} +EXPORT_SYMBOL(fs_context_for_mount); + +struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask) +{ + return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags, + sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE); +} +EXPORT_SYMBOL(fs_context_for_reconfigure); + +struct fs_context *fs_context_for_submount(struct file_system_type *type, + struct dentry *reference) +{ + return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); +} +EXPORT_SYMBOL(fs_context_for_submount); + +void fc_drop_locked(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + dput(fc->root); + fc->root = NULL; + deactivate_locked_super(sb); +} + +static void legacy_fs_context_free(struct fs_context *fc); + +/** + * vfs_dup_fc_config: Duplicate a filesystem context. + * @src_fc: The context to copy. + */ +struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) +{ + struct fs_context *fc; + int ret; + + if (!src_fc->ops->dup) + return ERR_PTR(-EOPNOTSUPP); + + fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->fs_private = NULL; + fc->s_fs_info = NULL; + fc->source = NULL; + fc->security = NULL; + get_filesystem(fc->fs_type); + get_net(fc->net_ns); + get_user_ns(fc->user_ns); + get_cred(fc->cred); + + /* Can't call put until we've called ->dup */ + ret = fc->ops->dup(fc, src_fc); + if (ret < 0) + goto err_fc; + + ret = security_fs_context_dup(fc, src_fc); + if (ret < 0) + goto err_fc; + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(vfs_dup_fs_context); + +#ifdef CONFIG_PRINTK +/** + * logfc - Log a message to a filesystem context + * @fc: The filesystem context to log to. + * @fmt: The format of the buffer. + */ +void logfc(struct fs_context *fc, const char *fmt, ...) +{ + va_list va; + + va_start(va, fmt); + + switch (fmt[0]) { + case 'w': + vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va); + break; + case 'e': + vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va); + break; + default: + vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va); + break; + } + + pr_cont("\n"); + va_end(va); +} +EXPORT_SYMBOL(logfc); +#endif + +/** + * put_fs_context - Dispose of a superblock configuration context. + * @fc: The context to dispose of. + */ +void put_fs_context(struct fs_context *fc) +{ + struct super_block *sb; + + if (fc->root) { + sb = fc->root->d_sb; + dput(fc->root); + fc->root = NULL; + deactivate_super(sb); + } + + if (fc->need_free && fc->ops && fc->ops->free) + fc->ops->free(fc); + + security_free_mnt_opts(&fc->security); + put_net(fc->net_ns); + put_user_ns(fc->user_ns); + put_cred(fc->cred); + kfree(fc->subtype); + put_filesystem(fc->fs_type); + kfree(fc->source); + kfree(fc); +} +EXPORT_SYMBOL(put_fs_context); + +/* + * Free the config for a filesystem that doesn't support fs_context. + */ +static void legacy_fs_context_free(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + + if (ctx) { + if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) + kfree(ctx->legacy_data); + kfree(ctx); + } +} + +/* + * Duplicate a legacy config. + */ +static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + struct legacy_fs_context *ctx; + struct legacy_fs_context *src_ctx = src_fc->fs_private; + + ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) { + ctx->legacy_data = kmemdup(src_ctx->legacy_data, + src_ctx->data_size, GFP_KERNEL); + if (!ctx->legacy_data) { + kfree(ctx); + return -ENOMEM; + } + } + + fc->fs_private = ctx; + return 0; +} + +/* + * Add a parameter to a legacy config. We build up a comma-separated list of + * options. + */ +static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct legacy_fs_context *ctx = fc->fs_private; + unsigned int size = ctx->data_size; + size_t len = 0; + + if (strcmp(param->key, "source") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Legacy: Non-string source"); + if (fc->source) + return invalf(fc, "VFS: Legacy: Multiple sources"); + fc->source = param->string; + param->string = NULL; + return 0; + } + + if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) && + strcmp(param->key, "subtype") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Legacy: Non-string subtype"); + if (fc->subtype) + return invalf(fc, "VFS: Legacy: Multiple subtype"); + fc->subtype = param->string; + param->string = NULL; + return 0; + } + + if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) + return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); + + switch (param->type) { + case fs_value_is_string: + len = 1 + param->size; + /* Fall through */ + case fs_value_is_flag: + len += strlen(param->key); + break; + default: + return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported", + param->key); + } + + if (len > PAGE_SIZE - 2 - size) + return invalf(fc, "VFS: Legacy: Cumulative options too large"); + if (strchr(param->key, ',') || + (param->type == fs_value_is_string && + memchr(param->string, ',', param->size))) + return invalf(fc, "VFS: Legacy: Option '%s' contained comma", + param->key); + if (!ctx->legacy_data) { + ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!ctx->legacy_data) + return -ENOMEM; + } + + ctx->legacy_data[size++] = ','; + len = strlen(param->key); + memcpy(ctx->legacy_data + size, param->key, len); + size += len; + if (param->type == fs_value_is_string) { + ctx->legacy_data[size++] = '='; + memcpy(ctx->legacy_data + size, param->string, param->size); + size += param->size; + } + ctx->legacy_data[size] = '\0'; + ctx->data_size = size; + ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS; + return 0; +} + +/* + * Add monolithic mount data. + */ +static int legacy_parse_monolithic(struct fs_context *fc, void *data) +{ + struct legacy_fs_context *ctx = fc->fs_private; + + if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) { + pr_warn("VFS: Can't mix monolithic and individual options\n"); + return -EINVAL; + } + + ctx->legacy_data = data; + ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS; + if (!ctx->legacy_data) + return 0; + + if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) + return 0; + return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); +} + +/* + * Get a mountable root with the legacy mount command. + */ +static int legacy_get_tree(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb; + struct dentry *root; + + root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, + fc->source, ctx->legacy_data); + if (IS_ERR(root)) + return PTR_ERR(root); + + sb = root->d_sb; + BUG_ON(!sb); + + fc->root = root; + return 0; +} + +/* + * Handle remount. + */ +static int legacy_reconfigure(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; + + if (!sb->s_op->remount_fs) + return 0; + + return sb->s_op->remount_fs(sb, &fc->sb_flags, + ctx ? ctx->legacy_data : NULL); +} + +const struct fs_context_operations legacy_fs_context_ops = { + .free = legacy_fs_context_free, + .dup = legacy_fs_context_dup, + .parse_param = legacy_parse_param, + .parse_monolithic = legacy_parse_monolithic, + .get_tree = legacy_get_tree, + .reconfigure = legacy_reconfigure, +}; + +/* + * Initialise a legacy context for a filesystem that doesn't support + * fs_context. + */ +static int legacy_init_fs_context(struct fs_context *fc) +{ + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + if (!fc->fs_private) + return -ENOMEM; + fc->ops = &legacy_fs_context_ops; + return 0; +} + +int parse_monolithic_mount_data(struct fs_context *fc, void *data) +{ + int (*monolithic_mount_data)(struct fs_context *, void *); + + monolithic_mount_data = fc->ops->parse_monolithic; + if (!monolithic_mount_data) + monolithic_mount_data = generic_parse_monolithic; + + return monolithic_mount_data(fc, data); +} diff --git a/fs/fs_parser.c b/fs/fs_parser.c new file mode 100644 index 000000000000..842e8f749db6 --- /dev/null +++ b/fs/fs_parser.c @@ -0,0 +1,447 @@ +/* Filesystem parameter parser. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/export.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/slab.h> +#include <linux/security.h> +#include <linux/namei.h> +#include "internal.h" + +static const struct constant_table bool_names[] = { + { "0", false }, + { "1", true }, + { "false", false }, + { "no", false }, + { "true", true }, + { "yes", true }, +}; + +/** + * lookup_constant - Look up a constant by name in an ordered table + * @tbl: The table of constants to search. + * @tbl_size: The size of the table. + * @name: The name to look up. + * @not_found: The value to return if the name is not found. + */ +int __lookup_constant(const struct constant_table *tbl, size_t tbl_size, + const char *name, int not_found) +{ + unsigned int i; + + for (i = 0; i < tbl_size; i++) + if (strcmp(name, tbl[i].name) == 0) + return tbl[i].value; + + return not_found; +} +EXPORT_SYMBOL(__lookup_constant); + +static const struct fs_parameter_spec *fs_lookup_key( + const struct fs_parameter_description *desc, + const char *name) +{ + const struct fs_parameter_spec *p; + + if (!desc->specs) + return NULL; + + for (p = desc->specs; p->name; p++) + if (strcmp(p->name, name) == 0) + return p; + + return NULL; +} + +/* + * fs_parse - Parse a filesystem configuration parameter + * @fc: The filesystem context to log errors through. + * @desc: The parameter description to use. + * @param: The parameter. + * @result: Where to place the result of the parse + * + * Parse a filesystem configuration parameter and attempt a conversion for a + * simple parameter for which this is requested. If successful, the determined + * parameter ID is placed into @result->key, the desired type is indicated in + * @result->t and any converted value is placed into an appropriate member of + * the union in @result. + * + * The function returns the parameter number if the parameter was matched, + * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that + * unknown parameters are okay and -EINVAL if there was a conversion issue or + * the parameter wasn't recognised and unknowns aren't okay. + */ +int fs_parse(struct fs_context *fc, + const struct fs_parameter_description *desc, + struct fs_parameter *param, + struct fs_parse_result *result) +{ + const struct fs_parameter_spec *p; + const struct fs_parameter_enum *e; + int ret = -ENOPARAM, b; + + result->has_value = !!param->string; + result->negated = false; + result->uint_64 = 0; + + p = fs_lookup_key(desc, param->key); + if (!p) { + /* If we didn't find something that looks like "noxxx", see if + * "xxx" takes the "no"-form negative - but only if there + * wasn't an value. + */ + if (result->has_value) + goto unknown_parameter; + if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2]) + goto unknown_parameter; + + p = fs_lookup_key(desc, param->key + 2); + if (!p) + goto unknown_parameter; + if (!(p->flags & fs_param_neg_with_no)) + goto unknown_parameter; + result->boolean = false; + result->negated = true; + } + + if (p->flags & fs_param_deprecated) + warnf(fc, "%s: Deprecated parameter '%s'", + desc->name, param->key); + + if (result->negated) + goto okay; + + /* Certain parameter types only take a string and convert it. */ + switch (p->type) { + case __fs_param_wasnt_defined: + return -EINVAL; + case fs_param_is_u32: + case fs_param_is_u32_octal: + case fs_param_is_u32_hex: + case fs_param_is_s32: + case fs_param_is_u64: + case fs_param_is_enum: + case fs_param_is_string: + if (param->type != fs_value_is_string) + goto bad_value; + if (!result->has_value) { + if (p->flags & fs_param_v_optional) + goto okay; + goto bad_value; + } + /* Fall through */ + default: + break; + } + + /* Try to turn the type we were given into the type desired by the + * parameter and give an error if we can't. + */ + switch (p->type) { + case fs_param_is_flag: + if (param->type != fs_value_is_flag && + (param->type != fs_value_is_string || result->has_value)) + return invalf(fc, "%s: Unexpected value for '%s'", + desc->name, param->key); + result->boolean = true; + goto okay; + + case fs_param_is_bool: + switch (param->type) { + case fs_value_is_flag: + result->boolean = true; + goto okay; + case fs_value_is_string: + if (param->size == 0) { + result->boolean = true; + goto okay; + } + b = lookup_constant(bool_names, param->string, -1); + if (b == -1) + goto bad_value; + result->boolean = b; + goto okay; + default: + goto bad_value; + } + + case fs_param_is_u32: + ret = kstrtouint(param->string, 0, &result->uint_32); + goto maybe_okay; + case fs_param_is_u32_octal: + ret = kstrtouint(param->string, 8, &result->uint_32); + goto maybe_okay; + case fs_param_is_u32_hex: + ret = kstrtouint(param->string, 16, &result->uint_32); + goto maybe_okay; + case fs_param_is_s32: + ret = kstrtoint(param->string, 0, &result->int_32); + goto maybe_okay; + case fs_param_is_u64: + ret = kstrtoull(param->string, 0, &result->uint_64); + goto maybe_okay; + + case fs_param_is_enum: + for (e = desc->enums; e->name[0]; e++) { + if (e->opt == p->opt && + strcmp(e->name, param->string) == 0) { + result->uint_32 = e->value; + goto okay; + } + } + goto bad_value; + + case fs_param_is_string: + goto okay; + case fs_param_is_blob: + if (param->type != fs_value_is_blob) + goto bad_value; + goto okay; + + case fs_param_is_fd: { + if (param->type != fs_value_is_file) + goto bad_value; + goto okay; + } + + case fs_param_is_blockdev: + case fs_param_is_path: + goto okay; + default: + BUG(); + } + +maybe_okay: + if (ret < 0) + goto bad_value; +okay: + return p->opt; + +bad_value: + return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key); +unknown_parameter: + return -ENOPARAM; +} +EXPORT_SYMBOL(fs_parse); + +/** + * fs_lookup_param - Look up a path referred to by a parameter + * @fc: The filesystem context to log errors through. + * @param: The parameter. + * @want_bdev: T if want a blockdev + * @_path: The result of the lookup + */ +int fs_lookup_param(struct fs_context *fc, + struct fs_parameter *param, + bool want_bdev, + struct path *_path) +{ + struct filename *f; + unsigned int flags = 0; + bool put_f; + int ret; + + switch (param->type) { + case fs_value_is_string: + f = getname_kernel(param->string); + if (IS_ERR(f)) + return PTR_ERR(f); + put_f = true; + break; + case fs_value_is_filename_empty: + flags = LOOKUP_EMPTY; + /* Fall through */ + case fs_value_is_filename: + f = param->name; + put_f = false; + break; + default: + return invalf(fc, "%s: not usable as path", param->key); + } + + ret = filename_lookup(param->dirfd, f, flags, _path, NULL); + if (ret < 0) { + errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name); + goto out; + } + + if (want_bdev && + !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) { + path_put(_path); + _path->dentry = NULL; + _path->mnt = NULL; + errorf(fc, "%s: Non-blockdev passed as '%s'", + param->key, f->name); + ret = -ENOTBLK; + } + +out: + if (put_f) + putname(f); + return ret; +} +EXPORT_SYMBOL(fs_lookup_param); + +#ifdef CONFIG_VALIDATE_FS_PARSER +/** + * validate_constant_table - Validate a constant table + * @name: Name to use in reporting + * @tbl: The constant table to validate. + * @tbl_size: The size of the table. + * @low: The lowest permissible value. + * @high: The highest permissible value. + * @special: One special permissible value outside of the range. + */ +bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, + int low, int high, int special) +{ + size_t i; + bool good = true; + + if (tbl_size == 0) { + pr_warn("VALIDATE C-TBL: Empty\n"); + return true; + } + + for (i = 0; i < tbl_size; i++) { + if (!tbl[i].name) { + pr_err("VALIDATE C-TBL[%zu]: Null\n", i); + good = false; + } else if (i > 0 && tbl[i - 1].name) { + int c = strcmp(tbl[i-1].name, tbl[i].name); + + if (c == 0) { + pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n", + i, tbl[i].name); + good = false; + } + if (c > 0) { + pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n", + i, tbl[i-1].name, tbl[i].name); + good = false; + } + } + + if (tbl[i].value != special && + (tbl[i].value < low || tbl[i].value > high)) { + pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n", + i, tbl[i].name, tbl[i].value, low, high); + good = false; + } + } + + return good; +} + +/** + * fs_validate_description - Validate a parameter description + * @desc: The parameter description to validate. + */ +bool fs_validate_description(const struct fs_parameter_description *desc) +{ + const struct fs_parameter_spec *param, *p2; + const struct fs_parameter_enum *e; + const char *name = desc->name; + unsigned int nr_params = 0; + bool good = true, enums = false; + + pr_notice("*** VALIDATE %s ***\n", name); + + if (!name[0]) { + pr_err("VALIDATE Parser: No name\n"); + name = "Unknown"; + good = false; + } + + if (desc->specs) { + for (param = desc->specs; param->name; param++) { + enum fs_parameter_type t = param->type; + + /* Check that the type is in range */ + if (t == __fs_param_wasnt_defined || + t >= nr__fs_parameter_type) { + pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n", + name, param->name, t); + good = false; + } else if (t == fs_param_is_enum) { + enums = true; + } + + /* Check for duplicate parameter names */ + for (p2 = desc->specs; p2 < param; p2++) { + if (strcmp(param->name, p2->name) == 0) { + pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n", + name, param->name); + good = false; + } + } + } + + nr_params = param - desc->specs; + } + + if (desc->enums) { + if (!nr_params) { + pr_err("VALIDATE %s: Enum table but no parameters\n", + name); + good = false; + goto no_enums; + } + if (!enums) { + pr_err("VALIDATE %s: Enum table but no enum-type values\n", + name); + good = false; + goto no_enums; + } + + for (e = desc->enums; e->name[0]; e++) { + /* Check that all entries in the enum table have at + * least one parameter that uses them. + */ + for (param = desc->specs; param->name; param++) { + if (param->opt == e->opt && + param->type != fs_param_is_enum) { + pr_err("VALIDATE %s: e[%lu] enum val for %s\n", + name, e - desc->enums, param->name); + good = false; + } + } + } + + /* Check that all enum-type parameters have at least one enum + * value in the enum table. + */ + for (param = desc->specs; param->name; param++) { + if (param->type != fs_param_is_enum) + continue; + for (e = desc->enums; e->name[0]; e++) + if (e->opt == param->opt) + break; + if (!e->name[0]) { + pr_err("VALIDATE %s: PARAM[%s] enum with no values\n", + name, param->name); + good = false; + } + } + } else { + if (enums) { + pr_err("VALIDATE %s: enum-type values, but no enum table\n", + name); + good = false; + goto no_enums; + } + } + +no_enums: + return good; +} +#endif /* CONFIG_VALIDATE_FS_PARSER */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b0eef008de67..ec32fece5e1e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -27,7 +27,7 @@ #include <linux/backing-dev.h> #include <linux/hugetlb.h> #include <linux/pagevec.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/dnotify.h> @@ -45,11 +45,17 @@ const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; -struct hugetlbfs_config { +enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; + +struct hugetlbfs_fs_context { struct hstate *hstate; + unsigned long long max_size_opt; + unsigned long long min_size_opt; long max_hpages; long nr_inodes; long min_hpages; + enum hugetlbfs_size_type max_val_type; + enum hugetlbfs_size_type min_val_type; kuid_t uid; kgid_t gid; umode_t mode; @@ -57,22 +63,30 @@ struct hugetlbfs_config { int sysctl_hugetlb_shm_group; -enum { - Opt_size, Opt_nr_inodes, - Opt_mode, Opt_uid, Opt_gid, - Opt_pagesize, Opt_min_size, - Opt_err, +enum hugetlb_param { + Opt_gid, + Opt_min_size, + Opt_mode, + Opt_nr_inodes, + Opt_pagesize, + Opt_size, + Opt_uid, }; -static const match_table_t tokens = { - {Opt_size, "size=%s"}, - {Opt_nr_inodes, "nr_inodes=%s"}, - {Opt_mode, "mode=%o"}, - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_pagesize, "pagesize=%s"}, - {Opt_min_size, "min_size=%s"}, - {Opt_err, NULL}, +static const struct fs_parameter_spec hugetlb_param_specs[] = { + fsparam_u32 ("gid", Opt_gid), + fsparam_string("min_size", Opt_min_size), + fsparam_u32 ("mode", Opt_mode), + fsparam_string("nr_inodes", Opt_nr_inodes), + fsparam_string("pagesize", Opt_pagesize), + fsparam_string("size", Opt_size), + fsparam_u32 ("uid", Opt_uid), + {} +}; + +static const struct fs_parameter_description hugetlb_fs_parameters = { + .name = "hugetlbfs", + .specs = hugetlb_param_specs, }; #ifdef CONFIG_NUMA @@ -708,16 +722,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) } static struct inode *hugetlbfs_get_root(struct super_block *sb, - struct hugetlbfs_config *config) + struct hugetlbfs_fs_context *ctx) { struct inode *inode; inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_mode = S_IFDIR | config->mode; - inode->i_uid = config->uid; - inode->i_gid = config->gid; + inode->i_mode = S_IFDIR | ctx->mode; + inode->i_uid = ctx->uid; + inode->i_gid = ctx->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -1093,8 +1107,6 @@ static const struct super_operations hugetlbfs_ops = { .show_options = hugetlbfs_show_options, }; -enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; - /* * Convert size option passed from command line to number of huge pages * in the pool specified by hstate. Size option could be in bytes @@ -1117,170 +1129,151 @@ hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt, return size_opt; } -static int -hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) +/* + * Parse one mount parameter. + */ +static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p, *rest; - substring_t args[MAX_OPT_ARGS]; - int option; - unsigned long long max_size_opt = 0, min_size_opt = 0; - enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE; - - if (!options) + struct hugetlbfs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + char *rest; + unsigned long ps; + int opt; + + opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + ctx->uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(ctx->uid)) + goto bad_val; return 0; - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; + case Opt_gid: + ctx->gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(ctx->gid)) + goto bad_val; + return 0; - token = match_token(p, tokens, args); - switch (token) { - case Opt_uid: - if (match_int(&args[0], &option)) - goto bad_val; - pconfig->uid = make_kuid(current_user_ns(), option); - if (!uid_valid(pconfig->uid)) - goto bad_val; - break; + case Opt_mode: + ctx->mode = result.uint_32 & 01777U; + return 0; - case Opt_gid: - if (match_int(&args[0], &option)) - goto bad_val; - pconfig->gid = make_kgid(current_user_ns(), option); - if (!gid_valid(pconfig->gid)) - goto bad_val; - break; + case Opt_size: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->max_size_opt = memparse(param->string, &rest); + ctx->max_val_type = SIZE_STD; + if (*rest == '%') + ctx->max_val_type = SIZE_PERCENT; + return 0; - case Opt_mode: - if (match_octal(&args[0], &option)) - goto bad_val; - pconfig->mode = option & 01777U; - break; + case Opt_nr_inodes: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->nr_inodes = memparse(param->string, &rest); + return 0; - case Opt_size: { - /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - max_size_opt = memparse(args[0].from, &rest); - max_val_type = SIZE_STD; - if (*rest == '%') - max_val_type = SIZE_PERCENT; - break; + case Opt_pagesize: + ps = memparse(param->string, &rest); + ctx->hstate = size_to_hstate(ps); + if (!ctx->hstate) { + pr_err("Unsupported page size %lu MB\n", ps >> 20); + return -EINVAL; } + return 0; - case Opt_nr_inodes: - /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - pconfig->nr_inodes = memparse(args[0].from, &rest); - break; + case Opt_min_size: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->min_size_opt = memparse(param->string, &rest); + ctx->min_val_type = SIZE_STD; + if (*rest == '%') + ctx->min_val_type = SIZE_PERCENT; + return 0; - case Opt_pagesize: { - unsigned long ps; - ps = memparse(args[0].from, &rest); - pconfig->hstate = size_to_hstate(ps); - if (!pconfig->hstate) { - pr_err("Unsupported page size %lu MB\n", - ps >> 20); - return -EINVAL; - } - break; - } + default: + return -EINVAL; + } - case Opt_min_size: { - /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - min_size_opt = memparse(args[0].from, &rest); - min_val_type = SIZE_STD; - if (*rest == '%') - min_val_type = SIZE_PERCENT; - break; - } +bad_val: + return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n", + param->string, param->key); +} - default: - pr_err("Bad mount option: \"%s\"\n", p); - return -EINVAL; - break; - } - } +/* + * Validate the parsed options. + */ +static int hugetlbfs_validate(struct fs_context *fc) +{ + struct hugetlbfs_fs_context *ctx = fc->fs_private; /* * Use huge page pool size (in hstate) to convert the size * options to number of huge pages. If NO_SIZE, -1 is returned. */ - pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, - max_size_opt, max_val_type); - pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, - min_size_opt, min_val_type); + ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate, + ctx->max_size_opt, + ctx->max_val_type); + ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate, + ctx->min_size_opt, + ctx->min_val_type); /* * If max_size was specified, then min_size must be smaller */ - if (max_val_type > NO_SIZE && - pconfig->min_hpages > pconfig->max_hpages) { - pr_err("minimum size can not be greater than maximum size\n"); + if (ctx->max_val_type > NO_SIZE && + ctx->min_hpages > ctx->max_hpages) { + pr_err("Minimum size can not be greater than maximum size\n"); return -EINVAL; } return 0; - -bad_val: - pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); - return -EINVAL; } static int -hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) +hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) { - int ret; - struct hugetlbfs_config config; + struct hugetlbfs_fs_context *ctx = fc->fs_private; struct hugetlbfs_sb_info *sbinfo; - config.max_hpages = -1; /* No limit on size by default */ - config.nr_inodes = -1; /* No limit on number of inodes by default */ - config.uid = current_fsuid(); - config.gid = current_fsgid(); - config.mode = 0755; - config.hstate = &default_hstate; - config.min_hpages = -1; /* No default minimum size */ - ret = hugetlbfs_parse_options(data, &config); - if (ret) - return ret; - sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); if (!sbinfo) return -ENOMEM; sb->s_fs_info = sbinfo; - sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_inodes = config.nr_inodes; - sbinfo->free_inodes = config.nr_inodes; - sbinfo->spool = NULL; - sbinfo->uid = config.uid; - sbinfo->gid = config.gid; - sbinfo->mode = config.mode; + sbinfo->hstate = ctx->hstate; + sbinfo->max_inodes = ctx->nr_inodes; + sbinfo->free_inodes = ctx->nr_inodes; + sbinfo->spool = NULL; + sbinfo->uid = ctx->uid; + sbinfo->gid = ctx->gid; + sbinfo->mode = ctx->mode; /* * Allocate and initialize subpool if maximum or minimum size is * specified. Any needed reservations (for minimim size) are taken * taken when the subpool is created. */ - if (config.max_hpages != -1 || config.min_hpages != -1) { - sbinfo->spool = hugepage_new_subpool(config.hstate, - config.max_hpages, - config.min_hpages); + if (ctx->max_hpages != -1 || ctx->min_hpages != -1) { + sbinfo->spool = hugepage_new_subpool(ctx->hstate, + ctx->max_hpages, + ctx->min_hpages); if (!sbinfo->spool) goto out_free; } sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = huge_page_size(config.hstate); - sb->s_blocksize_bits = huge_page_shift(config.hstate); + sb->s_blocksize = huge_page_size(ctx->hstate); + sb->s_blocksize_bits = huge_page_shift(ctx->hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; - sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); + sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx)); if (!sb->s_root) goto out_free; return 0; @@ -1290,16 +1283,52 @@ out_free: return -ENOMEM; } -static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hugetlbfs_get_tree(struct fs_context *fc) +{ + int err = hugetlbfs_validate(fc); + if (err) + return err; + return vfs_get_super(fc, vfs_get_independent_super, hugetlbfs_fill_super); +} + +static void hugetlbfs_fs_context_free(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations hugetlbfs_fs_context_ops = { + .free = hugetlbfs_fs_context_free, + .parse_param = hugetlbfs_parse_param, + .get_tree = hugetlbfs_get_tree, +}; + +static int hugetlbfs_init_fs_context(struct fs_context *fc) { - return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); + struct hugetlbfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->max_hpages = -1; /* No limit on size by default */ + ctx->nr_inodes = -1; /* No limit on number of inodes by default */ + ctx->uid = current_fsuid(); + ctx->gid = current_fsgid(); + ctx->mode = 0755; + ctx->hstate = &default_hstate; + ctx->min_hpages = -1; /* No default minimum size */ + ctx->max_val_type = NO_SIZE; + ctx->min_val_type = NO_SIZE; + fc->fs_private = ctx; + fc->ops = &hugetlbfs_fs_context_ops; + return 0; } static struct file_system_type hugetlbfs_fs_type = { - .name = "hugetlbfs", - .mount = hugetlbfs_mount, - .kill_sb = kill_litter_super, + .name = "hugetlbfs", + .init_fs_context = hugetlbfs_init_fs_context, + .parameters = &hugetlb_fs_parameters, + .kill_sb = kill_litter_super, }; static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; @@ -1384,8 +1413,29 @@ out: return file; } +static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h) +{ + struct fs_context *fc; + struct vfsmount *mnt; + + fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT); + if (IS_ERR(fc)) { + mnt = ERR_CAST(fc); + } else { + struct hugetlbfs_fs_context *ctx = fc->fs_private; + ctx->hstate = h; + mnt = fc_mount(fc); + put_fs_context(fc); + } + if (IS_ERR(mnt)) + pr_err("Cannot mount internal hugetlbfs for page size %uK", + 1U << (h->order + PAGE_SHIFT - 10)); + return mnt; +} + static int __init init_hugetlbfs_fs(void) { + struct vfsmount *mnt; struct hstate *h; int error; int i; @@ -1408,24 +1458,16 @@ static int __init init_hugetlbfs_fs(void) i = 0; for_each_hstate(h) { - char buf[50]; - unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - - snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); - hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, - buf); - - if (IS_ERR(hugetlbfs_vfsmount[i])) { - pr_err("Cannot mount internal hugetlbfs for " - "page size %uK", ps_kb); - error = PTR_ERR(hugetlbfs_vfsmount[i]); - hugetlbfs_vfsmount[i] = NULL; + mnt = mount_one_hugetlbfs(h); + if (IS_ERR(mnt) && i == 0) { + error = PTR_ERR(mnt); + goto out; } + hugetlbfs_vfsmount[i] = mnt; i++; } - /* Non default hstates are optional */ - if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) - return 0; + + return 0; out: kmem_cache_destroy(hugetlbfs_inode_cachep); diff --git a/fs/internal.h b/fs/internal.h index d410186bc369..6a8b71643af4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -17,6 +17,7 @@ struct linux_binprm; struct path; struct mount; struct shrink_control; +struct fs_context; /* * block_dev.c @@ -52,8 +53,16 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, extern void __init chrdev_init(void); /* + * fs_context.c + */ +extern int parse_monolithic_mount_data(struct fs_context *, void *); +extern void fc_drop_locked(struct fs_context *); + +/* * namei.c */ +extern int filename_lookup(int dfd, struct filename *name, unsigned flags, + struct path *path, struct path *root); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); @@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c */ -extern int do_remount_sb(struct super_block *, int, void *, int); +extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); -extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index dba810cd83b1..0b7d197a904c 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -17,6 +17,7 @@ #include <linux/xattr.h> #include <linux/kernfs.h> +#include <linux/fs_context.h> struct kernfs_iattrs { struct iattr ia_iattr; diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f3ac352699cf..9a4646eecb71 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -22,16 +22,6 @@ struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; -static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct kernfs_root *root = kernfs_info(sb)->root; - struct kernfs_syscall_ops *scops = root->syscall_ops; - - if (scops && scops->remount_fs) - return scops->remount_fs(root, flags, data); - return 0; -} - static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); @@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = { .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, - .remount_fs = kernfs_sop_remount_fs, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, }; @@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, } while (true); } -static int kernfs_fill_super(struct super_block *sb, unsigned long magic) +static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct inode *inode; @@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; - sb->s_magic = magic; + sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) @@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) return 0; } -static int kernfs_test_super(struct super_block *sb, void *data) +static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); - struct kernfs_super_info *info = data; + struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } -static int kernfs_set_super(struct super_block *sb, void *data) +static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { - int error; - error = set_anon_super(sb, data); - if (!error) - sb->s_fs_info = data; - return error; + struct kernfs_fs_context *kfc = fc->fs_private; + + kfc->ns_tag = NULL; + return set_anon_super_fc(sb, fc); } /** @@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb) } /** - * kernfs_mount_ns - kernfs mount helper - * @fs_type: file_system_type of the fs being mounted - * @flags: mount flags specified for the mount - * @root: kernfs_root of the hierarchy being mounted - * @magic: file system specific magic number - * @new_sb_created: tell the caller if we allocated a new superblock - * @ns: optional namespace tag of the mount + * kernfs_get_tree - kernfs filesystem access/retrieval helper + * @fc: The filesystem context. * - * This is to be called from each kernfs user's file_system_type->mount() - * implementation, which should pass through the specified @fs_type and - * @flags, and specify the hierarchy and namespace tag to mount via @root - * and @ns, respectively. - * - * The return value can be passed to the vfs layer verbatim. + * This is to be called from each kernfs user's fs_context->ops->get_tree() + * implementation, which should set the specified ->@fs_type and ->@flags, and + * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, + * respectively. */ -struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns) +int kernfs_get_tree(struct fs_context *fc) { + struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - info->root = root; - info->ns = ns; + info->root = kfc->root; + info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); - sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, - &init_user_ns, info); - if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + fc->s_fs_info = info; + sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (new_sb_created) - *new_sb_created = !sb->s_root; + return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); - error = kernfs_fill_super(sb, magic); + kfc->new_sb_created = true; + + error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); - return ERR_PTR(error); + return error; } sb->s_flags |= SB_ACTIVE; mutex_lock(&kernfs_mutex); - list_add(&info->node, &root->supers); + list_add(&info->node, &info->root->supers); mutex_unlock(&kernfs_mutex); } - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; +} + +void kernfs_free_fs_context(struct fs_context *fc) +{ + /* Note that we don't deal with kfc->ns_tag here. */ + kfree(fc->s_fs_info); + fc->s_fs_info = NULL; } /** @@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb) kfree(info); } -/** - * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root - * @kernfs_root: the kernfs_root in question - * @ns: the namespace tag - * - * Pin the superblock so the superblock won't be destroyed in subsequent - * operations. This can be used to block ->kill_sb() which may be useful - * for kernfs users which dynamically manage superblocks. - * - * Returns NULL if there's no superblock associated to this kernfs_root, or - * -EINVAL if the superblock is being freed. - */ -struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns) -{ - struct kernfs_super_info *info; - struct super_block *sb = NULL; - - mutex_lock(&kernfs_mutex); - list_for_each_entry(info, &root->supers, node) { - if (info->ns == ns) { - sb = info->sb; - if (!atomic_inc_not_zero(&info->sb->s_active)) - sb = ERR_PTR(-EINVAL); - break; - } - } - mutex_unlock(&kernfs_mutex); - return sb; -} - void __init kernfs_init(void) { diff --git a/fs/mount.h b/fs/mount.h index f39bc9da4d73..6250de544760 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry) return __is_local_mountpoint(dentry); } + +static inline bool is_anon_ns(struct mnt_namespace *ns) +{ + return ns->seq == 0; +} diff --git a/fs/namei.c b/fs/namei.c index 3662a09830be..dede0147b3f6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2331,8 +2331,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path return err; } -static int filename_lookup(int dfd, struct filename *name, unsigned flags, - struct path *path, struct path *root) +int filename_lookup(int dfd, struct filename *name, unsigned flags, + struct path *path, struct path *root) { int retval; struct nameidata nd; diff --git a/fs/namespace.c b/fs/namespace.c index 98a8c182af4f..c9cab307fa77 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include <linux/task_work.h> #include <linux/sched/task.h> #include <uapi/linux/mount.h> +#include <linux/fs_context.h> #include "pnode.h" #include "internal.h" @@ -940,38 +941,81 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } -struct vfsmount * -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +/** + * vfs_create_mount - Create a mount for a configured superblock + * @fc: The configuration context with the superblock attached + * + * Create a mount to an already configured superblock. If necessary, the + * caller should invoke vfs_get_tree() before calling this. + * + * Note that this does not attach the mount to anything. + */ +struct vfsmount *vfs_create_mount(struct fs_context *fc) { struct mount *mnt; - struct dentry *root; - if (!type) - return ERR_PTR(-ENODEV); + if (!fc->root) + return ERR_PTR(-EINVAL); - mnt = alloc_vfsmnt(name); + mnt = alloc_vfsmnt(fc->source ?: "none"); if (!mnt) return ERR_PTR(-ENOMEM); - if (flags & SB_KERNMOUNT) + if (fc->sb_flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); - if (IS_ERR(root)) { - mnt_free_id(mnt); - free_vfsmnt(mnt); - return ERR_CAST(root); - } + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_sb = fc->root->d_sb; + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; - mnt->mnt.mnt_root = root; - mnt->mnt.mnt_sb = root->d_sb; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); unlock_mount_hash(); return &mnt->mnt; } +EXPORT_SYMBOL(vfs_create_mount); + +struct vfsmount *fc_mount(struct fs_context *fc) +{ + int err = vfs_get_tree(fc); + if (!err) { + up_write(&fc->root->d_sb->s_umount); + return vfs_create_mount(fc); + } + return ERR_PTR(err); +} +EXPORT_SYMBOL(fc_mount); + +struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, + void *data) +{ + struct fs_context *fc; + struct vfsmount *mnt; + int ret = 0; + + if (!type) + return ERR_PTR(-EINVAL); + + fc = fs_context_for_mount(type, flags); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + if (name) + ret = vfs_parse_fs_string(fc, "source", + name, strlen(name)); + if (!ret) + ret = parse_monolithic_mount_data(fc, data); + if (!ret) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); + + put_fs_context(fc); + return mnt; +} EXPORT_SYMBOL_GPL(vfs_kern_mount); struct vfsmount * @@ -1013,27 +1057,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags; mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); - /* Don't allow unprivileged users to change mount flags */ - if (flag & CL_UNPRIVILEGED) { - mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; - - if (mnt->mnt.mnt_flags & MNT_READONLY) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; - - if (mnt->mnt.mnt_flags & MNT_NODEV) - mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; - - if (mnt->mnt.mnt_flags & MNT_NOSUID) - mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; - - if (mnt->mnt.mnt_flags & MNT_NOEXEC) - mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; - } - - /* Don't allow unprivileged users to reveal what is under a mount */ - if ((flag & CL_UNPRIVILEGED) && - (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) - mnt->mnt.mnt_flags |= MNT_LOCKED; atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; @@ -1464,6 +1487,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) static void shrink_submounts(struct mount *mnt); +static int do_umount_root(struct super_block *sb) +{ + int ret = 0; + + down_write(&sb->s_umount); + if (!sb_rdonly(sb)) { + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, + SB_RDONLY); + if (IS_ERR(fc)) { + ret = PTR_ERR(fc); + } else { + ret = parse_monolithic_mount_data(fc, NULL); + if (!ret) + ret = reconfigure_super(fc); + put_fs_context(fc); + } + } + up_write(&sb->s_umount); + return ret; +} + static int do_umount(struct mount *mnt, int flags) { struct super_block *sb = mnt->mnt.mnt_sb; @@ -1529,11 +1575,7 @@ static int do_umount(struct mount *mnt, int flags) */ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; - down_write(&sb->s_umount); - if (!sb_rdonly(sb)) - retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); - up_write(&sb->s_umount); - return retval; + return do_umount_root(sb); } namespace_lock(); @@ -1839,6 +1881,33 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, return 0; } +static void lock_mnt_tree(struct mount *mnt) +{ + struct mount *p; + + for (p = mnt; p; p = next_mnt(p, mnt)) { + int flags = p->mnt.mnt_flags; + /* Don't allow unprivileged users to change mount flags */ + flags |= MNT_LOCK_ATIME; + + if (flags & MNT_READONLY) + flags |= MNT_LOCK_READONLY; + + if (flags & MNT_NODEV) + flags |= MNT_LOCK_NODEV; + + if (flags & MNT_NOSUID) + flags |= MNT_LOCK_NOSUID; + + if (flags & MNT_NOEXEC) + flags |= MNT_LOCK_NOEXEC; + /* Don't allow unprivileged users to reveal what is under a mount */ + if (list_empty(&p->mnt_expire)) + flags |= MNT_LOCKED; + p->mnt.mnt_flags = flags; + } +} + static void cleanup_group_ids(struct mount *mnt, struct mount *end) { struct mount *p; @@ -1956,6 +2025,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct mountpoint *dest_mp, struct path *parent_path) { + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; HLIST_HEAD(tree_list); struct mnt_namespace *ns = dest_mnt->mnt_ns; struct mountpoint *smp; @@ -2006,6 +2076,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, child->mnt_mountpoint); if (q) mnt_change_mountpoint(child, smp, q); + /* Notice when we are propagating across user namespaces */ + if (child->mnt_parent->mnt_ns->user_ns != user_ns) + lock_mnt_tree(child); commit_tree(child); } put_mountpoint(smp); @@ -2313,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); - void *sec_opts = NULL; + struct fs_context *fc; if (!check_mnt(mnt)) return -EINVAL; @@ -2324,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; - if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { - err = security_sb_eat_lsm_opts(data, &sec_opts); - if (err) - return err; - } - err = security_sb_remount(sb, sec_opts); - security_free_mnt_opts(&sec_opts); - if (err) - return err; + fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); - down_write(&sb->s_umount); - err = -EPERM; - if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { - err = do_remount_sb(sb, sb_flags, data, 0); - if (!err) - set_mount_attributes(mnt, mnt_flags); + err = parse_monolithic_mount_data(fc, data); + if (!err) { + down_write(&sb->s_umount); + err = -EPERM; + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { + err = reconfigure_super(fc); + if (!err) + set_mount_attributes(mnt, mnt_flags); + } + up_write(&sb->s_umount); } - up_write(&sb->s_umount); + put_fs_context(fc); return err; } @@ -2425,29 +2496,6 @@ out: return err; } -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) -{ - int err; - const char *subtype = strchr(fstype, '.'); - if (subtype) { - subtype++; - err = -EINVAL; - if (!subtype[0]) - goto err; - } else - subtype = ""; - - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); - err = -ENOMEM; - if (!mnt->mnt_sb->s_subtype) - goto err; - return mnt; - - err: - mntput(mnt); - return ERR_PTR(err); -} - /* * add a mount into a namespace's mount tree */ @@ -2492,7 +2540,39 @@ unlock: return err; } -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); +static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); + +/* + * Create a new mount using a superblock configuration and request it + * be added to the namespace tree. + */ +static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) +{ + struct vfsmount *mnt; + struct super_block *sb = fc->root->d_sb; + int error; + + error = security_sb_kern_mount(sb); + if (!error && mount_too_revealing(sb, &mnt_flags)) + error = -EPERM; + + if (unlikely(error)) { + fc_drop_locked(fc); + return error; + } + + up_write(&sb->s_umount); + + mnt = vfs_create_mount(fc); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); + if (error < 0) + mntput(mnt); + return error; +} /* * create a new mount for userspace and request it to be added into the @@ -2502,8 +2582,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; - struct vfsmount *mnt; - int err; + struct fs_context *fc; + const char *subtype = NULL; + int err = 0; if (!fstype) return -EINVAL; @@ -2512,23 +2593,37 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, if (!type) return -ENODEV; - mnt = vfs_kern_mount(type, sb_flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); + if (type->fs_flags & FS_HAS_SUBTYPE) { + subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + if (!*subtype) { + put_filesystem(type); + return -EINVAL; + } + } else { + subtype = ""; + } + } + fc = fs_context_for_mount(type, sb_flags); put_filesystem(type); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - - if (mount_too_revealing(mnt, &mnt_flags)) { - mntput(mnt); - return -EPERM; - } + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (subtype) + err = vfs_parse_fs_string(fc, "subtype", + subtype, strlen(subtype)); + if (!err && name) + err = vfs_parse_fs_string(fc, "source", name, strlen(name)); + if (!err) + err = parse_monolithic_mount_data(fc, data); + if (!err) + err = vfs_get_tree(fc); + if (!err) + err = do_new_mount_fc(fc, path, mnt_flags); - err = do_add_mount(real_mount(mnt), path, mnt_flags); - if (err) - mntput(mnt); + put_fs_context(fc); return err; } @@ -2863,7 +2958,8 @@ static void dec_mnt_namespaces(struct ucounts *ucounts) static void free_mnt_ns(struct mnt_namespace *ns) { - ns_free_inum(&ns->ns); + if (!is_anon_ns(ns)) + ns_free_inum(&ns->ns); dec_mnt_namespaces(ns->ucounts); put_user_ns(ns->user_ns); kfree(ns); @@ -2878,7 +2974,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) */ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); -static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) { struct mnt_namespace *new_ns; struct ucounts *ucounts; @@ -2888,28 +2984,27 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) if (!ucounts) return ERR_PTR(-ENOSPC); - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) { dec_mnt_namespaces(ucounts); return ERR_PTR(-ENOMEM); } - ret = ns_alloc_inum(&new_ns->ns); - if (ret) { - kfree(new_ns); - dec_mnt_namespaces(ucounts); - return ERR_PTR(ret); + if (!anon) { + ret = ns_alloc_inum(&new_ns->ns); + if (ret) { + kfree(new_ns); + dec_mnt_namespaces(ucounts); + return ERR_PTR(ret); + } } new_ns->ns.ops = &mntns_operations; - new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); + if (!anon) + new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); - new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); - new_ns->event = 0; new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; - new_ns->mounts = 0; - new_ns->pending_mounts = 0; return new_ns; } @@ -2933,7 +3028,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, old = ns->root; - new_ns = alloc_mnt_ns(user_ns); + new_ns = alloc_mnt_ns(user_ns, false); if (IS_ERR(new_ns)) return new_ns; @@ -2941,13 +3036,18 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, /* First pass: copy the tree topology */ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; if (user_ns != ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; + copy_flags |= CL_SHARED_TO_SLAVE; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { namespace_unlock(); free_mnt_ns(new_ns); return ERR_CAST(new); } + if (user_ns != ns->user_ns) { + lock_mount_hash(); + lock_mnt_tree(new); + unlock_mount_hash(); + } new_ns->root = new; list_add_tail(&new_ns->list, &new->mnt_list); @@ -2988,37 +3088,25 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } -/** - * create_mnt_ns - creates a private namespace and adds a root filesystem - * @mnt: pointer to the new root filesystem mountpoint - */ -static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) -{ - struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); - if (!IS_ERR(new_ns)) { - struct mount *mnt = real_mount(m); - mnt->mnt_ns = new_ns; - new_ns->root = mnt; - new_ns->mounts++; - list_add(&mnt->mnt_list, &new_ns->list); - } else { - mntput(m); - } - return new_ns; -} - -struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) +struct dentry *mount_subtree(struct vfsmount *m, const char *name) { + struct mount *mnt = real_mount(m); struct mnt_namespace *ns; struct super_block *s; struct path path; int err; - ns = create_mnt_ns(mnt); - if (IS_ERR(ns)) + ns = alloc_mnt_ns(&init_user_ns, true); + if (IS_ERR(ns)) { + mntput(m); return ERR_CAST(ns); + } + mnt->mnt_ns = ns; + ns->root = mnt; + ns->mounts++; + list_add(&mnt->mnt_list, &ns->list); - err = vfs_path_lookup(mnt->mnt_root, mnt, + err = vfs_path_lookup(m->mnt_root, m, name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); put_mnt_ns(ns); @@ -3228,6 +3316,7 @@ out0: static void __init init_mount_tree(void) { struct vfsmount *mnt; + struct mount *m; struct mnt_namespace *ns; struct path root; struct file_system_type *type; @@ -3240,10 +3329,14 @@ static void __init init_mount_tree(void) if (IS_ERR(mnt)) panic("Can't create rootfs"); - ns = create_mnt_ns(mnt); + ns = alloc_mnt_ns(&init_user_ns, false); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); - + m = real_mount(mnt); + m->mnt_ns = ns; + ns->root = m; + ns->mounts = 1; + list_add(&m->mnt_list, &ns->list); init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); @@ -3297,10 +3390,10 @@ void put_mnt_ns(struct mnt_namespace *ns) free_mnt_ns(ns); } -struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) +struct vfsmount *kern_mount(struct file_system_type *type) { struct vfsmount *mnt; - mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (!IS_ERR(mnt)) { /* * it is a longterm mount, don't release mnt until @@ -3310,7 +3403,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) } return mnt; } -EXPORT_SYMBOL_GPL(kern_mount_data); +EXPORT_SYMBOL_GPL(kern_mount); void kern_unmount(struct vfsmount *mnt) { @@ -3352,7 +3445,8 @@ bool current_chrooted(void) return chrooted; } -static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, +static bool mnt_already_visible(struct mnt_namespace *ns, + const struct super_block *sb, int *new_mnt_flags) { int new_flags = *new_mnt_flags; @@ -3364,7 +3458,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, struct mount *child; int mnt_flags; - if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type) + if (mnt->mnt.mnt_sb->s_type != sb->s_type) continue; /* This mount is not fully visible if it's root directory @@ -3415,7 +3509,7 @@ found: return visible; } -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) +static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) { const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; struct mnt_namespace *ns = current->nsproxy->mnt_ns; @@ -3425,7 +3519,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) return false; /* Can this filesystem be too revealing? */ - s_iflags = mnt->mnt_sb->s_iflags; + s_iflags = sb->s_iflags; if (!(s_iflags & SB_I_USERNS_VISIBLE)) return false; @@ -3435,7 +3529,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) return true; } - return !mnt_already_visible(ns, mnt, new_mnt_flags); + return !mnt_already_visible(ns, sb, new_mnt_flags); } bool mnt_may_suid(struct vfsmount *mnt) @@ -3484,6 +3578,9 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; + if (is_anon_ns(mnt_ns)) + return -EINVAL; + if (fs->users != 1) return -EINVAL; diff --git a/fs/pnode.c b/fs/pnode.c index 1100e810d855..7ea6cfb65077 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin) } /* all accesses are serialized by namespace_sem */ -static struct user_namespace *user_ns; static struct mount *last_dest, *first_source, *last_source, *dest_master; static struct mountpoint *mp; static struct hlist_head *list; @@ -260,9 +259,6 @@ static int propagate_one(struct mount *m) type |= CL_MAKE_SHARED; } - /* Notice when we are propagating across user namespaces */ - if (m->mnt_ns->user_ns != user_ns) - type |= CL_UNPRIVILEGED; child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); @@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, * propagate_one(); everything is serialized by namespace_sem, * so globals will do just fine. */ - user_ns = current->nsproxy->mnt_ns->user_ns; last_dest = dest_mnt; first_source = source_mnt; last_source = source_mnt; diff --git a/fs/pnode.h b/fs/pnode.h index dc87e65becd2..3960a83666cf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -27,8 +27,7 @@ #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 -#define CL_UNPRIVILEGED 0x40 -#define CL_COPY_MNT_NS_FILE 0x80 +#define CL_COPY_MNT_NS_FILE 0x40 #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index da649ccd6804..fc7e38def174 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,7 +24,6 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> -#include <linux/magic.h> #include <linux/uaccess.h> @@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, .show_options = proc_show_options, }; @@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) pde_put(de); return inode; } - -int proc_fill_super(struct super_block *s, void *data, int silent) -{ - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); - struct inode *root_inode; - int ret; - - if (!proc_parse_options(data, ns)) - return -EINVAL; - - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; - s->s_magic = PROC_SUPER_MAGIC; - s->s_op = &proc_sops; - s->s_time_gran = 1; - - /* - * procfs isn't actually a stacking filesystem; however, there is - * too much magic going on inside it to permit stacking things on - * top of it - */ - s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; - - /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; - - pde_get(&proc_root); - root_inode = proc_get_inode(s, &proc_root); - if (!root_inode) { - pr_err("proc_fill_super: get root inode failed\n"); - return -ENOMEM; - } - - s->s_root = d_make_root(root_inode); - if (!s->s_root) { - pr_err("proc_fill_super: allocate dentry failed\n"); - return -ENOMEM; - } - - ret = proc_setup_self(s); - if (ret) { - return ret; - } - return proc_setup_thread_self(s); -} diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ea575375f210..d1671e97f7fe 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -207,13 +207,12 @@ struct pde_opener { struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; - extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; -extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); -extern int proc_remount(struct super_block *, int *, char *); /* * task_[no]mmu.c diff --git a/fs/proc/root.c b/fs/proc/root.c index 621e6ec322ca..8b145e7b9661 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -19,86 +19,178 @@ #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pid_namespace.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/cred.h> +#include <linux/magic.h> +#include <linux/slab.h> #include "internal.h" -enum { - Opt_gid, Opt_hidepid, Opt_err, +struct proc_fs_context { + struct pid_namespace *pid_ns; + unsigned int mask; + int hidepid; + int gid; }; -static const match_table_t tokens = { - {Opt_hidepid, "hidepid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_err, NULL}, +enum proc_param { + Opt_gid, + Opt_hidepid, }; -int proc_parse_options(char *options, struct pid_namespace *pid) +static const struct fs_parameter_spec proc_param_specs[] = { + fsparam_u32("gid", Opt_gid), + fsparam_u32("hidepid", Opt_hidepid), + {} +}; + +static const struct fs_parameter_description proc_fs_parameters = { + .name = "proc", + .specs = proc_param_specs, +}; + +static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p; - substring_t args[MAX_OPT_ARGS]; - int option; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - switch (token) { - case Opt_gid: - if (match_int(&args[0], &option)) - return 0; - pid->pid_gid = make_kgid(current_user_ns(), option); - break; - case Opt_hidepid: - if (match_int(&args[0], &option)) - return 0; - if (option < HIDEPID_OFF || - option > HIDEPID_INVISIBLE) { - pr_err("proc: hidepid value must be between 0 and 2.\n"); - return 0; - } - pid->hide_pid = option; - break; - default: - pr_err("proc: unrecognized mount option \"%s\" " - "or missing value\n", p); - return 0; - } + struct proc_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, &proc_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_gid: + ctx->gid = result.uint_32; + break; + + case Opt_hidepid: + ctx->hidepid = result.uint_32; + if (ctx->hidepid < HIDEPID_OFF || + ctx->hidepid > HIDEPID_INVISIBLE) + return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); + break; + + default: + return -EINVAL; } - return 1; + ctx->mask |= 1 << opt; + return 0; } -int proc_remount(struct super_block *sb, int *flags, char *data) +static void proc_apply_options(struct super_block *s, + struct fs_context *fc, + struct pid_namespace *pid_ns, + struct user_namespace *user_ns) { + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->mask & (1 << Opt_gid)) + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); + if (ctx->mask & (1 << Opt_hidepid)) + pid_ns->hide_pid = ctx->hidepid; +} + +static int proc_fill_super(struct super_block *s, struct fs_context *fc) +{ + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); + struct inode *root_inode; + int ret; + + proc_apply_options(s, fc, pid_ns, current_user_ns()); + + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; + s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = PROC_SUPER_MAGIC; + s->s_op = &proc_sops; + s->s_time_gran = 1; + + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + + /* procfs dentries and inodes don't require IO to create */ + s->s_shrink.seeks = 0; + + pde_get(&proc_root); + root_inode = proc_get_inode(s, &proc_root); + if (!root_inode) { + pr_err("proc_fill_super: get root inode failed\n"); + return -ENOMEM; + } + + s->s_root = d_make_root(root_inode); + if (!s->s_root) { + pr_err("proc_fill_super: allocate dentry failed\n"); + return -ENOMEM; + } + + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); +} + +static int proc_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; struct pid_namespace *pid = sb->s_fs_info; sync_filesystem(sb); - return !proc_parse_options(data, pid); + + proc_apply_options(sb, fc, pid, current_user_ns()); + return 0; } -static struct dentry *proc_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int proc_get_tree(struct fs_context *fc) { - struct pid_namespace *ns; + struct proc_fs_context *ctx = fc->fs_private; - if (flags & SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = task_active_pid_ns(current); - } + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); + fc->s_fs_info = ctx->pid_ns; + return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); +} - return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); +static void proc_fs_context_free(struct fs_context *fc) +{ + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->pid_ns) + put_pid_ns(ctx->pid_ns); + kfree(ctx); +} + +static const struct fs_context_operations proc_fs_context_ops = { + .free = proc_fs_context_free, + .parse_param = proc_parse_param, + .get_tree = proc_get_tree, + .reconfigure = proc_reconfigure, +}; + +static int proc_init_fs_context(struct fs_context *fc) +{ + struct proc_fs_context *ctx; + + ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); + fc->fs_private = ctx; + fc->ops = &proc_fs_context_ops; + return 0; } static void proc_kill_sb(struct super_block *sb) @@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb) } static struct file_system_type proc_fs_type = { - .name = "proc", - .mount = proc_mount, - .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "proc", + .init_fs_context = proc_init_fs_context, + .parameters = &proc_fs_parameters, + .kill_sb = proc_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr { if (!proc_pid_lookup(dentry, flags)) return NULL; - + return proc_lookup(dir, dentry, flags); } @@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = { int pid_ns_prepare_proc(struct pid_namespace *ns) { + struct proc_fs_context *ctx; + struct fs_context *fc; struct vfsmount *mnt; - mnt = kern_mount_data(&proc_fs_type, ns); + fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (fc->user_ns != ns->user_ns) { + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ns->user_ns); + } + + ctx = fc->fs_private; + if (ctx->pid_ns != ns) { + put_pid_ns(ctx->pid_ns); + get_pid_ns(ns); + ctx->pid_ns = ns; + } + + mnt = fc_mount(fc); + put_fs_context(fc); if (IS_ERR(mnt)) return PTR_ERR(mnt); diff --git a/fs/super.c b/fs/super.c index 48e25eba8465..583a0124bc39 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,7 @@ #include <linux/fsnotify.h> #include <linux/lockdep.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> #include <uapi/linux/mount.h> #include "internal.h" @@ -476,6 +477,94 @@ void generic_shutdown_super(struct super_block *sb) EXPORT_SYMBOL(generic_shutdown_super); /** + * sget_fc - Find or create a superblock + * @fc: Filesystem context. + * @test: Comparison callback + * @set: Setup callback + * + * Find or create a superblock using the parameters stored in the filesystem + * context and the two callback functions. + * + * If an extant superblock is matched, then that will be returned with an + * elevated reference count that the caller must transfer or discard. + * + * If no match is made, a new superblock will be allocated and basic + * initialisation will be performed (s_type, s_fs_info and s_id will be set and + * the set() callback will be invoked), the superblock will be published and it + * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE + * as yet unset. + */ +struct super_block *sget_fc(struct fs_context *fc, + int (*test)(struct super_block *, struct fs_context *), + int (*set)(struct super_block *, struct fs_context *)) +{ + struct super_block *s = NULL; + struct super_block *old; + struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; + int err; + + if (!(fc->sb_flags & SB_KERNMOUNT) && + fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) { + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the namespace. + */ + if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) { + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + } else { + if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + } + } + +retry: + spin_lock(&sb_lock); + if (test) { + hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) { + if (test(old, fc)) + goto share_extant_sb; + } + } + if (!s) { + spin_unlock(&sb_lock); + s = alloc_super(fc->fs_type, fc->sb_flags, user_ns); + if (!s) + return ERR_PTR(-ENOMEM); + goto retry; + } + + s->s_fs_info = fc->s_fs_info; + err = set(s, fc); + if (err) { + s->s_fs_info = NULL; + spin_unlock(&sb_lock); + destroy_unused_super(s); + return ERR_PTR(err); + } + fc->s_fs_info = NULL; + s->s_type = fc->fs_type; + strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); + list_add_tail(&s->s_list, &super_blocks); + hlist_add_head(&s->s_instances, &s->s_type->fs_supers); + spin_unlock(&sb_lock); + get_filesystem(s->s_type); + register_shrinker_prepared(&s->s_shrink); + return s; + +share_extant_sb: + if (user_ns != old->s_user_ns) { + spin_unlock(&sb_lock); + destroy_unused_super(s); + return ERR_PTR(-EBUSY); + } + if (!grab_super(old)) + goto retry; + destroy_unused_super(s); + return old; +} +EXPORT_SYMBOL(sget_fc); + +/** * sget_userns - find or create a superblock * @type: filesystem type superblock should belong to * @test: comparison callback @@ -835,28 +924,35 @@ rescan: } /** - * do_remount_sb - asks filesystem to change mount options. - * @sb: superblock in question - * @sb_flags: revised superblock flags - * @data: the rest of options - * @force: whether or not to force the change + * reconfigure_super - asks filesystem to change superblock parameters + * @fc: The superblock and configuration * - * Alters the mount options of a mounted file system. + * Alters the configuration parameters of a live superblock. */ -int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) +int reconfigure_super(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; int retval; - int remount_ro; + bool remount_ro = false; + bool force = fc->sb_flags & SB_FORCE; + if (fc->sb_flags_mask & ~MS_RMT_MASK) + return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; + retval = security_sb_remount(sb, fc->security); + if (retval) + return retval; + + if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK - if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) - return -EACCES; + if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) + return -EACCES; #endif - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); + } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { @@ -867,13 +963,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); - /* If we are remounting RDONLY and current sb is read/write, - make sure there are no rw files opened */ + /* If we are reconfiguring to RDONLY and current sb is read/write, + * make sure there are no files open for writing. + */ if (remount_ro) { if (force) { sb->s_readonly_remount = 1; @@ -885,8 +982,8 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) } } - if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &sb_flags, data); + if (fc->ops->reconfigure) { + retval = fc->ops->reconfigure(fc); if (retval) { if (!force) goto cancel_readonly; @@ -895,7 +992,9 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) sb->s_type->name, retval); } } - sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); + + WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | + (fc->sb_flags & fc->sb_flags_mask))); /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; @@ -922,10 +1021,15 @@ static void do_emergency_remount_callback(struct super_block *sb) down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && !sb_rdonly(sb)) { - /* - * What lock protects sb->s_flags?? - */ - do_remount_sb(sb, SB_RDONLY, NULL, 1); + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, + SB_RDONLY | SB_FORCE, SB_RDONLY); + if (!IS_ERR(fc)) { + if (parse_monolithic_mount_data(fc, NULL) == 0) + (void)reconfigure_super(fc); + put_fs_context(fc); + } } up_write(&sb->s_umount); } @@ -1087,6 +1191,89 @@ struct dentry *mount_ns(struct file_system_type *fs_type, EXPORT_SYMBOL(mount_ns); +int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) +{ + return set_anon_super(sb, NULL); +} +EXPORT_SYMBOL(set_anon_super_fc); + +static int test_keyed_super(struct super_block *sb, struct fs_context *fc) +{ + return sb->s_fs_info == fc->s_fs_info; +} + +static int test_single_super(struct super_block *s, struct fs_context *fc) +{ + return 1; +} + +/** + * vfs_get_super - Get a superblock with a search key set in s_fs_info. + * @fc: The filesystem context holding the parameters + * @keying: How to distinguish superblocks + * @fill_super: Helper to initialise a new superblock + * + * Search for a superblock and create a new one if not found. The search + * criterion is controlled by @keying. If the search fails, a new superblock + * is created and @fill_super() is called to initialise it. + * + * @keying can take one of a number of values: + * + * (1) vfs_get_single_super - Only one superblock of this type may exist on the + * system. This is typically used for special system filesystems. + * + * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have + * distinct keys (where the key is in s_fs_info). Searching for the same + * key again will turn up the superblock for that key. + * + * (3) vfs_get_independent_super - Multiple superblocks may exist and are + * unkeyed. Each call will get a new superblock. + * + * A permissions check is made by sget_fc() unless we're getting a superblock + * for a kernel-internal mount or a submount. + */ +int vfs_get_super(struct fs_context *fc, + enum vfs_get_super_keying keying, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)) +{ + int (*test)(struct super_block *, struct fs_context *); + struct super_block *sb; + + switch (keying) { + case vfs_get_single_super: + test = test_single_super; + break; + case vfs_get_keyed_super: + test = test_keyed_super; + break; + case vfs_get_independent_super: + test = NULL; + break; + default: + BUG(); + } + + sb = sget_fc(fc, test, set_anon_super_fc); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + int err = fill_super(sb, fc); + if (err) { + deactivate_locked_super(sb); + return err; + } + + sb->s_flags |= SB_ACTIVE; + } + + BUG_ON(fc->root); + fc->root = dget(sb->s_root); + return 0; +} +EXPORT_SYMBOL(vfs_get_super); + #ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) { @@ -1212,6 +1399,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); +static int reconfigure_single(struct super_block *s, + int flags, void *data) +{ + struct fs_context *fc; + int ret; + + /* The caller really need to be passing fc down into mount_single(), + * then a chunk of this can be removed. [Bollocks -- AV] + * Better yet, reconfiguration shouldn't happen, but rather the second + * mount should be rejected if the parameters are not compatible. + */ + fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + ret = parse_monolithic_mount_data(fc, data); + if (ret < 0) + goto out; + + ret = reconfigure_super(fc); +out: + put_fs_context(fc); + return ret; +} + static int compare_single(struct super_block *s, void *p) { return 1; @@ -1229,41 +1441,64 @@ struct dentry *mount_single(struct file_system_type *fs_type, return ERR_CAST(s); if (!s->s_root) { error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - s->s_flags |= SB_ACTIVE; + if (!error) + s->s_flags |= SB_ACTIVE; } else { - do_remount_sb(s, flags, data, 0); + error = reconfigure_single(s, flags, data); + } + if (unlikely(error)) { + deactivate_locked_super(s); + return ERR_PTR(error); } return dget(s->s_root); } EXPORT_SYMBOL(mount_single); -struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +/** + * vfs_get_tree - Get the mountable root + * @fc: The superblock configuration context. + * + * The filesystem is invoked to get or create a superblock which can then later + * be used for mounting. The filesystem places a pointer to the root to be + * used for mounting in @fc->root. + */ +int vfs_get_tree(struct fs_context *fc) { - struct dentry *root; struct super_block *sb; - int error = -ENOMEM; - void *sec_opts = NULL; + int error; - if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { - error = security_sb_eat_lsm_opts(data, &sec_opts); - if (error) - return ERR_PTR(error); + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) { + errorf(fc, "Filesystem requires source device"); + return -ENOENT; } - root = type->mount(type, flags, name, data); - if (IS_ERR(root)) { - error = PTR_ERR(root); - goto out_free_secdata; + if (fc->root) + return -EBUSY; + + /* Get the mountable root in fc->root, with a ref on the root and a ref + * on the superblock. + */ + error = fc->ops->get_tree(fc); + if (error < 0) + return error; + + if (!fc->root) { + pr_err("Filesystem %s get_tree() didn't set fc->root\n", + fc->fs_type->name); + /* We don't know what the locking state of the superblock is - + * if there is a superblock. + */ + BUG(); } - sb = root->d_sb; - BUG_ON(!sb); + + sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); + if (fc->subtype && !sb->s_subtype) { + sb->s_subtype = fc->subtype; + fc->subtype = NULL; + } + /* * Write barrier is for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is the @@ -1273,14 +1508,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) smp_wmb(); sb->s_flags |= SB_BORN; - error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); - if (error) - goto out_sb; - - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { - error = security_sb_kern_mount(sb); - if (error) - goto out_sb; + error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); + if (unlikely(error)) { + fc_drop_locked(fc); + return error; } /* @@ -1290,18 +1521,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " - "negative value (%lld)\n", type->name, sb->s_maxbytes); + "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); - up_write(&sb->s_umount); - security_free_mnt_opts(&sec_opts); - return root; -out_sb: - dput(root); - deactivate_locked_super(sb); -out_free_secdata: - security_free_mnt_opts(&sec_opts); - return ERR_PTR(error); + return 0; } +EXPORT_SYMBOL(vfs_get_tree); /* * Setup private BDI for given superblock. It gets automatically cleaned up diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 92682fcc41f6..4cb21b558a85 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -13,34 +13,69 @@ #include <linux/magic.h> #include <linux/mount.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> +#include <net/net_namespace.h> #include "sysfs.h" static struct kernfs_root *sysfs_root; struct kernfs_node *sysfs_root_kn; -static struct dentry *sysfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysfs_get_tree(struct fs_context *fc) { - struct dentry *root; - void *ns; - bool new_sb = false; + struct kernfs_fs_context *kfc = fc->fs_private; + int ret; - if (!(flags & SB_KERNMOUNT)) { + ret = kernfs_get_tree(fc); + if (ret) + return ret; + + if (kfc->new_sb_created) + fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + return 0; +} + +static void sysfs_fs_context_free(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + if (kfc->ns_tag) + kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag); + kernfs_free_fs_context(fc); + kfree(kfc); +} + +static const struct fs_context_operations sysfs_fs_context_ops = { + .free = sysfs_fs_context_free, + .get_tree = sysfs_get_tree, +}; + +static int sysfs_init_fs_context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc; + struct net *netns; + + if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) - return ERR_PTR(-EPERM); + return -EPERM; } - ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, - SYSFS_MAGIC, &new_sb, ns); - if (!new_sb) - kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (!IS_ERR(root)) - root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL); + if (!kfc) + return -ENOMEM; - return root; + kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + kfc->root = sysfs_root; + kfc->magic = SYSFS_MAGIC; + fc->fs_private = kfc; + fc->ops = &sysfs_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(netns->user_ns); + fc->global = true; + return 0; } static void sysfs_kill_sb(struct super_block *sb) @@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb) } static struct file_system_type sysfs_fs_type = { - .name = "sysfs", - .mount = sysfs_mount, - .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "sysfs", + .init_fs_context = sysfs_init_fs_context, + .kill_sb = sysfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int __init sysfs_init(void) |