From 64964528b24ea390824f0e5ce9d34b8d39b28cde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 00:37:32 -0400 Subject: make proc_ns_operations work with struct ns_common * instead of void * We can do that now. And kill ->inum(), while we are at it - all instances are identical. Signed-off-by: Al Viro --- fs/proc/inode.c | 2 +- fs/proc/namespaces.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 333080d7a671..43b703c6cd3b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode) struct proc_dir_entry *de; struct ctl_table_header *head; const struct proc_ns_operations *ns_ops; - void *ns; + struct ns_common *ns; truncate_inode_pages_final(&inode->i_data); clear_inode(inode); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 89026095f2b5..995e8e98237d 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -64,7 +64,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, struct inode *inode; struct proc_inode *ei; struct qstr qname = { .name = "", }; - void *ns; + struct ns_common *ns; ns = ns_ops->get(task); if (!ns) @@ -76,7 +76,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, return ERR_PTR(-ENOMEM); } - inode = iget_locked(sb, ns_ops->inum(ns)); + inode = iget_locked(sb, ns->inum); if (!inode) { dput(dentry); ns_ops->put(ns); @@ -144,7 +144,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct proc_inode *ei = PROC_I(inode); const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; struct task_struct *task; - void *ns; + struct ns_common *ns; char name[50]; int res = -EACCES; @@ -160,7 +160,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ns) goto out_put_task; - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum); res = readlink_copy(buffer, buflen, name); ns_ops->put(ns); out_put_task: -- cgit v1.2.3-58-ga151 From 33c429405a2c8d9e42afb9fee88a63cfb2de1e98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 02:32:53 -0400 Subject: copy address of proc_ns_ops into ns_common Signed-off-by: Al Viro --- fs/namespace.c | 1 + fs/proc/inode.c | 6 ++---- include/linux/ns_common.h | 3 +++ init/version.c | 3 +++ ipc/msgutil.c | 3 +++ ipc/namespace.c | 1 + kernel/nsproxy.c | 8 ++++---- kernel/pid.c | 3 +++ kernel/pid_namespace.c | 1 + kernel/user.c | 3 +++ kernel/user_namespace.c | 1 + kernel/utsname.c | 2 ++ net/core/net_namespace.c | 9 +++++++-- 13 files changed, 34 insertions(+), 10 deletions(-) (limited to 'fs/proc') diff --git a/fs/namespace.c b/fs/namespace.c index 30738d200866..f815218f92d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2672,6 +2672,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) kfree(new_ns); return ERR_PTR(ret); } + new_ns->ns.ops = &mntns_operations; new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); new_ns->root = NULL; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 43b703c6cd3b..a212996e0987 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; struct ctl_table_header *head; - const struct proc_ns_operations *ns_ops; struct ns_common *ns; truncate_inode_pages_final(&inode->i_data); @@ -51,10 +50,9 @@ static void proc_evict_inode(struct inode *inode) sysctl_head_put(head); } /* Release any associated namespace */ - ns_ops = PROC_I(inode)->ns.ns_ops; ns = PROC_I(inode)->ns.ns; - if (ns_ops && ns) - ns_ops->put(ns); + if (ns && ns->ops) + ns->ops->put(ns); } static struct kmem_cache * proc_inode_cachep; diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index e7db1cd54047..ce23cf4bbe69 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -1,7 +1,10 @@ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H +struct proc_ns_operations; + struct ns_common { + const struct proc_ns_operations *ops; unsigned int inum; }; diff --git a/init/version.c b/init/version.c index e23dbdabb26b..fe41a63efed6 100644 --- a/init/version.c +++ b/init/version.c @@ -36,6 +36,9 @@ struct uts_namespace init_uts_ns = { }, .user_ns = &init_user_ns, .ns.inum = PROC_UTS_INIT_INO, +#ifdef CONFIG_UTS_NS + .ns.ops = &utsns_operations, +#endif }; EXPORT_SYMBOL_GPL(init_uts_ns); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 5930471a2902..2b491590ebab 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -32,6 +32,9 @@ struct ipc_namespace init_ipc_ns = { .count = ATOMIC_INIT(1), .user_ns = &init_user_ns, .ns.inum = PROC_IPC_INIT_INO, +#ifdef CONFIG_IPC_NS + .ns.ops = &ipcns_operations, +#endif }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); diff --git a/ipc/namespace.c b/ipc/namespace.c index bcdd7a5c122a..382e2aa42d8a 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -31,6 +31,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, kfree(ns); return ERR_PTR(err); } + ns->ns.ops = &ipcns_operations; atomic_set(&ns->count, 1); err = mq_init_ns(ns); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index ef42d0ab3115..87c37221cb7f 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -220,11 +220,11 @@ void exit_task_namespaces(struct task_struct *p) SYSCALL_DEFINE2(setns, int, fd, int, nstype) { - const struct proc_ns_operations *ops; struct task_struct *tsk = current; struct nsproxy *new_nsproxy; struct proc_ns *ei; struct file *file; + struct ns_common *ns; int err; file = proc_ns_fget(fd); @@ -233,8 +233,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) err = -EINVAL; ei = get_proc_ns(file_inode(file)); - ops = ei->ns_ops; - if (nstype && (ops->type != nstype)) + ns = ei->ns; + if (nstype && (ns->ops->type != nstype)) goto out; new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); @@ -243,7 +243,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } - err = ops->install(new_nsproxy, ei->ns); + err = ns->ops->install(new_nsproxy, ns); if (err) { free_nsproxy(new_nsproxy); goto out; diff --git a/kernel/pid.c b/kernel/pid.c index 3650698cf1dc..c17a993a4d2a 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -80,6 +80,9 @@ struct pid_namespace init_pid_ns = { .child_reaper = &init_task, .user_ns = &init_user_ns, .ns.inum = PROC_PID_INIT_INO, +#ifdef CONFIG_PID_NS + .ns.ops = &pidns_operations, +#endif }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 5aa9158a84d5..e1bafe3b47bb 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -108,6 +108,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns err = ns_alloc_inum(&ns->ns); if (err) goto out_free_map; + ns->ns.ops = &pidns_operations; kref_init(&ns->kref); ns->level = level; diff --git a/kernel/user.c b/kernel/user.c index a7ca84bad8e6..69b800aebf13 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,9 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .ns.inum = PROC_USER_INIT_INO, +#ifdef CONFIG_USER_NS + .ns.ops = &userns_operations, +#endif #ifdef CONFIG_PERSISTENT_KEYRINGS .persistent_keyring_register_sem = __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6bf8177768e5..1491ad00388f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -91,6 +91,7 @@ int create_user_ns(struct cred *new) kmem_cache_free(user_ns_cachep, ns); return ret; } + ns->ns.ops = &userns_operations; atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ diff --git a/kernel/utsname.c b/kernel/utsname.c index c2a2b321d88a..831ea7108232 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -48,6 +48,8 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, return ERR_PTR(err); } + ns->ns.ops = &utsns_operations; + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(user_ns); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index da775f53f3fd..4d4acaf7b498 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -339,6 +339,7 @@ struct net *get_net_ns_by_fd(int fd) { struct proc_ns *ei; struct file *file; + struct ns_common *ns; struct net *net; file = proc_ns_fget(fd); @@ -346,8 +347,9 @@ struct net *get_net_ns_by_fd(int fd) return ERR_CAST(file); ei = get_proc_ns(file_inode(file)); - if (ei->ns_ops == &netns_operations) - net = get_net(container_of(ei->ns, struct net, ns)); + ns = ei->ns; + if (ns->ops == &netns_operations) + net = get_net(container_of(ns, struct net, ns)); else net = ERR_PTR(-EINVAL); @@ -386,6 +388,9 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { +#ifdef CONFIG_NET_NS + net->ns.ops = &netns_operations; +#endif return ns_alloc_inum(&net->ns); } -- cgit v1.2.3-58-ga151 From f77c80142e1afe6d5c16975ca5d7d1fc324b16f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 03:13:17 -0400 Subject: bury struct proc_ns in fs/proc a) make get_proc_ns() return a pointer to struct ns_common b) mirror ns_ops in dentry->d_fsdata of ns dentries, so that is_mnt_ns_file() could get away with fewer dereferences. That way struct proc_ns becomes invisible outside of fs/proc/*.c Signed-off-by: Al Viro --- fs/namespace.c | 13 ++----------- fs/proc/internal.h | 5 +++++ fs/proc/namespaces.c | 7 ++++--- include/linux/proc_ns.h | 9 ++------- kernel/nsproxy.c | 4 +--- net/core/net_namespace.c | 4 +--- 6 files changed, 15 insertions(+), 27 deletions(-) (limited to 'fs/proc') diff --git a/fs/namespace.c b/fs/namespace.c index f815218f92d3..9dfb4cac0c41 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1570,16 +1570,7 @@ static bool is_mnt_ns_file(struct dentry *dentry) { /* Is this a proxy for a mount namespace? */ struct inode *inode = dentry->d_inode; - struct proc_ns *ei; - - if (!proc_ns_inode(inode)) - return false; - - ei = get_proc_ns(inode); - if (ei->ns_ops != &mntns_operations) - return false; - - return true; + return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; } struct mnt_namespace *to_mnt_ns(struct ns_common *ns) @@ -1596,7 +1587,7 @@ static bool mnt_ns_loop(struct dentry *dentry) if (!is_mnt_ns_file(dentry)) return false; - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)->ns); + mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa7a0ee182e1..0fabc48d905f 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -57,6 +57,11 @@ union proc_op { struct task_struct *task); }; +struct proc_ns { + struct ns_common *ns; + const struct proc_ns_operations *ns_ops; +}; + struct proc_inode { struct pid *pid; int fd; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 995e8e98237d..18fc1cf899de 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -45,7 +45,7 @@ static const struct inode_operations ns_inode_operations = { static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", ns_ops->name, inode->i_ino); @@ -75,6 +75,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, ns_ops->put(ns); return ERR_PTR(-ENOMEM); } + dentry->d_fsdata = (void *)ns_ops; inode = iget_locked(sb, ns->inum); if (!inode) { @@ -286,9 +287,9 @@ out_invalid: return ERR_PTR(-EINVAL); } -struct proc_ns *get_proc_ns(struct inode *inode) +struct ns_common *get_proc_ns(struct inode *inode) { - return &PROC_I(inode)->ns; + return PROC_I(inode)->ns.ns; } bool proc_ns_inode(struct inode *inode) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index f5780ee7f8f7..2837ff41cfe3 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -16,11 +16,6 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); }; -struct proc_ns { - struct ns_common *ns; - const struct proc_ns_operations *ns_ops; -}; - extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; @@ -44,7 +39,7 @@ enum { extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); extern struct file *proc_ns_fget(int fd); -extern struct proc_ns *get_proc_ns(struct inode *); +extern struct ns_common *get_proc_ns(struct inode *); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); extern bool proc_ns_inode(struct inode *inode); @@ -59,7 +54,7 @@ static inline struct file *proc_ns_fget(int fd) return ERR_PTR(-EINVAL); } -static inline struct proc_ns *get_proc_ns(struct inode *inode) { return NULL; } +static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; } static inline int proc_alloc_inum(unsigned int *inum) { diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 87c37221cb7f..49746c81ad8d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -222,7 +222,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) { struct task_struct *tsk = current; struct nsproxy *new_nsproxy; - struct proc_ns *ei; struct file *file; struct ns_common *ns; int err; @@ -232,8 +231,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) return PTR_ERR(file); err = -EINVAL; - ei = get_proc_ns(file_inode(file)); - ns = ei->ns; + ns = get_proc_ns(file_inode(file)); if (nstype && (ns->ops->type != nstype)) goto out; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4d4acaf7b498..ce780c722e48 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -337,7 +337,6 @@ EXPORT_SYMBOL_GPL(__put_net); struct net *get_net_ns_by_fd(int fd) { - struct proc_ns *ei; struct file *file; struct ns_common *ns; struct net *net; @@ -346,8 +345,7 @@ struct net *get_net_ns_by_fd(int fd) if (IS_ERR(file)) return ERR_CAST(file); - ei = get_proc_ns(file_inode(file)); - ns = ei->ns; + ns = get_proc_ns(file_inode(file)); if (ns->ops == &netns_operations) net = get_net(container_of(ns, struct net, ns)); else -- cgit v1.2.3-58-ga151 From e149ed2b805fefdccf7ccdfc19eca22fdd4514ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 10:57:28 -0400 Subject: take the targets of /proc/*/ns/* symlinks to separate fs New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback(). This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it's simply returning ->i_private; would have been an inline, if not for header ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops). Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details of that mechanism. As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent pair it gets from ns_get_path(). Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/internal.h | 5 ++ fs/namespace.c | 9 ++- fs/nsfs.c | 161 +++++++++++++++++++++++++++++++++++++++++++++ fs/proc/inode.c | 5 -- fs/proc/namespaces.c | 152 ++++-------------------------------------- include/linux/ns_common.h | 1 + include/linux/proc_ns.h | 31 +++++---- include/uapi/linux/magic.h | 1 + init/main.c | 2 + 10 files changed, 208 insertions(+), 161 deletions(-) create mode 100644 fs/nsfs.c (limited to 'fs/proc') diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6dd..34393376eaa2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o fs_pin.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/internal.h b/fs/internal.h index 757ba2abf21e..e9a61fe67575 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops; */ extern void sb_pin_kill(struct super_block *sb); extern void mnt_pin_kill(struct mount *m); + +/* + * fs/nsfs.c + */ +extern struct dentry_operations ns_dentry_operations; diff --git a/fs/namespace.c b/fs/namespace.c index 9dfb4cac0c41..30df6e7dd807 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1569,8 +1569,8 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static bool is_mnt_ns_file(struct dentry *dentry) { /* Is this a proxy for a mount namespace? */ - struct inode *inode = dentry->d_inode; - return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; + return dentry->d_op == &ns_dentry_operations && + dentry->d_fsdata == &mntns_operations; } struct mnt_namespace *to_mnt_ns(struct ns_common *ns) @@ -2016,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_MNT_UNBINDABLE(old)) goto out2; - if (!check_mnt(parent) || !check_mnt(old)) + if (!check_mnt(parent)) + goto out2; + + if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) goto out2; if (!recurse && has_locked_children(old, old_path.dentry)) diff --git a/fs/nsfs.c b/fs/nsfs.c new file mode 100644 index 000000000000..af1b24fa899d --- /dev/null +++ b/fs/nsfs.c @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include + +static struct vfsmount *nsfs_mnt; + +static const struct file_operations ns_file_operations = { + .llseek = no_llseek, +}; + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +static void ns_prune_dentry(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (inode) { + struct ns_common *ns = inode->i_private; + atomic_long_set(&ns->stashed, 0); + } +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_prune = ns_prune_dentry, + .d_delete = always_delete_dentry, + .d_dname = ns_dname, +}; + +static void nsfs_evict(struct inode *inode) +{ + struct ns_common *ns = inode->i_private; + clear_inode(inode); + ns->ops->put(ns); +} + +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct vfsmount *mnt = mntget(nsfs_mnt); + struct qstr qname = { .name = "", }; + struct dentry *dentry; + struct inode *inode; + struct ns_common *ns; + unsigned long d; + +again: + ns = ns_ops->get(task); + if (!ns) { + mntput(mnt); + return ERR_PTR(-ENOENT); + } + rcu_read_lock(); + d = atomic_long_read(&ns->stashed); + if (!d) + goto slow; + dentry = (struct dentry *)d; + if (!lockref_get_not_dead(&dentry->d_lockref)) + goto slow; + rcu_read_unlock(); + ns_ops->put(ns); +got_it: + path->mnt = mnt; + path->dentry = dentry; + return NULL; +slow: + rcu_read_unlock(); + inode = new_inode_pseudo(mnt->mnt_sb); + if (!inode) { + ns_ops->put(ns); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + inode->i_ino = ns->inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_flags |= S_IMMUTABLE; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + inode->i_private = ns; + + dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); + if (!dentry) { + iput(inode); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + d_instantiate(dentry, inode); + dentry->d_fsdata = (void *)ns_ops; + d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); + if (d) { + d_delete(dentry); /* make sure ->d_prune() does nothing */ + dput(dentry); + cpu_relax(); + goto again; + } + goto got_it; +} + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct ns_common *ns; + int res = -ENOENT; + ns = ns_ops->get(task); + if (ns) { + res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); + ns_ops->put(ns); + } + return res; +} + +struct file *proc_ns_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &ns_file_operations) + goto out_invalid; + + return file; + +out_invalid: + fput(file); + return ERR_PTR(-EINVAL); +} + +static const struct super_operations nsfs_ops = { + .statfs = simple_statfs, + .evict_inode = nsfs_evict, +}; +static struct dentry *nsfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, + &ns_dentry_operations, NSFS_MAGIC); +} +static struct file_system_type nsfs = { + .name = "nsfs", + .mount = nsfs_mount, + .kill_sb = kill_anon_super, +}; + +void __init nsfs_init(void) +{ + nsfs_mnt = kern_mount(&nsfs); + if (IS_ERR(nsfs_mnt)) + panic("can't set nsfs up\n"); + nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a212996e0987..57a9be9a6668 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; struct ctl_table_header *head; - struct ns_common *ns; truncate_inode_pages_final(&inode->i_data); clear_inode(inode); @@ -49,10 +48,6 @@ static void proc_evict_inode(struct inode *inode) RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); sysctl_head_put(head); } - /* Release any associated namespace */ - ns = PROC_I(inode)->ns.ns; - if (ns && ns->ops) - ns->ops->put(ns); } static struct kmem_cache * proc_inode_cachep; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 18fc1cf899de..aaaac77abad0 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -1,10 +1,6 @@ #include #include -#include #include -#include -#include -#include #include #include #include @@ -34,139 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = { &mntns_operations, }; -static const struct file_operations ns_file_operations = { - .llseek = no_llseek, -}; - -static const struct inode_operations ns_inode_operations = { - .setattr = proc_setattr, -}; - -static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) -{ - struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = dentry->d_fsdata; - - return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", - ns_ops->name, inode->i_ino); -} - -const struct dentry_operations ns_dentry_operations = -{ - .d_delete = always_delete_dentry, - .d_dname = ns_dname, -}; - -static struct dentry *proc_ns_get_dentry(struct super_block *sb, - struct task_struct *task, const struct proc_ns_operations *ns_ops) -{ - struct dentry *dentry, *result; - struct inode *inode; - struct proc_inode *ei; - struct qstr qname = { .name = "", }; - struct ns_common *ns; - - ns = ns_ops->get(task); - if (!ns) - return ERR_PTR(-ENOENT); - - dentry = d_alloc_pseudo(sb, &qname); - if (!dentry) { - ns_ops->put(ns); - return ERR_PTR(-ENOMEM); - } - dentry->d_fsdata = (void *)ns_ops; - - inode = iget_locked(sb, ns->inum); - if (!inode) { - dput(dentry); - ns_ops->put(ns); - return ERR_PTR(-ENOMEM); - } - - ei = PROC_I(inode); - if (inode->i_state & I_NEW) { - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &ns_inode_operations; - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = &ns_file_operations; - ei->ns.ns_ops = ns_ops; - ei->ns.ns = ns; - unlock_new_inode(inode); - } else { - ns_ops->put(ns); - } - - d_set_d_op(dentry, &ns_dentry_operations); - result = d_instantiate_unique(dentry, inode); - if (result) { - dput(dentry); - dentry = result; - } - - return dentry; -} - static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - struct super_block *sb = inode->i_sb; - struct proc_inode *ei = PROC_I(inode); + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; struct task_struct *task; struct path ns_path; void *error = ERR_PTR(-EACCES); task = get_proc_task(inode); if (!task) - goto out; + return error; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_put_task; - - ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); - if (IS_ERR(ns_path.dentry)) { - error = ERR_CAST(ns_path.dentry); - goto out_put_task; + if (ptrace_may_access(task, PTRACE_MODE_READ)) { + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) + nd_jump_link(nd, &ns_path); } - - ns_path.mnt = mntget(nd->path.mnt); - nd_jump_link(nd, &ns_path); - error = NULL; - -out_put_task: put_task_struct(task); -out: return error; } static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = dentry->d_inode; - struct proc_inode *ei = PROC_I(inode); - const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; struct task_struct *task; - struct ns_common *ns; char name[50]; int res = -EACCES; task = get_proc_task(inode); if (!task) - goto out; - - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_put_task; + return res; - res = -ENOENT; - ns = ns_ops->get(task); - if (!ns) - goto out_put_task; - - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum); - res = readlink_copy(buffer, buflen, name); - ns_ops->put(ns); -out_put_task: + if (ptrace_may_access(task, PTRACE_MODE_READ)) { + res = ns_get_name(name, sizeof(name), task, ns_ops); + if (res >= 0) + res = readlink_copy(buffer, buflen, name); + } put_task_struct(task); -out: return res; } @@ -268,31 +170,3 @@ const struct inode_operations proc_ns_dir_inode_operations = { .getattr = pid_getattr, .setattr = proc_setattr, }; - -struct file *proc_ns_fget(int fd) -{ - struct file *file; - - file = fget(fd); - if (!file) - return ERR_PTR(-EBADF); - - if (file->f_op != &ns_file_operations) - goto out_invalid; - - return file; - -out_invalid: - fput(file); - return ERR_PTR(-EINVAL); -} - -struct ns_common *get_proc_ns(struct inode *inode) -{ - return PROC_I(inode)->ns.ns; -} - -bool proc_ns_inode(struct inode *inode) -{ - return inode->i_fop == &ns_file_operations; -} diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index ce23cf4bbe69..85a5c8c16be9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,6 +4,7 @@ struct proc_ns_operations; struct ns_common { + atomic_long_t stashed; const struct proc_ns_operations *ops; unsigned int inum; }; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 2837ff41cfe3..42dfc615dbf8 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -4,9 +4,11 @@ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H +#include + struct pid_namespace; struct nsproxy; -struct ns_common; +struct path; struct proc_ns_operations { const char *name; @@ -38,35 +40,38 @@ enum { extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); -extern struct file *proc_ns_fget(int fd); -extern struct ns_common *get_proc_ns(struct inode *); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); -extern bool proc_ns_inode(struct inode *inode); #else /* CONFIG_PROC_FS */ static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } static inline void pid_ns_release_proc(struct pid_namespace *ns) {} -static inline struct file *proc_ns_fget(int fd) -{ - return ERR_PTR(-EINVAL); -} - -static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; } - static inline int proc_alloc_inum(unsigned int *inum) { *inum = 1; return 0; } static inline void proc_free_inum(unsigned int inum) {} -static inline bool proc_ns_inode(struct inode *inode) { return false; } #endif /* CONFIG_PROC_FS */ -#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum) +static inline int ns_alloc_inum(struct ns_common *ns) +{ + atomic_long_set(&ns->stashed, 0); + return proc_alloc_inum(&ns->inum); +} + #define ns_free_inum(ns) proc_free_inum((ns)->inum) +extern struct file *proc_ns_fget(int fd); +#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) +extern void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops); + +extern int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +extern void nsfs_init(void); + #endif /* _LINUX_PROC_NS_H */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 77c60311a6c6..7d664ea85ebd 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -72,5 +72,6 @@ #define MTD_INODE_FS_MAGIC 0x11307854 #define ANON_INODE_FS_MAGIC 0x09041934 #define BTRFS_TEST_MAGIC 0x73727279 +#define NSFS_MAGIC 0x6e736673 #endif /* __LINUX_MAGIC_H__ */ diff --git a/init/main.c b/init/main.c index 800a0daede7e..bcc75057ea87 100644 --- a/init/main.c +++ b/init/main.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -660,6 +661,7 @@ asmlinkage __visible void __init start_kernel(void) /* rootfs populating might need page-writeback */ page_writeback_init(); proc_root_init(); + nsfs_init(); cgroup_init(); cpuset_init(); taskstats_init_early(); -- cgit v1.2.3-58-ga151 From 3d3d35b1e94ec918fc0ae670663235bf197d8609 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 11:10:28 -0400 Subject: kill proc_ns completely procfs inodes need only the ns_ops part; nsfs inodes don't need it at all Signed-off-by: Al Viro --- fs/proc/inode.c | 3 +-- fs/proc/internal.h | 7 +------ fs/proc/namespaces.c | 6 +++--- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 57a9be9a6668..8420a2f80811 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -66,8 +66,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; - ei->ns.ns = NULL; - ei->ns.ns_ops = NULL; + ei->ns_ops = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 0fabc48d905f..d689fd6960d5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -57,11 +57,6 @@ union proc_op { struct task_struct *task); }; -struct proc_ns { - struct ns_common *ns; - const struct proc_ns_operations *ns_ops; -}; - struct proc_inode { struct pid *pid; int fd; @@ -69,7 +64,7 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; - struct proc_ns ns; + const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index aaaac77abad0..c9eac4563fa8 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -33,7 +33,7 @@ static const struct proc_ns_operations *ns_entries[] = { static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; struct path ns_path; void *error = ERR_PTR(-EACCES); @@ -54,7 +54,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; char name[50]; int res = -EACCES; @@ -92,7 +92,7 @@ static int proc_ns_instantiate(struct inode *dir, ei = PROC_I(inode); inode->i_mode = S_IFLNK|S_IRWXUGO; inode->i_op = &proc_ns_link_inode_operations; - ei->ns.ns_ops = ns_ops; + ei->ns_ops = ns_ops; d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); -- cgit v1.2.3-58-ga151