summaryrefslogtreecommitdiff
path: root/fs/attr.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-20 11:53:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-20 11:53:11 -0800
commit05e6295f7b5e05f09e369a3eb2882ec5b40fff20 (patch)
tree60ea7c6806f58371bfafe6317089c7ae1a65fca4 /fs/attr.c
parentde630176bdf885eed442902afe94eb60d8f5f826 (diff)
parent7a80e5b8c6fa7d0ae6624bd6aedc4a6a1cfc62fa (diff)
Merge tag 'fs.idmapped.v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping
Pull vfs idmapping updates from Christian Brauner: - Last cycle we introduced the dedicated struct mnt_idmap type for mount idmapping and the required infrastucture in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). As promised in last cycle's pull request message this converts everything to rely on struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevant on the mount level. Especially for non-vfs developers without detailed knowledge in this area this was a potential source for bugs. This finishes the conversion. Instead of passing the plain namespace around this updates all places that currently take a pointer to a mnt_userns with a pointer to struct mnt_idmap. Now that the conversion is done all helpers down to the really low-level helpers only accept a struct mnt_idmap argument instead of two namespace arguments. Conflating mount and other idmappings will now cause the compiler to complain loudly thus eliminating the possibility of any bugs. This makes it impossible for filesystem developers to mix up mount and filesystem idmappings as they are two distinct types and require distinct helpers that cannot be used interchangeably. Everything associated with struct mnt_idmap is moved into a single separate file. With that change no code can poke around in struct mnt_idmap. It can only be interacted with through dedicated helpers. That means all filesystems are and all of the vfs is completely oblivious to the actual implementation of idmappings. We are now also able to extend struct mnt_idmap as we see fit. For example, we can decouple it completely from namespaces for users that don't require or don't want to use them at all. We can also extend the concept of idmappings so we can cover filesystem specific requirements. In combination with the vfs{g,u}id_t work we finished in v6.2 this makes this feature substantially more robust and thus difficult to implement wrong by a given filesystem and also protects the vfs. - Enable idmapped mounts for tmpfs and fulfill a longstanding request. A long-standing request from users had been to make it possible to create idmapped mounts for tmpfs. For example, to share the host's tmpfs mount between multiple sandboxes. This is a prerequisite for some advanced Kubernetes cases. Systemd also has a range of use-cases to increase service isolation. And there are more users of this. However, with all of the other work going on this was way down on the priority list but luckily someone other than ourselves picked this up. As usual the patch is tiny as all the infrastructure work had been done multiple kernel releases ago. In addition to all the tests that we already have I requested that Rodrigo add a dedicated tmpfs testsuite for idmapped mounts to xfstests. It is to be included into xfstests during the v6.3 development cycle. This should add a slew of additional tests. * tag 'fs.idmapped.v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping: (26 commits) shmem: support idmapped mounts for tmpfs fs: move mnt_idmap fs: port vfs{g,u}id helpers to mnt_idmap fs: port fs{g,u}id helpers to mnt_idmap fs: port i_{g,u}id_into_vfs{g,u}id() to mnt_idmap fs: port i_{g,u}id_{needs_}update() to mnt_idmap quota: port to mnt_idmap fs: port privilege checking helpers to mnt_idmap fs: port inode_owner_or_capable() to mnt_idmap fs: port inode_init_owner() to mnt_idmap fs: port acl to mnt_idmap fs: port xattr to mnt_idmap fs: port ->permission() to pass mnt_idmap fs: port ->fileattr_set() to pass mnt_idmap fs: port ->set_acl() to pass mnt_idmap fs: port ->get_acl() to pass mnt_idmap fs: port ->tmpfile() to pass mnt_idmap fs: port ->rename() to pass mnt_idmap fs: port ->mknod() to pass mnt_idmap fs: port ->mkdir() to pass mnt_idmap ...
Diffstat (limited to 'fs/attr.c')
-rw-r--r--fs/attr.c129
1 files changed, 64 insertions, 65 deletions
diff --git a/fs/attr.c b/fs/attr.c
index f3eb8e57b451..aca9ff7aed33 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -24,7 +24,7 @@
/**
* setattr_should_drop_sgid - determine whether the setgid bit needs to be
* removed
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check
*
* This function determines whether the setgid bit needs to be removed.
@@ -34,7 +34,7 @@
*
* Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise.
*/
-int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
+int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -43,8 +43,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
return 0;
if (mode & S_IXGRP)
return ATTR_KILL_SGID;
- if (!in_group_or_capable(mnt_userns, inode,
- i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
return ATTR_KILL_SGID;
return 0;
}
@@ -52,7 +51,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
/**
* setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to
* be dropped
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check
*
* This function determines whether the set{g,u}id bits need to be removed.
@@ -64,7 +63,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
* Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits
* to remove, 0 otherwise.
*/
-int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
+int setattr_should_drop_suidgid(struct mnt_idmap *idmap,
struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -74,7 +73,7 @@ int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
if (unlikely(mode & S_ISUID))
kill = ATTR_KILL_SUID;
- kill |= setattr_should_drop_sgid(mnt_userns, inode);
+ kill |= setattr_should_drop_sgid(idmap, inode);
if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
return kill;
@@ -85,24 +84,24 @@ EXPORT_SYMBOL(setattr_should_drop_suidgid);
/**
* chown_ok - verify permissions to chown inode
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check permissions on
* @ia_vfsuid: uid to chown @inode to
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-static bool chown_ok(struct user_namespace *mnt_userns,
+static bool chown_ok(struct mnt_idmap *idmap,
const struct inode *inode, vfsuid_t ia_vfsuid)
{
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()) &&
vfsuid_eq(ia_vfsuid, vfsuid))
return true;
- if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN))
return true;
if (!vfsuid_valid(vfsuid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
@@ -112,28 +111,28 @@ static bool chown_ok(struct user_namespace *mnt_userns,
/**
* chgrp_ok - verify permissions to chgrp inode
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check permissions on
* @ia_vfsgid: gid to chown @inode to
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-static bool chgrp_ok(struct user_namespace *mnt_userns,
+static bool chgrp_ok(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t ia_vfsgid)
{
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid())) {
if (vfsgid_eq(ia_vfsgid, vfsgid))
return true;
if (vfsgid_in_group_p(ia_vfsgid))
return true;
}
- if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN))
return true;
if (!vfsgid_valid(vfsgid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
@@ -143,7 +142,7 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
/**
* setattr_prepare - check if attribute changes to a dentry are allowed
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: dentry to check
* @attr: attributes to change
*
@@ -153,16 +152,16 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
* SGID bit from mode if user is not allowed to set it. Also file capabilities
* and IMA extended attributes are cleared if ATTR_KILL_PRIV is set.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*
* Should be called as the first thing in ->setattr implementations,
* possibly after taking additional locks.
*/
-int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
+int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -184,34 +183,34 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
- !chown_ok(mnt_userns, inode, attr->ia_vfsuid))
+ !chown_ok(idmap, inode, attr->ia_vfsuid))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
- !chgrp_ok(mnt_userns, inode, attr->ia_vfsgid))
+ !chgrp_ok(idmap, inode, attr->ia_vfsgid))
return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
vfsgid_t vfsgid;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
if (ia_valid & ATTR_GID)
vfsgid = attr->ia_vfsgid;
else
- vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
/* Also check the setgid bit! */
- if (!in_group_or_capable(mnt_userns, inode, vfsgid))
+ if (!in_group_or_capable(idmap, inode, vfsgid))
attr->ia_mode &= ~S_ISGID;
}
/* Check for setting the inode time. */
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
}
@@ -220,7 +219,7 @@ kill_priv:
if (ia_valid & ATTR_KILL_PRIV) {
int error;
- error = security_inode_killpriv(mnt_userns, dentry);
+ error = security_inode_killpriv(idmap, dentry);
if (error)
return error;
}
@@ -277,7 +276,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
/**
* setattr_copy - copy simple metadata updates into the generic inode
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: the inode to be updated
* @attr: the new attributes
*
@@ -290,23 +289,23 @@ EXPORT_SYMBOL(inode_newsize_ok);
* Noticeably missing is inode size update, which is more complex
* as it requires pagecache updates.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*
* The inode is not marked as dirty after this operation. The rationale is
* that for "simple" filesystems, the struct inode is the inode storage.
* The caller is free to mark the inode dirty afterwards if needed.
*/
-void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
+void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
- i_uid_update(mnt_userns, attr, inode);
- i_gid_update(mnt_userns, attr, inode);
+ i_uid_update(idmap, attr, inode);
+ i_gid_update(idmap, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -315,15 +314,15 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_or_capable(mnt_userns, inode,
- i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!in_group_or_capable(idmap, inode,
+ i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
}
EXPORT_SYMBOL(setattr_copy);
-int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid)
{
int error;
@@ -341,8 +340,8 @@ int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
if (IS_IMMUTABLE(inode))
return -EPERM;
- if (!inode_owner_or_capable(mnt_userns, inode)) {
- error = inode_permission(mnt_userns, inode, MAY_WRITE);
+ if (!inode_owner_or_capable(idmap, inode)) {
+ error = inode_permission(idmap, inode, MAY_WRITE);
if (error)
return error;
}
@@ -353,7 +352,7 @@ EXPORT_SYMBOL(may_setattr);
/**
* notify_change - modify attributes of a filesytem object
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: object affected
* @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
@@ -372,13 +371,13 @@ EXPORT_SYMBOL(may_setattr);
* the file open for write, as there can be no conflicting delegation in
* that case.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
@@ -389,7 +388,7 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
WARN_ON_ONCE(!inode_is_locked(inode));
- error = may_setattr(mnt_userns, inode, ia_valid);
+ error = may_setattr(idmap, inode, ia_valid);
if (error)
return error;
@@ -454,11 +453,11 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* namespace of the superblock.
*/
if (ia_valid & ATTR_UID &&
- !vfsuid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ !vfsuid_has_fsmapping(idmap, inode->i_sb->s_user_ns,
attr->ia_vfsuid))
return -EOVERFLOW;
if (ia_valid & ATTR_GID &&
- !vfsgid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ !vfsgid_has_fsmapping(idmap, inode->i_sb->s_user_ns,
attr->ia_vfsgid))
return -EOVERFLOW;
@@ -466,13 +465,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* gids unless those uids & gids are being made valid.
*/
if (!(ia_valid & ATTR_UID) &&
- !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)))
+ !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)))
return -EOVERFLOW;
if (!(ia_valid & ATTR_GID) &&
- !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
+ !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
return -EOVERFLOW;
- error = security_inode_setattr(mnt_userns, dentry, attr);
+ error = security_inode_setattr(idmap, dentry, attr);
if (error)
return error;
error = try_break_deleg(inode, delegated_inode);
@@ -480,13 +479,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
return error;
if (inode->i_op->setattr)
- error = inode->i_op->setattr(mnt_userns, dentry, attr);
+ error = inode->i_op->setattr(idmap, dentry, attr);
else
- error = simple_setattr(mnt_userns, dentry, attr);
+ error = simple_setattr(idmap, dentry, attr);
if (!error) {
fsnotify_change(dentry, ia_valid);
- ima_inode_post_setattr(mnt_userns, dentry);
+ ima_inode_post_setattr(idmap, dentry);
evm_inode_post_setattr(dentry, ia_valid);
}