diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2023-11-20 20:02:11 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2023-11-25 02:54:14 -0500 |
commit | a8b0026847b8c43445c921ad2c85521c92eb175f (patch) | |
tree | 39078ede8594fab57ee0486e522655df86cb131f /fs | |
parent | dbd4540df2b2857a91593754275c02f3e415fc30 (diff) |
rename(): avoid a deadlock in the case of parents having no common ancestor
... and fix the directory locking documentation and proof of correctness.
Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the
case where we really don't want it is splicing the root of disconnected
tree to somewhere.
In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an
ancestor of Y" only if X and Y are already in the same tree. Otherwise
it can go from false to true, and one can construct a deadlock on that.
Make lock_two_directories() report an error in such case and update the
callers of lock_rename()/lock_rename_child() to handle such errors.
And yes, such conditions are not impossible to create ;-/
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/cachefiles/namei.c | 2 | ||||
-rw-r--r-- | fs/ecryptfs/inode.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 37 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 4 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 9 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 4 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 6 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 7 | ||||
-rw-r--r-- | fs/smb/server/vfs.c | 5 |
9 files changed, 62 insertions, 14 deletions
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7bf7a5fcc045..7ade836beb58 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -305,6 +305,8 @@ try_again: /* do the multiway lock magic */ trap = lock_rename(cache->graveyard, dir); + if (IS_ERR(trap)) + return PTR_ERR(trap); /* do some checks before getting the grave dentry */ if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a25dd3d20008..8efd20dc902b 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, target_inode = d_inode(new_dentry); trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + if (IS_ERR(trap)) + return PTR_ERR(trap); dget(lower_new_dentry); rc = -EINVAL; if (lower_old_dentry->d_parent != lower_old_dir_dentry) diff --git a/fs/namei.c b/fs/namei.c index 29bafbdb44ca..6b0302ac80d1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap, return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } +// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) { - struct dentry *p; + struct dentry *p = p1, *q = p2, *r; - p = d_ancestor(p2, p1); - if (p) { + while ((r = p->d_parent) != p2 && r != p) + p = r; + if (r == p2) { + // p is a child of p2 and an ancestor of p1 or p1 itself inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } - - p = d_ancestor(p1, p2); - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); - return p; + // p is the root of connected component that contains p1 + // p2 does not occur on the path from p to p1 + while ((r = q->d_parent) != p1 && r != p && r != q) + q = r; + if (r == p1) { + // q is a child of p1 and an ancestor of p2 or p2 itself + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + return q; + } else if (likely(r == p)) { + // both p2 and p1 are descendents of p + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + return NULL; + } else { // no common ancestor at the time we'd been called + mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); + return ERR_PTR(-EXDEV); + } } /* @@ -4947,6 +4963,10 @@ retry: retry_deleg: trap = lock_rename(new_path.dentry, old_path.dentry); + if (IS_ERR(trap)) { + error = PTR_ERR(trap); + goto exit_lock_rename; + } old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, lookup_flags); @@ -5014,6 +5034,7 @@ exit4: dput(old_dentry); exit3: unlock_rename(new_path.dentry, old_path.dentry); +exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fbbea7498f02..a99260c3f9bc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1813,6 +1813,10 @@ retry: } trap = lock_rename(tdentry, fdentry); + if (IS_ERR(trap)) { + err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; + goto out; + } err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) goto out_unlock; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 4382881b0709..e44dc5f66161 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) struct inode *inode; struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); struct path path = { .mnt = ovl_upper_mnt(ofs) }; - struct dentry *temp, *upper; + struct dentry *temp, *upper, *trap; struct ovl_cu_creds cc; int err; struct ovl_cattr cattr = { @@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) * If temp was moved, abort without the cleanup. */ ovl_start_write(c->dentry); - if (lock_rename(c->workdir, c->destdir) != NULL || - temp->d_parent != c->workdir) { + trap = lock_rename(c->workdir, c->destdir); + if (trap || temp->d_parent != c->workdir) { err = -EIO; + if (IS_ERR(trap)) + goto out; goto unlock; } else if (err) { goto cleanup; @@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ovl_set_flag(OVL_WHITEOUTS, inode); unlock: unlock_rename(c->workdir, c->destdir); +out: ovl_end_write(c->dentry); return err; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index aab3f5d93556..0f8b4a719237 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } trap = lock_rename(new_upperdir, old_upperdir); + if (IS_ERR(trap)) { + err = PTR_ERR(trap); + goto out_revert_creds; + } olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir, old->d_name.len); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index a0967bb25003..fc3a6ff648bd 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) bool ok = false; if (workdir != upperdir) { - ok = (lock_rename(workdir, upperdir) == NULL); - unlock_rename(workdir, upperdir); + struct dentry *trap = lock_rename(workdir, upperdir); + if (!IS_ERR(trap)) + unlock_rename(workdir, upperdir); + ok = (trap == NULL); } return ok; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 50a201e9cd39..7b667345e673 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry) int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) { + struct dentry *trap; + /* Workdir should not be the same as upperdir */ if (workdir == upperdir) goto err; /* Workdir should not be subdir of upperdir and vice versa */ - if (lock_rename(workdir, upperdir) != NULL) + trap = lock_rename(workdir, upperdir); + if (IS_ERR(trap)) + goto err; + if (trap) goto err_unlock; return 0; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index c53dea5598fc..4cf8523ad038 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -708,6 +708,10 @@ retry: goto out2; trap = lock_rename_child(old_child, new_path.dentry); + if (IS_ERR(trap)) { + err = PTR_ERR(trap); + goto out_drop_write; + } old_parent = dget(old_child->d_parent); if (d_unhashed(old_child)) { @@ -770,6 +774,7 @@ out4: out3: dput(old_parent); unlock_rename(old_parent, new_path.dentry); +out_drop_write: mnt_drop_write(old_path->mnt); out2: path_put(&new_path); |