ceph: avoid dereferencing invalid pointer during cached readdir

Readdir cache keeps array of dentry pointers in page cache. If any dentry in readdir cache gets pruned, ceph_d_prune() disables readdir cache for later readdir syscall. The problem is that ceph_d_prune() ignores unhashed dentry. Ideally MDS should have already revoked CEPH_CAP_FILE_SHARED (which also disables readdir cache) when dentry gets unhashed. But if it is somehow MDS does not properly revoke CEPH_CAP_FILE_SHARED and the unhashed dentry gets pruned later, ceph_d_prune() will not disable readdir cache, later readdir may reference invalid dentry pointer. The fix is make ceph_d_prune() do extra check for unhashed dentry. Disable readdir cache if the unhashed dentry is still referenced by readdir cache. Another fix in this patch is handle d_splice_alias(). If a dentry gets spliced into new parent dentry, treat it as if it was pruned (call ceph_d_prune() for it). Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Acked-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
author: Yan, Zheng <zyan@redhat.com> 2017-11-27 11:23:48 +0800
committer: Ilya Dryomov <idryomov@gmail.com> 2018-01-29 18:36:07 +0100
commit: 5495c2d04f85da09512f5f346ed24dc0261d905d (patch)
tree: 3a6f0c53be2012bc86aba2ef8eb31cd3c3f4cd2b /fs/ceph/dir.c
parent: 97aeb6bf988e0830fd80dca724fd89526b3f35e4 (diff)
1 files changed, 32 insertions, 13 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d671d5876828..0c4346806e17 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -231,11 +231,17 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			goto out;
 		}
 
-		di = ceph_dentry(dentry);
 		spin_lock(&dentry->d_lock);
-		if (di->lease_shared_gen == shared_gen &&
-		    d_really_is_positive(dentry) &&
-		    fpos_cmp(ctx->pos, di->offset) <= 0) {
+		di = ceph_dentry(dentry);
+		if (d_unhashed(dentry) ||
+		    d_really_is_negative(dentry) ||
+		    di->lease_shared_gen != shared_gen) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+			err = -EAGAIN;
+			goto out;
+		}
+		if (fpos_cmp(ctx->pos, di->offset) <= 0) {
 			emit_dentry = true;
 		}
 		spin_unlock(&dentry->d_lock);
@@ -1324,24 +1330,37 @@ static void ceph_d_release(struct dentry *dentry)
  */
 static void ceph_d_prune(struct dentry *dentry)
 {
-	dout("ceph_d_prune %p\n", dentry);
+	struct ceph_inode_info *dir_ci;
+	struct ceph_dentry_info *di;
+
+	dout("ceph_d_prune %pd %p\n", dentry, dentry);
 
 	/* do we have a valid parent? */
 	if (IS_ROOT(dentry))
 		return;
 
-	/* if we are not hashed, we don't affect dir's completeness */
-	if (d_unhashed(dentry))
+	/* we hold d_lock, so d_parent is stable */
+	dir_ci = ceph_inode(d_inode(dentry->d_parent));
+	if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
 		return;
 
-	if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
+	/* who calls d_delete() should also disable dcache readdir */
+	if (d_really_is_negative(dentry))
 		return;
 
-	/*
-	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
-	 * cleared until d_release
-	 */
-	ceph_dir_clear_complete(d_inode(dentry->d_parent));
+	/* d_fsdata does not get cleared until d_release */
+	if (!d_unhashed(dentry)) {
+		__ceph_dir_clear_complete(dir_ci);
+		return;
+	}
+
+	/* Disable dcache readdir just in case that someone called d_drop()
+	 * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
+	 * properly (dcache readdir is still enabled) */
+	di = ceph_dentry(dentry);
+	if (di->offset > 0 &&
+	    di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
+		__ceph_dir_clear_ordered(dir_ci);
 }
 
 /*
author	Yan, Zheng <zyan@redhat.com>	2017-11-27 11:23:48 +0800
committer	Ilya Dryomov <idryomov@gmail.com>	2018-01-29 18:36:07 +0100
commit	5495c2d04f85da09512f5f346ed24dc0261d905d (patch)
tree	3a6f0c53be2012bc86aba2ef8eb31cd3c3f4cd2b /fs/ceph/dir.c
parent	97aeb6bf988e0830fd80dca724fd89526b3f35e4 (diff)