From e9964c102312967a4bc1fd501cb628c4a3b19034 Mon Sep 17 00:00:00 2001 From: Sage Weil <sage@newdream.net> Date: Mon, 1 Mar 2010 15:16:56 -0800 Subject: ceph: fix flush_dirty_caps race with caps migration The flush_dirty_caps() used to loop over the first entry of the cap_dirty dirty list on the assumption that after calling ceph_check_caps() it would be removed from the list. This isn't true for caps that are being migrated between MDSs, where we've received the EXPORT but not the IMPORT. Instead, do a safe list iteration, and pin the next inode on the list via the CEPH_I_NOFLUSH flag. Signed-off-by: Sage Weil <sage@newdream.net> --- fs/ceph/caps.c | 45 ++++++++++++++++++++++++++++++++++++++------- fs/ceph/super.h | 1 + 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 295b7e547a31..8b89b9123252 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1573,6 +1573,11 @@ retry_locked: } ack: + if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + dout(" skipping %p I_NOFLUSH set\n", inode); + continue; + } + if (session && session != cap->session) { dout("oops, wrong session %p mutex\n", session); mutex_unlock(&session->s_mutex); @@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, retry: spin_lock(&inode->i_lock); + if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); + goto out; + } if (ci->i_dirty_caps && ci->i_auth_cap) { struct ceph_cap *cap = ci->i_auth_cap; int used = __ceph_caps_used(ci); @@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) */ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) { - struct ceph_inode_info *ci; - struct inode *inode; + struct ceph_inode_info *ci, *nci = NULL; + struct inode *inode, *ninode = NULL; + struct list_head *p, *n; dout("flush_dirty_caps\n"); spin_lock(&mdsc->cap_dirty_lock); - while (!list_empty(&mdsc->cap_dirty)) { - ci = list_first_entry(&mdsc->cap_dirty, - struct ceph_inode_info, - i_dirty_item); - inode = igrab(&ci->vfs_inode); + list_for_each_safe(p, n, &mdsc->cap_dirty) { + if (nci) { + ci = nci; + inode = ninode; + ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; + dout("flush_dirty_caps inode %p (was next inode)\n", + inode); + } else { + ci = list_entry(p, struct ceph_inode_info, + i_dirty_item); + inode = igrab(&ci->vfs_inode); + BUG_ON(!inode); + dout("flush_dirty_caps inode %p\n", inode); + } + if (n != &mdsc->cap_dirty) { + nci = list_entry(n, struct ceph_inode_info, + i_dirty_item); + ninode = igrab(&nci->vfs_inode); + BUG_ON(!ninode); + nci->i_ceph_flags |= CEPH_I_NOFLUSH; + dout("flush_dirty_caps next inode %p, noflush\n", + ninode); + } else { + nci = NULL; + ninode = NULL; + } spin_unlock(&mdsc->cap_dirty_lock); if (inode) { ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ff7aaa32736c..6a778f2c3f6e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info { #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ #define CEPH_I_NODELAY 4 /* do not delay cap release */ #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ struct ceph_inode_info { struct ceph_vino i_vino; /* ceph ino + snap */ -- cgit v1.2.3-58-ga151