diff options
Diffstat (limited to 'fs/gfs2/glock.c')
-rw-r--r-- | fs/gfs2/glock.c | 208 |
1 files changed, 185 insertions, 23 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index bf70e3b14938..2299dcc417ea 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -125,12 +125,11 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) { struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); - if (gl->gl_ops->go_flags & GLOF_ASPACE) { + kfree(gl->gl_lksb.sb_lvbptr); + if (gl->gl_ops->go_flags & GLOF_ASPACE) kmem_cache_free(gfs2_glock_aspace_cachep, gl); - } else { - kfree(gl->gl_lksb.sb_lvbptr); + else kmem_cache_free(gfs2_glock_cachep, gl); - } } /** @@ -164,7 +163,7 @@ void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - BUG_ON(atomic_read(&gl->gl_revokes)); + gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); @@ -465,6 +464,15 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) gl->gl_tchange = jiffies; } +static void gfs2_set_demote(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + set_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb(); + wake_up(&sdp->sd_async_glock_wait); +} + static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -626,7 +634,8 @@ __acquires(&gl->gl_lockref.lock) */ if ((atomic_read(&gl->gl_ail_count) != 0) && (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { - gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_glock_assert_warn(gl, + !atomic_read(&gl->gl_ail_count)); gfs2_dump_glock(NULL, gl, true); } glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); @@ -756,20 +765,127 @@ out_unlock: return; } +void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) +{ + struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; + + if (ri->ri_magic == 0) + ri->ri_magic = cpu_to_be32(GFS2_MAGIC); + if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC)) + ri->ri_generation_deleted = cpu_to_be64(generation); +} + +bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation) +{ + struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; + + if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC)) + return false; + return generation <= be64_to_cpu(ri->ri_generation_deleted); +} + +static void gfs2_glock_poke(struct gfs2_glock *gl) +{ + int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP; + struct gfs2_holder gh; + int error; + + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh); + if (!error) + gfs2_glock_dq(&gh); +} + +static bool gfs2_try_evict(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + bool evicted = false; + + /* + * If there is contention on the iopen glock and we have an inode, try + * to grab and release the inode so that it can be evicted. This will + * allow the remote node to go ahead and delete the inode without us + * having to do it, which will avoid rgrp glock thrashing. + * + * The remote node is likely still holding the corresponding inode + * glock, so it will run before we get to verify that the delete has + * happened below. + */ + spin_lock(&gl->gl_lockref.lock); + ip = gl->gl_object; + if (ip && !igrab(&ip->i_inode)) + ip = NULL; + spin_unlock(&gl->gl_lockref.lock); + if (ip) { + struct gfs2_glock *inode_gl = NULL; + + gl->gl_no_formal_ino = ip->i_no_formal_ino; + set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + d_prune_aliases(&ip->i_inode); + iput(&ip->i_inode); + + /* If the inode was evicted, gl->gl_object will now be NULL. */ + spin_lock(&gl->gl_lockref.lock); + ip = gl->gl_object; + if (ip) { + inode_gl = ip->i_gl; + lockref_get(&inode_gl->gl_lockref); + clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + } + spin_unlock(&gl->gl_lockref.lock); + if (inode_gl) { + gfs2_glock_poke(inode_gl); + gfs2_glock_put(inode_gl); + } + evicted = !ip; + } + return evicted; +} + static void delete_work_func(struct work_struct *work) { - struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); + struct delayed_work *dwork = to_delayed_work(work); + struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct inode *inode; u64 no_addr = gl->gl_name.ln_number; + spin_lock(&gl->gl_lockref.lock); + clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); + spin_unlock(&gl->gl_lockref.lock); + /* If someone's using this glock to create a new dinode, the block must have been freed by another node, then re-used, in which case our iopen callback is too late after the fact. Ignore it. */ if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) goto out; - inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + /* + * If we can evict the inode, give the remote node trying to + * delete the inode some time before verifying that the delete + * has happened. Otherwise, if we cause contention on the inode glock + * immediately, the remote node will think that we still have + * the inode in use, and so it will give up waiting. + * + * If we can't evict the inode, signal to the remote node that + * the inode is still in use. We'll later try to delete the + * inode locally in gfs2_evict_inode. + * + * FIXME: We only need to verify that the remote node has + * deleted the inode because nodes before this remote delete + * rework won't cooperate. At a later time, when we no longer + * care about compatibility with such nodes, we can skip this + * step entirely. + */ + if (gfs2_try_evict(gl)) { + if (gfs2_queue_delete_work(gl, 5 * HZ)) + return; + } + goto out; + } + + inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, + GFS2_BLKST_UNLINKED); if (!IS_ERR_OR_NULL(inode)) { d_prune_aliases(inode); iput(inode); @@ -800,7 +916,7 @@ static void glock_work_func(struct work_struct *work) if (!delay) { clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - set_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_set_demote(gl); } } run_queue(gl, 0); @@ -931,7 +1047,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_object = NULL; gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); - INIT_WORK(&gl->gl_delete, delete_work_func); + INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func); mapping = gfs2_glock2aspace(gl); if (mapping) { @@ -1145,9 +1261,10 @@ wait_for_dlm: static void handle_callback(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote) { - int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; - - set_bit(bit, &gl->gl_flags); + if (delay) + set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); + else + gfs2_set_demote(gl); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; @@ -1754,6 +1871,44 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) rhashtable_walk_exit(&iter); } +bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) +{ + bool queued; + + spin_lock(&gl->gl_lockref.lock); + queued = queue_delayed_work(gfs2_delete_workqueue, + &gl->gl_delete, delay); + if (queued) + set_bit(GLF_PENDING_DELETE, &gl->gl_flags); + spin_unlock(&gl->gl_lockref.lock); + return queued; +} + +void gfs2_cancel_delete_work(struct gfs2_glock *gl) +{ + if (cancel_delayed_work_sync(&gl->gl_delete)) { + clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); + gfs2_glock_put(gl); + } +} + +bool gfs2_delete_work_queued(const struct gfs2_glock *gl) +{ + return test_bit(GLF_PENDING_DELETE, &gl->gl_flags); +} + +static void flush_delete_work(struct gfs2_glock *gl) +{ + flush_delayed_work(&gl->gl_delete); + gfs2_glock_queue_work(gl, 0); +} + +void gfs2_flush_delete_work(struct gfs2_sbd *sdp) +{ + glock_hash_walk(flush_delete_work, sdp); + flush_workqueue(gfs2_delete_workqueue); +} + /** * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw @@ -1836,7 +1991,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) int ret; ret = gfs2_truncatei_resume(ip); - gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); + gfs2_glock_assert_withdraw(gl, ret == 0); spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); @@ -1978,7 +2133,13 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) char gflags_buf[32]; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; + unsigned long nrpages = 0; + + if (gl->gl_ops->go_flags & GLOF_ASPACE) { + struct address_space *mapping = gfs2_glock2aspace(gl); + nrpages = mapping->nrpages; + } memset(fs_id_buf, 0, sizeof(fs_id_buf)); if (fsid && sdp) /* safety precaution */ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); @@ -1987,15 +2148,16 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " - "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state), - gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number, - gflags2str(gflags_buf, gl), - state2str(gl->gl_target), - state2str(gl->gl_demote_state), dtime, - atomic_read(&gl->gl_ail_count), - atomic_read(&gl->gl_revokes), - (int)gl->gl_lockref.count, gl->gl_hold_time); + "v:%d r:%d m:%ld p:%lu\n", + fs_id_buf, state2str(gl->gl_state), + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + gflags2str(gflags_buf, gl), + state2str(gl->gl_target), + state2str(gl->gl_demote_state), dtime, + atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_revokes), + (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages); list_for_each_entry(gh, &gl->gl_holders, gh_list) dump_holder(seq, gh, fs_id_buf); |