diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-07-03 09:42:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-07-03 09:42:17 -0700 |
commit | 20855e4cb361adeabce3665f5174b09b4a6ebfe6 (patch) | |
tree | dc344c6f60847394614ba90870c9b125620f5e4b | |
parent | 69cb6c6556ad89620547318439d6be8bb1629a5a (diff) | |
parent | 7561cea5dbb97fecb952548a0fb74fb105bf4664 (diff) |
Merge tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Darrick Wong:
"This fixes some stalling problems and corrects the last of the
problems (I hope) observed during testing of the new atomic xattr
update feature.
- Fix statfs blocking on background inode gc workers
- Fix some broken inode lock assertion code
- Fix xattr leaf buffer leaks when cancelling a deferred xattr update
operation
- Clean up xattr recovery to make it easier to understand.
- Fix xattr leaf block verifiers tripping over empty blocks.
- Remove complicated and error prone xattr leaf block bholding mess.
- Fix a bug where an rt extent crossing EOF was treated as "posteof"
blocks and cleaned unnecessarily.
- Fix a UAF when log shutdown races with unmount"
* tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: prevent a UAF when log IO errors race with unmount
xfs: dont treat rt extents beyond EOF as eofblocks to be cleared
xfs: don't hold xattr leaf buffers across transaction rolls
xfs: empty xattr leaf header blocks are not corruption
xfs: clean up the end of xfs_attri_item_recover
xfs: always free xattri_leaf_bp when cancelling a deferred op
xfs: use invalidate_lock to check the state of mmap_lock
xfs: factor out the common lock flags assert
xfs: introduce xfs_inodegc_push()
xfs: bound maximum wait time for inodegc work
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 38 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.h | 5 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 35 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_item.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 56 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 64 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_syscalls.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
14 files changed, 130 insertions, 131 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1824f61621a2..224649a76cbb 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -50,7 +50,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); -STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp); +STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args); /* * Internal routines when attribute list is more than one block. @@ -393,16 +393,10 @@ xfs_attr_sf_addname( * It won't fit in the shortform, transform to a leaf block. GROT: * another possible req'mt for a double-split btree op. */ - error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp); + error = xfs_attr_shortform_to_leaf(args); if (error) return error; - /* - * Prevent the leaf buffer from being unlocked so that a concurrent AIL - * push cannot grab the half-baked leaf buffer and run into problems - * with the write verifier. - */ - xfs_trans_bhold(args->trans, attr->xattri_leaf_bp); attr->xattri_dela_state = XFS_DAS_LEAF_ADD; out: trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp); @@ -447,11 +441,9 @@ xfs_attr_leaf_addname( /* * Use the leaf buffer we may already hold locked as a result of - * a sf-to-leaf conversion. The held buffer is no longer valid - * after this call, regardless of the result. + * a sf-to-leaf conversion. */ - error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp); - attr->xattri_leaf_bp = NULL; + error = xfs_attr_leaf_try_add(args); if (error == -ENOSPC) { error = xfs_attr3_leaf_to_node(args); @@ -497,8 +489,6 @@ xfs_attr_node_addname( struct xfs_da_args *args = attr->xattri_da_args; int error; - ASSERT(!attr->xattri_leaf_bp); - error = xfs_attr_node_addname_find_attr(attr); if (error) return error; @@ -1215,24 +1205,14 @@ xfs_attr_restore_rmt_blk( */ STATIC int xfs_attr_leaf_try_add( - struct xfs_da_args *args, - struct xfs_buf *bp) + struct xfs_da_args *args) { + struct xfs_buf *bp; int error; - /* - * If the caller provided a buffer to us, it is locked and held in - * the transaction because it just did a shortform to leaf conversion. - * Hence we don't need to read it again. Otherwise read in the leaf - * buffer. - */ - if (bp) { - xfs_trans_bhold_release(args->trans, bp); - } else { - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); - if (error) - return error; - } + error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); + if (error) + return error; /* * Look up the xattr name to set the insertion point for the new xattr. diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index b4a2fc77017e..dfb47fa63c6d 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -515,11 +515,6 @@ struct xfs_attr_intent { */ struct xfs_attri_log_nameval *xattri_nameval; - /* - * Used by xfs_attr_set to hold a leaf buffer across a transaction roll - */ - struct xfs_buf *xattri_leaf_bp; - /* Used to keep track of current state of delayed operation */ enum xfs_delattr_state xattri_dela_state; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 37e7c33f6283..8f47396f8dd2 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -289,6 +289,23 @@ xfs_attr3_leaf_verify_entry( return NULL; } +/* + * Validate an attribute leaf block. + * + * Empty leaf blocks can occur under the following circumstances: + * + * 1. setxattr adds a new extended attribute to a file; + * 2. The file has zero existing attributes; + * 3. The attribute is too large to fit in the attribute fork; + * 4. The attribute is small enough to fit in a leaf block; + * 5. A log flush occurs after committing the transaction that creates + * the (empty) leaf block; and + * 6. The filesystem goes down after the log flush but before the new + * attribute can be committed to the leaf block. + * + * Hence we need to ensure that we don't fail the validation purely + * because the leaf is empty. + */ static xfs_failaddr_t xfs_attr3_leaf_verify( struct xfs_buf *bp) @@ -311,15 +328,6 @@ xfs_attr3_leaf_verify( return fa; /* - * Empty leaf blocks should never occur; they imply the existence of a - * software bug that needs fixing. xfs_repair also flags them as a - * corruption that needs fixing, so we should never let these go to - * disk. - */ - if (ichdr.count == 0) - return __this_address; - - /* * firstused is the block offset of the first name info structure. * Make sure it doesn't go off the block or crash into the header. */ @@ -922,14 +930,10 @@ xfs_attr_shortform_getvalue( return -ENOATTR; } -/* - * Convert from using the shortform to the leaf. On success, return the - * buffer so that we can keep it locked until we're totally done with it. - */ +/* Convert from using the shortform to the leaf format. */ int xfs_attr_shortform_to_leaf( - struct xfs_da_args *args, - struct xfs_buf **leaf_bp) + struct xfs_da_args *args) { struct xfs_inode *dp; struct xfs_attr_shortform *sf; @@ -991,7 +995,6 @@ xfs_attr_shortform_to_leaf( sfe = xfs_attr_sf_nextentry(sfe); } error = 0; - *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index efa757f1e912..368f4d9fa1d5 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -49,8 +49,7 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, - struct xfs_buf **leaf_bp); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); int xfs_attr_sf_removename(struct xfs_da_args *args); int xfs_attr_sf_findname(struct xfs_da_args *args, struct xfs_attr_sf_entry **sfep, diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 135d44133477..5077a7ad5646 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -576,7 +576,7 @@ xfs_attri_item_recover( struct xfs_trans_res tres; struct xfs_attri_log_format *attrp; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; - int error, ret = 0; + int error; int total; int local; struct xfs_attrd_log_item *done_item = NULL; @@ -655,29 +655,32 @@ xfs_attri_item_recover( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - ret = xfs_xattri_finish_update(attr, done_item); - if (ret == -EAGAIN) { - /* There's more work to do, so add it to this transaction */ + error = xfs_xattri_finish_update(attr, done_item); + if (error == -EAGAIN) { + /* + * There's more work to do, so add the intent item to this + * transaction so that we can continue it later. + */ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list); - } else - error = ret; + error = xfs_defer_ops_capture_and_commit(tp, capture_list); + if (error) + goto out_unlock; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_irele(ip); + return 0; + } if (error) { xfs_trans_cancel(tp); goto out_unlock; } error = xfs_defer_ops_capture_and_commit(tp, capture_list); - out_unlock: - if (attr->xattri_leaf_bp) - xfs_buf_relse(attr->xattri_leaf_bp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_irele(ip); out: - if (ret != -EAGAIN) - xfs_attr_free_item(attr); + xfs_attr_free_item(attr); return error; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 52be58372c63..85e1a26c92e8 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -686,6 +686,8 @@ xfs_can_free_eofblocks( * forever. */ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); + if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) + end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize); last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); if (last_fsb <= end_fsb) return false; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 5269354b1b69..2609825d53ee 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -440,7 +440,7 @@ xfs_inodegc_queue_all( for_each_online_cpu(cpu) { gc = per_cpu_ptr(mp->m_inodegc, cpu); if (!llist_empty(&gc->list)) - queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); + mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); } } @@ -1841,8 +1841,8 @@ void xfs_inodegc_worker( struct work_struct *work) { - struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc, - work); + struct xfs_inodegc *gc = container_of(to_delayed_work(work), + struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; @@ -1862,19 +1862,29 @@ xfs_inodegc_worker( } /* - * Force all currently queued inode inactivation work to run immediately and - * wait for the work to finish. + * Expedite all pending inodegc work to run immediately. This does not wait for + * completion of the work. */ void -xfs_inodegc_flush( +xfs_inodegc_push( struct xfs_mount *mp) { if (!xfs_is_inodegc_enabled(mp)) return; + trace_xfs_inodegc_push(mp, __return_address); + xfs_inodegc_queue_all(mp); +} +/* + * Force all currently queued inode inactivation work to run immediately and + * wait for the work to finish. + */ +void +xfs_inodegc_flush( + struct xfs_mount *mp) +{ + xfs_inodegc_push(mp); trace_xfs_inodegc_flush(mp, __return_address); - - xfs_inodegc_queue_all(mp); flush_workqueue(mp->m_inodegc_wq); } @@ -2014,6 +2024,7 @@ xfs_inodegc_queue( struct xfs_inodegc *gc; int items; unsigned int shrinker_hits; + unsigned long queue_delay = 1; trace_xfs_inode_set_need_inactive(ip); spin_lock(&ip->i_flags_lock); @@ -2025,19 +2036,26 @@ xfs_inodegc_queue( items = READ_ONCE(gc->items); WRITE_ONCE(gc->items, items + 1); shrinker_hits = READ_ONCE(gc->shrinker_hits); - put_cpu_ptr(gc); - if (!xfs_is_inodegc_enabled(mp)) + /* + * We queue the work while holding the current CPU so that the work + * is scheduled to run on this CPU. + */ + if (!xfs_is_inodegc_enabled(mp)) { + put_cpu_ptr(gc); return; - - if (xfs_inodegc_want_queue_work(ip, items)) { - trace_xfs_inodegc_queue(mp, __return_address); - queue_work(mp->m_inodegc_wq, &gc->work); } + if (xfs_inodegc_want_queue_work(ip, items)) + queue_delay = 0; + + trace_xfs_inodegc_queue(mp, __return_address); + mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay); + put_cpu_ptr(gc); + if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { trace_xfs_inodegc_throttle(mp, __return_address); - flush_work(&gc->work); + flush_delayed_work(&gc->work); } } @@ -2054,7 +2072,7 @@ xfs_inodegc_cpu_dead( unsigned int count = 0; dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); - cancel_work_sync(&dead_gc->work); + cancel_delayed_work_sync(&dead_gc->work); if (llist_empty(&dead_gc->list)) return; @@ -2073,12 +2091,12 @@ xfs_inodegc_cpu_dead( llist_add_batch(first, last, &gc->list); count += READ_ONCE(gc->items); WRITE_ONCE(gc->items, count); - put_cpu_ptr(gc); if (xfs_is_inodegc_enabled(mp)) { trace_xfs_inodegc_queue(mp, __return_address); - queue_work(mp->m_inodegc_wq, &gc->work); + mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0); } + put_cpu_ptr(gc); } /* @@ -2173,7 +2191,7 @@ xfs_inodegc_shrinker_scan( unsigned int h = READ_ONCE(gc->shrinker_hits); WRITE_ONCE(gc->shrinker_hits, h + 1); - queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); + mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); no_items = false; } } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 2e4cfddf8b8e..6cd180721659 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -76,6 +76,7 @@ void xfs_blockgc_stop(struct xfs_mount *mp); void xfs_blockgc_start(struct xfs_mount *mp); void xfs_inodegc_worker(struct work_struct *work); +void xfs_inodegc_push(struct xfs_mount *mp); void xfs_inodegc_flush(struct xfs_mount *mp); void xfs_inodegc_stop(struct xfs_mount *mp); void xfs_inodegc_start(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 52d6f2c7d58b..3e1c62ffa4f7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -132,6 +132,26 @@ xfs_ilock_attr_map_shared( } /* + * You can't set both SHARED and EXCL for the same lock, + * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED, + * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values + * to set in lock_flags. + */ +static inline void +xfs_lock_flags_assert( + uint lock_flags) +{ + ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != + (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + ASSERT(lock_flags != 0); +} + +/* * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 * multi-reader locks: invalidate_lock and the i_lock. This routine allows * various combinations of the locks to be obtained. @@ -168,18 +188,7 @@ xfs_ilock( { trace_xfs_ilock(ip, lock_flags, _RET_IP_); - /* - * You can't set both SHARED and EXCL for the same lock, - * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, - * and XFS_ILOCK_EXCL are valid values to set in lock_flags. - */ - ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != - (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); - ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != - (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); - ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != - (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + xfs_lock_flags_assert(lock_flags); if (lock_flags & XFS_IOLOCK_EXCL) { down_write_nested(&VFS_I(ip)->i_rwsem, @@ -222,18 +231,7 @@ xfs_ilock_nowait( { trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); - /* - * You can't set both SHARED and EXCL for the same lock, - * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, - * and XFS_ILOCK_EXCL are valid values to set in lock_flags. - */ - ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != - (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); - ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != - (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); - ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != - (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + xfs_lock_flags_assert(lock_flags); if (lock_flags & XFS_IOLOCK_EXCL) { if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) @@ -291,19 +289,7 @@ xfs_iunlock( xfs_inode_t *ip, uint lock_flags) { - /* - * You can't set both SHARED and EXCL for the same lock, - * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, - * and XFS_ILOCK_EXCL are valid values to set in lock_flags. - */ - ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != - (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); - ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != - (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); - ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != - (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); - ASSERT(lock_flags != 0); + xfs_lock_flags_assert(lock_flags); if (lock_flags & XFS_IOLOCK_EXCL) up_write(&VFS_I(ip)->i_rwsem); @@ -379,8 +365,8 @@ xfs_isilocked( } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { - return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, - (lock_flags & XFS_IOLOCK_SHARED)); + return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock, + (lock_flags & XFS_MMAPLOCK_SHARED)); } if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1e972f884a81..ae904b21e9cc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2092,8 +2092,6 @@ xlog_dealloc_log( xlog_in_core_t *iclog, *next_iclog; int i; - xlog_cil_destroy(log); - /* * Cycle all the iclogbuf locks to make sure all log IO completion * is done before we tear down these buffers. @@ -2105,6 +2103,13 @@ xlog_dealloc_log( iclog = iclog->ic_next; } + /* + * Destroy the CIL after waiting for iclog IO completion because an + * iclog EIO error will try to shut down the log, which accesses the + * CIL to wake up the waiters. + */ + xlog_cil_destroy(log); + iclog = log->l_iclog; for (i = 0; i < log->l_iclog_bufs; i++) { next_iclog = iclog->ic_next; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ba5d42abf66e..d2eaebd85abf 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -61,7 +61,7 @@ struct xfs_error_cfg { */ struct xfs_inodegc { struct llist_head list; - struct work_struct work; + struct delayed_work work; /* approximate count of inodes in the list */ unsigned int items; diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 74ac9ca9e119..392cb39cc10c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -454,9 +454,12 @@ xfs_qm_scall_getquota( struct xfs_dquot *dqp; int error; - /* Flush inodegc work at the start of a quota reporting scan. */ + /* + * Expedite pending inodegc work at the start of a quota reporting + * scan but don't block waiting for it to complete. + */ if (id == 0) - xfs_inodegc_flush(mp); + xfs_inodegc_push(mp); /* * Try to get the dquot. We don't want it allocated on disk, so don't @@ -498,7 +501,7 @@ xfs_qm_scall_getquota_next( /* Flush inodegc work at the start of a quota reporting scan. */ if (*id == 0) - xfs_inodegc_flush(mp); + xfs_inodegc_push(mp); error = xfs_qm_dqget_next(mp, *id, type, &dqp); if (error) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ed18160e6181..aa977c7ea370 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -797,8 +797,11 @@ xfs_fs_statfs( xfs_extlen_t lsize; int64_t ffree; - /* Wait for whatever inactivations are in progress. */ - xfs_inodegc_flush(mp); + /* + * Expedite background inodegc but don't wait. We do not want to block + * here waiting hours for a billion extent file to be truncated. + */ + xfs_inodegc_push(mp); statp->f_type = XFS_SUPER_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -1074,7 +1077,7 @@ xfs_inodegc_init_percpu( gc = per_cpu_ptr(mp->m_inodegc, cpu); init_llist_head(&gc->list); gc->items = 0; - INIT_WORK(&gc->work, xfs_inodegc_worker); + INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); } return 0; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index d32026585c1b..0fa1b7a2918c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -240,6 +240,7 @@ DEFINE_EVENT(xfs_fs_class, name, \ TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ TP_ARGS(mp, caller_ip)) DEFINE_FS_EVENT(xfs_inodegc_flush); +DEFINE_FS_EVENT(xfs_inodegc_push); DEFINE_FS_EVENT(xfs_inodegc_start); DEFINE_FS_EVENT(xfs_inodegc_stop); DEFINE_FS_EVENT(xfs_inodegc_queue); |