diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 10:18:00 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 10:18:00 -0800 |
commit | 20c59c71ae711aff845eef640b25935bc9578c93 (patch) | |
tree | bba42c29760903c293fdd2e76c2a5ed078500820 | |
parent | 5a87e37ee0943afe11504299e4b87d2e4d8d88d5 (diff) | |
parent | 1e369b0e199bbfbab5218e1c1443d839700d8884 (diff) |
Merge tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"This merge cycle, we're again some substantive changes to XFS.
Metadata verifiers have been restructured to provide more detail about
which part of a metadata structure failed checks, and we've enhanced
the new online fsck feature to cross-reference extent allocation
information with the other metadata structures. With this pull, the
metadata verification part of online fsck is more or less finished,
though the feature is still experimental and still disabled by
default.
We're also preparing to remove the EXPERIMENTAL tag from a couple of
features this cycle. This week we're committing a bunch of space
accounting fixes for reflink and removing the EXPERIMENTAL tag from
reflink; I anticipate that we'll be ready to do the same for the
reverse mapping feature next week. (I don't have any pending fixes for
rmap; however I wish to remove the tags one at a time.)
This giant pile of patches has been run through a full xfstests run
over the weekend and through a quick xfstests run against this
morning's master, with no major failures reported. Let me know if
there's any merge problems -- git merge reported that one of our
patches touched the same function as the i_version series, but it
resolved things cleanly.
Summary:
- Log faulting code locations when verifiers fail, for improved
diagnosis of corrupt filesystems.
- Implement metadata verifiers for local format inode fork data.
- Online scrub now cross-references metadata records with other
metadata.
- Refactor the fs geometry ioctl generation functions.
- Harden various metadata verifiers.
- Fix various accounting problems.
- Fix uncancelled transactions leaking when xattr functions fail.
- Prevent the copy-on-write speculative preallocation garbage
collector from racing with writeback.
- Emit log reservation type information as trace data so that we can
compare against xfsprogs.
- Fix some erroneous asserts in the online scrub code.
- Clean up the transaction reservation calculations.
- Fix various minor bugs in online scrub.
- Log complaints about mixed dio/buffered writes once per day and
less noisily than before.
- Refactor buffer log item lists to use list_head.
- Break PNFS leases before reflinking blocks.
- Reduce lock contention on reflink source files.
- Fix some quota accounting problems with reflink.
- Fix a serious corruption problem in the direct cow write code where
we fed bad iomaps to the vfs iomap consumers.
- Various other refactorings.
- Remove EXPERIMENTAL tag from reflink!"
* tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (94 commits)
xfs: remove experimental tag for reflinks
xfs: don't screw up direct writes when freesp is fragmented
xfs: check reflink allocation mappings
iomap: warn on zero-length mappings
xfs: treat CoW fork operations as delalloc for quota accounting
xfs: only grab shared inode locks for source file during reflink
xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes
xfs: reflink should break pnfs leases before sharing blocks
xfs: don't clobber inobt/finobt cursors when xref with rmap
xfs: skip CoW writes past EOF when writeback races with truncate
xfs: preserve i_rdev when recycling a reclaimable inode
xfs: refactor accounting updates out of xfs_bmap_btalloc
xfs: refactor inode verifier corruption error printing
xfs: make tracepoint inode number format consistent
xfs: always zero di_flags2 when we free the inode
xfs: call xfs_qm_dqattach before performing reflink operations
xfs: bmap code cleanup
Use list_head infra-structure for buffer's log items list
Split buffer's b_fspriv field
Get rid of xfs_buf_log_item_t typedef
...
97 files changed, 4606 insertions, 1558 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index 3aafb3343a65..a0ca9e48e993 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -219,6 +219,27 @@ static inline struct page *dio_get_page(struct dio *dio, return dio->pages[sdio->head]; } +/* + * Warn about a page cache invalidation failure during a direct io write. + */ +void dio_warn_stale_pagecache(struct file *filp) +{ + static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); + char pathname[128]; + struct inode *inode = file_inode(filp); + char *path; + + errseq_set(&inode->i_mapping->wb_err, -EIO); + if (__ratelimit(&_rs)) { + path = file_path(filp, pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); + pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, + current->comm); + } +} + /** * dio_complete() - called when all DIO BIO I/O has been completed * @offset: the byte offset in the file of the completed operation @@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, (offset + ret - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(err); + if (err) + dio_warn_stale_pagecache(dio->iocb->ki_filp); } if (!(dio->flags & DIO_SKIP_DIO_COUNT)) diff --git a/fs/iomap.c b/fs/iomap.c index 47d29ccffaef..afd163586aa0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -65,6 +65,8 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, return ret; if (WARN_ON(iomap.offset > pos)) return -EIO; + if (WARN_ON(iomap.length == 0)) + return -EIO; /* * Cut down the length to the one actually provided by the filesystem, @@ -753,7 +755,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) err = invalidate_inode_pages2_range(inode->i_mapping, offset >> PAGE_SHIFT, (offset + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(err); + if (err) + dio_warn_stale_pagecache(iocb->ki_filp); } inode_dio_end(file_inode(iocb->ki_filp)); @@ -1018,9 +1021,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret) goto out_free_dio; + /* + * Try to invalidate cache pages for the range we're direct + * writing. If this invalidation fails, tough, the write will + * still work, but racing two incompatible write paths is a + * pretty crazy thing to do, so we don't support it 100%. + */ ret = invalidate_inode_pages2_range(mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + if (ret) + dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 83ed7715f856..c02781a4c091 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -167,7 +167,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. */ -static int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -520,7 +520,7 @@ xfs_alloc_fixup_trees( return 0; } -static bool +static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { @@ -528,10 +528,19 @@ xfs_agfl_verify( struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; + /* + * There is no verification of non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return NULL; + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -539,16 +548,17 @@ xfs_agfl_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) - return false; + return __this_address; } - return xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) + return __this_address; + return NULL; } static void @@ -556,6 +566,7 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -567,28 +578,29 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agfl_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agfl_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -602,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { .name = "xfs_agfl", .verify_read = xfs_agfl_read_verify, .verify_write = xfs_agfl_write_verify, + .verify_struct = xfs_agfl_verify, }; /* @@ -2397,19 +2410,19 @@ xfs_alloc_put_freelist( return 0; } -static bool +static xfs_failaddr_t xfs_agf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) - { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) - return false; + return __this_address; } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && @@ -2418,18 +2431,18 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) - return false; + return __this_address; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2438,18 +2451,18 @@ xfs_agf_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; if (xfs_sb_version_haslazysbcount(&mp->m_sb) && be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return false; + return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; - return true;; + return NULL; } @@ -2458,28 +2471,29 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agf_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2496,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = { .name = "xfs_agf", .verify_read = xfs_agf_read_verify, .verify_write = xfs_agf_write_verify, + .verify_struct = xfs_agf_verify, }; /* @@ -2981,3 +2996,22 @@ xfs_verify_fsbno( return false; return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } + +/* Is there a record covering a given extent? */ +int +xfs_alloc_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.a.ar_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.a.ar_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7ba2d129d504..65a0cafe06e4 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -198,6 +198,13 @@ xfs_free_extent( enum xfs_ag_resv_type type); /* block reservation type */ int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exist); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index cfde0a0f9706..6840b588187e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys( be32_to_cpu(k2->alloc.ar_startblock); } -static bool +static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -331,29 +332,31 @@ xfs_allocbt_verify( level = be16_to_cpu(block->bb_level); switch (block->bb_magic) { case cpu_to_be32(XFS_ABTB_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTB_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; case cpu_to_be32(XFS_ABTC_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTC_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; default: - return false; + return __this_address; } return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -363,25 +366,30 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_allocbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_allocbt_write_verify( struct xfs_buf *bp) { - if (!xfs_allocbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_allocbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -392,6 +400,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = { .name = "xfs_allocbt", .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, + .verify_struct = xfs_allocbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index a76914db72ef..ce4a34a2751d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -717,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -770,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return 0; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -1045,7 +1043,6 @@ out: return retval; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } @@ -1186,7 +1183,6 @@ out: return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 601eaa36f1ad..2135b8e67dcc 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk( } } -static bool +static xfs_failaddr_t xfs_attr3_leaf_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_perag *pag = bp->b_pag; - struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; + struct xfs_attr_leaf_entry *entries; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -262,17 +263,17 @@ xfs_attr3_leaf_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) - return false; + return __this_address; } /* * In recovery there is a transient state where count == 0 is valid @@ -280,12 +281,27 @@ xfs_attr3_leaf_verify( * if the attr didn't fit in shortform. */ if (pag && pag->pagf_init && ichdr.count == 0) - return false; + return __this_address; + + /* + * firstused is the block offset of the first name info structure. + * Make sure it doesn't go off the block or crash into the header. + */ + if (ichdr.firstused > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) + return __this_address; + + /* Make sure the entries array doesn't crash into the name info. */ + entries = xfs_attr3_leaf_entryp(bp->b_addr); + if ((char *)&entries[ichdr.count] > + (char *)bp->b_addr + ichdr.firstused) + return __this_address; /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_leaf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_attr3_leaf_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { .name = "xfs_attr3_leaf", .verify_read = xfs_attr3_leaf_read_verify, .verify_write = xfs_attr3_leaf_write_verify, + .verify_struct = xfs_attr3_leaf_verify, }; int @@ -870,6 +889,80 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } +/* Verify the consistency of an inline attribute fork. */ +xfs_failaddr_t +xfs_attr_shortform_verify( + struct xfs_inode *ip) +{ + struct xfs_attr_shortform *sfp; + struct xfs_attr_sf_entry *sfep; + struct xfs_attr_sf_entry *next_sfep; + char *endp; + struct xfs_ifork *ifp; + int i; + int size; + + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the attribute is way too short. + */ + if (size < sizeof(struct xfs_attr_sf_hdr)) + return __this_address; + + endp = (char *)sfp + size; + + /* Check all reported entries */ + sfep = &sfp->list[0]; + for (i = 0; i < sfp->hdr.count; i++) { + /* + * struct xfs_attr_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return __this_address; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return __this_address; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + if ((char *)next_sfep > endp) + return __this_address; + + /* + * Check for unknown flags. Short form doesn't support + * the incomplete or local bits, so we can use the namespace + * mask here. + */ + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) + return __this_address; + + /* + * Check for invalid namespace combinations. We only allow + * one namespace flag per xattr, so we can just count the + * bits (i.e. hweight) here. + */ + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return __this_address; + + sfep = next_sfep; + } + if ((void *)sfep != (void *)endp) + return __this_address; + + return NULL; +} + /* * Convert a leaf attribute list to shortform attribute list */ @@ -2173,7 +2266,8 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); + if (ichdr.count >= args->geo->blksize / 8) + return -EFSCORRUPTED; /* * Binary search. (note: small blocks will skip this loop) @@ -2189,8 +2283,10 @@ xfs_attr3_leaf_lookup_int( else break; } - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) + return -EFSCORRUPTED; + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) + return -EFSCORRUPTED; /* * Since we may have duplicate hashval's, find the first matching diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 894124efb421..4da08af5b134 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -53,6 +53,7 @@ int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d56caf037ca0..21be186067a2 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks( * does CRC, location and bounds checking, the unpacking function checks the * attribute parameters and owner. */ -static bool +static xfs_failaddr_t xfs_attr3_rmt_hdr_ok( void *ptr, xfs_ino_t ino, @@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok( struct xfs_attr3_rmt_hdr *rmt = ptr; if (bno != be64_to_cpu(rmt->rm_blkno)) - return false; + return __this_address; if (offset != be32_to_cpu(rmt->rm_offset)) - return false; + return __this_address; if (size != be32_to_cpu(rmt->rm_bytes)) - return false; + return __this_address; if (ino != be64_to_cpu(rmt->rm_owner)) - return false; + return __this_address; /* ok */ - return true; + return NULL; } -static bool +static xfs_failaddr_t xfs_attr3_rmt_verify( struct xfs_mount *mp, void *ptr, @@ -98,27 +98,29 @@ xfs_attr3_rmt_verify( struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(rmt->rm_blkno) != bno) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_offset) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) - return false; + return __this_address; if (rmt->rm_owner == 0) - return false; + return __this_address; - return true; + return NULL; } -static void -xfs_attr3_rmt_read_verify( - struct xfs_buf *bp) +static int +__xfs_attr3_rmt_read_verify( + struct xfs_buf *bp, + bool check_crc, + xfs_failaddr_t *failaddr) { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; @@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify( /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; + return 0; ptr = bp->b_addr; bno = bp->b_bn; @@ -136,23 +138,48 @@ xfs_attr3_rmt_read_verify( ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); - break; - } - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; + if (check_crc && + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + *failaddr = __this_address; + return -EFSBADCRC; } + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (*failaddr) + return -EFSCORRUPTED; len -= blksize; ptr += blksize; bno += BTOBB(blksize); } - if (bp->b_error) - xfs_verifier_error(bp); - else - ASSERT(len == 0); + if (len != 0) { + *failaddr = __this_address; + return -EFSCORRUPTED; + } + + return 0; +} + +static void +xfs_attr3_rmt_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); + if (error) + xfs_verifier_error(bp, error, fa); +} + +static xfs_failaddr_t +xfs_attr3_rmt_verify_struct( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); + return error ? fa : NULL; } static void @@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; int len; @@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify( while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify( * xfs_attr3_rmt_hdr_set() for the explanation. */ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify( ptr += blksize; bno += BTOBB(blksize); } - ASSERT(len == 0); + + if (len != 0) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { .name = "xfs_attr3_rmt", .verify_read = xfs_attr3_rmt_read_verify, .verify_write = xfs_attr3_rmt_write_verify, + .verify_struct = xfs_attr3_rmt_verify_struct, }; STATIC int @@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout( byte_cnt = min(*valuelen, byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1bddbba6b80c..daae00ed30c5 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_verify_fsbno(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -1220,7 +1220,7 @@ xfs_iread_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); + xfs_verify_fsbno(mp, bno), out_brelse); xfs_trans_brelse(tp, bp); } @@ -3337,6 +3337,49 @@ xfs_bmap_btalloc_filestreams( return 0; } +/* Update all inode and quota accounting for the allocation we just did. */ +static void +xfs_bmap_btalloc_accounting( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) +{ + if (ap->flags & XFS_BMAPI_COWFORK) { + /* + * COW fork blocks are in-core only and thus are treated as + * in-core quota reservation (like delalloc blocks) even when + * converted to real blocks. The quota reservation is not + * accounted to disk until blocks are remapped to the data + * fork. So if these blocks were previously delalloc, we + * already have quota reservation and there's nothing to do + * yet. + */ + if (ap->wasdel) + return; + + /* + * Otherwise, we've allocated blocks in a hole. The transaction + * has acquired in-core quota reservation for this extent. + * Rather than account these as real blocks, however, we reduce + * the transaction quota reservation based on the allocation. + * This essentially transfers the transaction quota reservation + * to that of a delalloc extent. + */ + ap->ip->i_delayed_blks += args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, + -(long)args->len); + return; + } + + /* data/attr fork only */ + ap->ip->i_d.di_nblocks += args->len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, + args->len); +} + STATIC int xfs_bmap_btalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -3347,6 +3390,8 @@ xfs_bmap_btalloc( xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; + xfs_fileoff_t orig_offset; + xfs_extlen_t orig_length; xfs_extlen_t blen; xfs_extlen_t nextminlen = 0; int nullfb; /* true if ap->firstblock isn't set */ @@ -3356,6 +3401,8 @@ xfs_bmap_btalloc( int stripe_align; ASSERT(ap->length); + orig_offset = ap->offset; + orig_length = ap->length; mp = ap->ip->i_mount; @@ -3571,19 +3618,23 @@ xfs_bmap_btalloc( *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; - if (!(ap->flags & XFS_BMAPI_COWFORK)) - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; /* - * Adjust the disk quota also. This was reserved - * earlier. + * If the extent size hint is active, we tried to round the + * caller's allocation request offset down to extsz and the + * length up to another extsz boundary. If we found a free + * extent we mapped it in starting at this new offset. If the + * newly mapped space isn't long enough to cover any of the + * range of offsets that was originally requested, move the + * mapping up so that we can fill as much of the caller's + * original request as possible. Free space is apparently + * very fragmented so we're unlikely to be able to satisfy the + * hints anyway. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); + if (ap->length <= orig_length) + ap->offset = orig_offset; + else if (ap->offset + ap->length < orig_offset + orig_length) + ap->offset = orig_offset + orig_length - ap->length; + xfs_bmap_btalloc_accounting(ap, &args); } else { ap->blkno = NULLFSBLOCK; ap->length = 0; @@ -3876,8 +3927,6 @@ xfs_bmapi_reserve_delalloc( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_extlen_t alen; xfs_extlen_t indlen; - char rt = XFS_IS_REALTIME_INODE(ip); - xfs_extlen_t extsz; int error; xfs_fileoff_t aoff = off; @@ -3892,31 +3941,25 @@ xfs_bmapi_reserve_delalloc( prealloc = alen - len; /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) - extsz = xfs_get_cowextsz_hint(ip); - else - extsz = xfs_get_extsz_hint(ip); - if (extsz) { + if (whichfork == XFS_COW_FORK) { struct xfs_bmbt_irec prev; + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) prev.br_startoff = NULLFILEOFF; - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 1, 0, &aoff, &alen); ASSERT(!error); } - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; - /* * Make a transaction-less quota reservation for delayed allocation * blocks. This number gets adjusted later. We return if we haven't * allocated blocks already inside this loop. */ error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, - rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + XFS_QMOPT_RES_REGBLKS); if (error) return error; @@ -3927,12 +3970,7 @@ xfs_bmapi_reserve_delalloc( indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); ASSERT(indlen > 0); - if (rt) { - error = xfs_mod_frextents(mp, -((int64_t)extsz)); - } else { - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); - } - + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); if (error) goto out_unreserve_quota; @@ -3963,14 +4001,11 @@ xfs_bmapi_reserve_delalloc( return 0; out_unreserve_blocks: - if (rt) - xfs_mod_frextents(mp, extsz); - else - xfs_mod_fdblocks(mp, alen, false); + xfs_mod_fdblocks(mp, alen, false); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, + XFS_QMOPT_RES_REGBLKS); return error; } @@ -4304,8 +4339,16 @@ xfs_bmapi_write( while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; - /* in hole or beyoned EOF? */ + /* in hole or beyond EOF? */ if (eof || bma.got.br_startoff > bno) { + /* + * CoW fork conversions should /never/ hit EOF or + * holes. There should always be something for us + * to work on. + */ + ASSERT(!((flags & XFS_BMAPI_CONVERT) && + (flags & XFS_BMAPI_COWFORK))); + if (flags & XFS_BMAPI_DELALLOC) { /* * For the COW fork we can reasonably get a @@ -4824,6 +4867,7 @@ xfs_bmap_del_extent_cow( xfs_iext_insert(ip, icur, &new, state); break; } + ip->i_delayed_blks -= del->br_blockcount; } /* diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index c10aecaaae44..9faf479aba49 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -425,33 +425,29 @@ xfs_bmbt_diff_two_keys( be64_to_cpu(k2->bmbt.br_startoff); } -static bool +static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; switch (block->bb_magic) { case cpu_to_be32(XFS_BMAP_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) - return false; /* * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. */ - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) - return false; + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_BMAP_MAGIC): break; default: - return false; + return __this_address; } /* @@ -463,46 +459,39 @@ xfs_bmbt_verify( */ level = be16_to_cpu(block->bb_level); if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) - return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) - return false; - if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) - return false; - - return true; + return __this_address; + + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } static void xfs_bmbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_bmbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_bmbt_write_verify( struct xfs_buf *bp) { - if (!xfs_bmbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_bmbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_lblock_calc_crc(bp); @@ -512,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = { .name = "xfs_bmbt", .verify_read = xfs_bmbt_read_verify, .verify_write = xfs_bmbt_write_verify, + .verify_struct = xfs_bmbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5f33adf8eecb..79ee4a1951d1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) - return false; + return __this_address; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } @@ -853,7 +853,7 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + if (!xfs_verify_fsbno(mp, fsbno)) return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, @@ -4529,6 +4529,51 @@ xfs_btree_change_owner( &bbcoi); } +/* Verify the v5 fields of a long-format btree block. */ +xfs_failaddr_t +xfs_btree_lblock_v5hdr_verify( + struct xfs_buf *bp, + uint64_t owner) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) + return __this_address; + if (owner != XFS_RMAP_OWN_UNKNOWN && + be64_to_cpu(block->bb_u.l.bb_owner) != owner) + return __this_address; + return NULL; +} + +/* Verify a long-format btree block. */ +xfs_failaddr_t +xfs_btree_lblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) + return __this_address; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) + return __this_address; + + return NULL; +} + /** * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format * btree block @@ -4537,7 +4582,7 @@ xfs_btree_change_owner( * @max_recs: pointer to the m_*_mxr max records field in the xfs mount * @pag_max_level: pointer to the per-ag max level field */ -bool +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { @@ -4546,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify( struct xfs_perag *pag = bp->b_pag; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; + return __this_address; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - return true; + return __this_address; + return NULL; } /** @@ -4562,29 +4607,29 @@ xfs_btree_sblock_v5hdr_verify( * @bp: buffer containing the btree block * @max_recs: maximum records allowed in this btree node */ -bool +xfs_failaddr_t xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_agblock_t agno; /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) - return false; + return __this_address; /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) - return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) - return false; + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) + return __this_address; + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) + return __this_address; - return true; + return NULL; } /* @@ -4953,3 +4998,33 @@ xfs_btree_diff_two_ptrs( return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); } + +/* If there's an extent, we're done. */ +STATIC int +xfs_btree_has_record_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* Is there a record covering a given range of keys? */ +int +xfs_btree_has_record( + struct xfs_btree_cur *cur, + union xfs_btree_irec *low, + union xfs_btree_irec *high, + bool *exists) +{ + int error; + + error = xfs_btree_query_range(cur, low, high, + &xfs_btree_has_record_helper, NULL); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + *exists = true; + return 0; + } + *exists = false; + return error; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b57501c6f71d..50440b5618e8 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -#define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) - /* * Trace hooks. Currently not implemented as they need to be ported * over to the generic tracing functionality, which is some effort. @@ -496,8 +492,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_BTREE_TRACE_ARGR(c, r) #define XFS_BTREE_TRACE_CURSOR(c, t) -bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); -bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); +xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, + unsigned int max_recs); +xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, + uint64_t owner); +xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, + unsigned int max_recs); + uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, unsigned long len); xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, @@ -545,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur, struct xfs_btree_block *block, union xfs_btree_key *key); union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, union xfs_btree_key *key); +int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, + union xfs_btree_irec *high, bool *exists); #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 651611530d2f..ea187b4a7991 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -static bool +static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { @@ -145,24 +145,24 @@ xfs_da3_node_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_DA3_NODE_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) - return false; + return __this_address; } if (ichdr.level == 0) - return false; + return __this_address; if (ichdr.level > XFS_DA_NODE_MAXDEPTH) - return false; + return __this_address; if (ichdr.count == 0) - return false; + return __this_address; /* * we don't know if the node is for and attribute or directory tree, @@ -170,11 +170,11 @@ xfs_da3_node_verify( */ if (ichdr.count > mp->m_dir_geo->node_ents && ichdr.count > mp->m_attr_geo->node_ents) - return false; + return __this_address; /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -182,12 +182,13 @@ xfs_da3_node_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_da3_node_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -211,19 +212,20 @@ xfs_da3_node_read_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; + xfs_failaddr_t fa; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, + __this_address); break; } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; - } + fa = xfs_da3_node_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -236,18 +238,40 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } +} + +/* Verify the structure of a da3 block. */ +static xfs_failaddr_t +xfs_da3_node_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; - /* corrupt block */ - xfs_verifier_error(bp); + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: + case XFS_DA_NODE_MAGIC: + return xfs_da3_node_verify(bp); + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + bp->b_ops = &xfs_attr3_leaf_buf_ops; + return bp->b_ops->verify_struct(bp); + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + bp->b_ops = &xfs_dir3_leafn_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + return __this_address; + } } const struct xfs_buf_ops xfs_da3_node_buf_ops = { .name = "xfs_da3_node", .verify_read = xfs_da3_node_read_verify, .verify_write = xfs_da3_node_write_verify, + .verify_struct = xfs_da3_node_verify_struct, }; int diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 3771edcb301d..7e77299b7789 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_attr3_rmt_hdr) : 0)) +/* Number of bytes in a directory block. */ +static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) +{ + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); +} + #endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index e10778c102ea..92f94e190f04 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -119,8 +119,7 @@ xfs_da_mount( ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= - XFS_MAX_BLOCKSIZE); + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); @@ -140,7 +139,7 @@ xfs_da_mount( dageo = mp->m_dir_geo; dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; /* diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 1a8f2cf977ca..388d67c5c903 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) #define XFS_READDIR_BUFSIZE (32768) unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); +void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 43c902f7a68d..2da86a394bcf 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -58,7 +58,7 @@ xfs_dir_startup(void) xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } -static bool +static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { @@ -67,20 +67,18 @@ xfs_dir3_block_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } static void @@ -88,15 +86,16 @@ xfs_dir3_block_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_block_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -104,12 +103,13 @@ xfs_dir3_block_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_block_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .name = "xfs_dir3_block", .verify_read = xfs_dir3_block_read_verify, .verify_write = xfs_dir3_block_write_verify, + .verify_struct = xfs_dir3_block_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 8727a43115ef..920279485275 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -36,9 +36,9 @@ /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Return 0 is the buffer is good, otherwise an error. + * Return NULL if the buffer is good, otherwise the address of the error. */ -int +xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ @@ -73,6 +73,14 @@ __xfs_dir3_data_check( */ ops = xfs_dir_get_ops(mp, dp); + /* + * If this isn't a directory, or we don't get handed the dir ops, + * something is seriously wrong. Bail out. + */ + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || + ops != xfs_dir_get_ops(mp, NULL)) + return __this_address; + hdr = bp->b_addr; p = (char *)ops->data_entry_p(hdr); @@ -81,7 +89,6 @@ __xfs_dir3_data_check( case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); - endp = (char *)lep; /* * The number of leaf entries is limited by the size of the @@ -90,17 +97,19 @@ __xfs_dir3_data_check( * so just ensure that the count falls somewhere inside the * block right now. */ - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); + if (be32_to_cpu(btp->count) >= + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) + return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + geo->blksize; break; default: - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; + return __this_address; } + endp = xfs_dir3_data_endp(geo, hdr); + if (!endp) + return __this_address; /* * Account for zero bestfree entries. @@ -108,22 +117,25 @@ __xfs_dir3_data_check( bf = ops->data_bestfree_p(hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); + if (bf[0].offset) + return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); + if (bf[1].offset) + return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); + if (bf[2].offset) + return __this_address; freeseen |= 1 << 2; } - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= - be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) + return __this_address; + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) + return __this_address; /* * Loop over the data/unused entries. */ @@ -135,22 +147,23 @@ __xfs_dir3_data_check( * doesn't need to be there. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + be16_to_cpu(dup->length)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); + if (lastfree != 0) + return __this_address; + if (endp < p + be16_to_cpu(dup->length)) + return __this_address; + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + (char *)dup - (char *)hdr) + return __this_address; dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN(mp, - (freeseen & (1 << i)) == 0); + if ((freeseen & (1 << i)) != 0) + return __this_address; freeseen |= 1 << i; } else { - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(dup->length) > + be16_to_cpu(bf[2].length)) + return __this_address; } p += be16_to_cpu(dup->length); lastfree = 1; @@ -163,16 +176,17 @@ __xfs_dir3_data_check( * The linear search is crude but this is DEBUG code. */ dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN(mp, - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + ops->data_entsize(dep->namelen)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*ops->data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN(mp, - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + if (dep->namelen == 0) + return __this_address; + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) + return __this_address; + if (endp < p + ops->data_entsize(dep->namelen)) + return __this_address; + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != + (char *)dep - (char *)hdr) + return __this_address; + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) + return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -188,34 +202,52 @@ __xfs_dir3_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - XFS_WANT_CORRUPTED_RETURN(mp, - i < be32_to_cpu(btp->count)); + if (i >= be32_to_cpu(btp->count)) + return __this_address; } p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. */ - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); + if (freeseen != 7) + return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; - if (i > 0) - XFS_WANT_CORRUPTED_RETURN(mp, - be32_to_cpu(lep[i].hashval) >= - be32_to_cpu(lep[i - 1].hashval)); + if (i > 0 && be32_to_cpu(lep[i].hashval) < + be32_to_cpu(lep[i - 1].hashval)) + return __this_address; } - XFS_WANT_CORRUPTED_RETURN(mp, count == - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) + return __this_address; + if (stale != be32_to_cpu(btp->stale)) + return __this_address; } - return 0; + return NULL; +} + +#ifdef DEBUG +void +xfs_dir3_data_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = __xfs_dir3_data_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); } +#endif -static bool +static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { @@ -224,20 +256,18 @@ xfs_dir3_data_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } /* @@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } @@ -274,15 +303,16 @@ xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_data_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -290,12 +320,13 @@ xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_data_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, + .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { @@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int( struct xfs_dir2_data_hdr *hdr, int *loghead) { - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ xfs_dir2_data_unused_t *dup; /* unused data entry */ struct xfs_dir2_data_free *bf; @@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int( * Set up pointers. */ p = (char *)ops->data_entry_p(hdr); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(geo, hdr); - endp = (char *)xfs_dir2_block_leaf_p(btp); - } else - endp = (char *)hdr + geo->blksize; + endp = xfs_dir3_data_endp(geo, hdr); /* * Loop over the block's entries. */ @@ -755,17 +781,9 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + args->geo->blksize; - else { - xfs_dir2_block_tail_t *btp; /* block tail */ + endptr = xfs_dir3_data_endp(args->geo, hdr); + ASSERT(endptr != NULL); - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. @@ -1067,3 +1085,21 @@ xfs_dir2_data_use_free( } *needscanp = needscan; } + +/* Find the end of the entry data in a data/block format dir block. */ +void * +xfs_dir3_data_endp( + struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr) +{ + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + return (char *)hdr + geo->blksize; + default: + return NULL; + } +} diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 27297a689d9c..d7e630f41f9c 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -STATIC bool +static xfs_failaddr_t xfs_dir3_leaf1_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -69,17 +63,32 @@ xfs_dir3_leaf1_check( if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leaf1_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -bool +xfs_failaddr_t xfs_dir3_leaf_check_int( struct xfs_mount *mp, struct xfs_inode *dp, @@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int( * We can deduce a value for that from di_size. */ if (hdr->count > ops->leaf_max_ents(geo)) - return false; + return __this_address; /* Leaves and bests don't overlap in leaf format. */ if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || hdr->magic == XFS_DIR3_LEAF1_MAGIC) && (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) - return false; + return __this_address; /* Check hash value order, count stale entries. */ for (i = stale = 0; i < hdr->count; i++) { if (i + 1 < hdr->count) { if (be32_to_cpu(ents[i].hashval) > be32_to_cpu(ents[i + 1].hashval)) - return false; + return __this_address; } if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (hdr->stale != stale) - return false; - return true; + return __this_address; + return NULL; } /* @@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int( * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due * to incorrect magic numbers. */ -static bool +static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp, uint16_t magic) @@ -160,16 +169,16 @@ xfs_dir3_leaf_verify( : XFS_DIR3_LEAFN_MAGIC; if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) - return false; + return __this_address; if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) - return false; + return __this_address; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) - return false; + return __this_address; } return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); @@ -181,15 +190,16 @@ __read_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -198,12 +208,13 @@ __write_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -216,6 +227,13 @@ __write_verify( xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } +static xfs_failaddr_t +xfs_dir3_leaf1_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + static void xfs_dir3_leaf1_read_verify( struct xfs_buf *bp) @@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify( __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); } +static xfs_failaddr_t +xfs_dir3_leafn_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + static void xfs_dir3_leafn_read_verify( struct xfs_buf *bp) @@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { .name = "xfs_dir3_leaf1", .verify_read = xfs_dir3_leaf1_read_verify, .verify_write = xfs_dir3_leaf1_write_verify, + .verify_struct = xfs_dir3_leaf1_verify, }; const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { .name = "xfs_dir3_leafn", .verify_read = xfs_dir3_leafn_read_verify, .verify_write = xfs_dir3_leafn_write_verify, + .verify_struct = xfs_dir3_leafn_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 682e2bf370c7..239d97a64296 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -static bool +static xfs_failaddr_t xfs_dir3_leafn_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -72,17 +66,32 @@ xfs_dir3_leafn_check( if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leafn_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -static bool +static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { @@ -93,21 +102,21 @@ xfs_dir3_free_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) - return false; + return __this_address; } /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ - return true; + return NULL; } static void @@ -115,15 +124,16 @@ xfs_dir3_free_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_free_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -131,12 +141,13 @@ xfs_dir3_free_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_free_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_free_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .name = "xfs_dir3_free", .verify_read = xfs_dir3_free_read_verify, .verify_write = xfs_dir3_free_write_verify, + .verify_struct = xfs_dir3_free_verify, }; /* Everything ok in the free block header? */ -static bool +static xfs_failaddr_t xfs_dir3_free_header_check( struct xfs_inode *dp, xfs_dablk_t fbno, @@ -174,22 +186,22 @@ xfs_dir3_free_header_check( struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (be32_to_cpu(hdr3->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) - return false; + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (be32_to_cpu(hdr->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) - return false; + return __this_address; } - return true; + return NULL; } static int @@ -200,6 +212,7 @@ __xfs_dir3_free_read( xfs_daddr_t mappedbno, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, @@ -208,9 +221,9 @@ __xfs_dir3_free_read( return err; /* Check things that we can't do in the verifier. */ - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); - xfs_verifier_error(*bpp); + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); + if (fa) { + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; } @@ -1906,7 +1919,7 @@ xfs_dir2_node_addname_int( (unsigned long long)ifbno, lastfbno); if (fblk) { xfs_alert(mp, - " fblk 0x%p blkno %llu index %d magic 0x%x", + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 4badd26c47e6..753aeeeffc18 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, /* xfs_dir2_data.c */ #ifdef DEBUG -#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); +extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir3_data_check(dp,bp) #endif -extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, + struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, @@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); +extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, @@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_inode *ip); +extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index be8b9755f66a..0c75a7f00883 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -156,7 +156,6 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data pointer */ @@ -192,9 +191,8 @@ xfs_dir2_block_to_sf( /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(args->geo, hdr); sfep = xfs_dir2_sf_firstentry(sfp); /* * Loop over the active and unused entries. @@ -630,7 +628,7 @@ xfs_dir2_sf_check( #endif /* DEBUG */ /* Verify the consistency of an inline directory. */ -int +xfs_failaddr_t xfs_dir2_sf_verify( struct xfs_inode *ip) { @@ -665,7 +663,7 @@ xfs_dir2_sf_verify( */ if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; + return __this_address; endp = (char *)sfp + size; @@ -674,7 +672,7 @@ xfs_dir2_sf_verify( i8count = ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; offset = dops->data_first_offset; /* Check all reported entries */ @@ -686,11 +684,11 @@ xfs_dir2_sf_verify( * within the data buffer. */ if (((char *)sfep + sizeof(*sfep)) >= endp) - return -EFSCORRUPTED; + return __this_address; /* Don't allow names with known bad length. */ if (sfep->namelen == 0) - return -EFSCORRUPTED; + return __this_address; /* * Check that the variable-length part of the structure is @@ -699,23 +697,23 @@ xfs_dir2_sf_verify( */ next_sfep = dops->sf_nextentry(sfp, sfep); if (endp < (char *)next_sfep) - return -EFSCORRUPTED; + return __this_address; /* Check that the offsets always increase. */ if (xfs_dir2_sf_get_offset(sfep) < offset) - return -EFSCORRUPTED; + return __this_address; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); if (filetype >= XFS_DIR3_FT_MAX) - return -EFSCORRUPTED; + return __this_address; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); @@ -723,16 +721,16 @@ xfs_dir2_sf_verify( sfep = next_sfep; } if (i8count != sfp->i8count) - return -EFSCORRUPTED; + return __this_address; if ((void *)sfep != (void *)endp) - return -EFSCORRUPTED; + return __this_address; /* Make sure this whole thing ought to be in local format. */ if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) - return -EFSCORRUPTED; + return __this_address; - return 0; + return NULL; } /* diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 747085b4ef44..8b7a6c3cb599 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk( /* * Do some primitive error checking on ondisk dquot data structures. */ -int -xfs_dqcheck( +xfs_failaddr_t +xfs_dquot_verify( struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ - uint flags, - const char *str) + uint flags) { - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - /* * We can encounter an uninitialized dquot buffer for 2 reasons: * 1. If we crash while deleting the quotainode(s), and those blks got @@ -69,87 +65,57 @@ xfs_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) + return __this_address; + if (ddq->d_version != XFS_DQUOT_VERSION) + return __this_address; if (ddq->d_flags != XFS_DQ_USER && ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } + ddq->d_flags != XFS_DQ_GROUP) + return __this_address; - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } + if (id != -1 && id != be32_to_cpu(ddq->d_id)) + return __this_address; - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } + if (!ddq->d_id) + return NULL; + + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && + !ddq->d_btimer) + return __this_address; + + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && + !ddq->d_itimer) + return __this_address; - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && + !ddq->d_rtbtimer) + return __this_address; + + return NULL; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dquot_repair( + struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, + xfs_dqid_t id, + uint type) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); /* * Typically, a repair is only requested by quotacheck. */ ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); memset(d, 0, sizeof(xfs_dqblk_t)); d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -163,7 +129,7 @@ xfs_dqcheck( XFS_DQUOT_CRC_OFF); } - return errs; + return 0; } STATIC bool @@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc( return true; } -STATIC bool +STATIC xfs_failaddr_t xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp, - int warn) + struct xfs_buf *bp) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_failaddr_t fa; xfs_dqid_t id = 0; int ndquots; int i; @@ -228,33 +194,43 @@ xfs_dquot_buf_verify( */ for (i = 0; i < ndquots; i++) { struct xfs_disk_dquot *ddq; - int error; ddq = &d[i].dd_diskdq; if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); - if (error) - return false; + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); + if (fa) + return fa; } - return true; + + return NULL; +} + +static xfs_failaddr_t +xfs_dquot_buf_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + return xfs_dquot_buf_verify(mp, bp); } static void xfs_dquot_buf_read_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + } } /* @@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp) || - !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_dquot_buf_verify(mp, bp) != NULL) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; } @@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify( */ static void xfs_dquot_buf_write_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_dquot_buf_ops = { .name = "xfs_dquot", .verify_read = xfs_dquot_buf_read_verify, .verify_write = xfs_dquot_buf_write_verify, + .verify_struct = xfs_dquot_buf_verify_struct, }; const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b90924104596..faf1a4edd618 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) +/* + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than + * 16MB or larger than 1TB. + */ +#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ +#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ + /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 3b57ef0f2f76..0e2cf5f0be1f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2491,7 +2491,7 @@ xfs_check_agi_unlinked( #define xfs_check_agi_unlinked(agi) #endif -static bool +static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { @@ -2500,28 +2500,28 @@ xfs_agi_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) - return false; + return __this_address; } /* * Validate the magic number of the agi block. */ if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) - return false; + return __this_address; if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) - return false; + return __this_address; if (be32_to_cpu(agi->agi_level) < 1 || be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasfinobt(&mp->m_sb) && (be32_to_cpu(agi->agi_free_level) < 1 || be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2530,10 +2530,10 @@ xfs_agi_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; xfs_check_agi_unlinked(agi); - return true; + return NULL; } static void @@ -2541,28 +2541,29 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agi_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agi_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agi_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2578,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = { .name = "xfs_agi", .verify_read = xfs_agi_read_verify, .verify_write = xfs_agi_write_verify, + .verify_struct = xfs_agi_verify, }; /* @@ -2751,3 +2753,102 @@ xfs_verify_dir_ino( return false; return xfs_verify_ino(mp, ino); } + +/* Is there an inode record covering a given range of inode numbers? */ +int +xfs_ialloc_has_inode_record( + struct xfs_btree_cur *cur, + xfs_agino_t low, + xfs_agino_t high, + bool *exists) +{ + struct xfs_inobt_rec_incore irec; + xfs_agino_t agino; + uint16_t holemask; + int has_record; + int i; + int error; + + *exists = false; + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); + while (error == 0 && has_record) { + error = xfs_inobt_get_rec(cur, &irec, &has_record); + if (error || irec.ir_startino > high) + break; + + agino = irec.ir_startino; + holemask = irec.ir_holemask; + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { + if (holemask & 1) + continue; + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && + agino <= high) { + *exists = true; + return 0; + } + } + + error = xfs_btree_increment(cur, 0, &has_record); + } + return error; +} + +/* Is there an inode record covering a given extent? */ +int +xfs_ialloc_has_inodes_at_extent( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + xfs_agino_t low; + xfs_agino_t high; + + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; + + return xfs_ialloc_has_inode_record(cur, low, high, exists); +} + +struct xfs_ialloc_count_inodes { + xfs_agino_t count; + xfs_agino_t freecount; +}; + +/* Record inode counts across all inobt records. */ +STATIC int +xfs_ialloc_count_inodes_rec( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_inobt_rec_incore irec; + struct xfs_ialloc_count_inodes *ci = priv; + + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); + ci->count += irec.ir_count; + ci->freecount += irec.ir_freecount; + + return 0; +} + +/* Count allocated and free inodes under an inobt. */ +int +xfs_ialloc_count_inodes( + struct xfs_btree_cur *cur, + xfs_agino_t *count, + xfs_agino_t *freecount) +{ + struct xfs_ialloc_count_inodes ci = {0}; + int error; + + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); + if (error) + return error; + + *count = ci.count; + *freecount = ci.freecount; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 66a8de0b1caa..c5402bb4ce0c 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -170,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); +int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); +int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, + xfs_agino_t high, bool *exists); +int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, + xfs_agino_t *freecount); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 317caba9faa6..af197a5f3a82 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -141,21 +141,42 @@ xfs_finobt_alloc_block( union xfs_btree_ptr *new, int *stat) { + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_alloc_block(cur, start, new, stat); return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_METADATA); } STATIC int -xfs_inobt_free_block( +__xfs_inobt_free_block( struct xfs_btree_cur *cur, - struct xfs_buf *bp) + struct xfs_buf *bp, + enum xfs_ag_resv_type resv) { struct xfs_owner_info oinfo; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, - &oinfo, XFS_AG_RESV_NONE); + &oinfo, resv); +} + +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_free_block(cur, bp); + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); } STATIC int @@ -250,12 +271,13 @@ xfs_inobt_diff_two_keys( be32_to_cpu(k2->inobt.ir_startino); } -static int +static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; /* @@ -271,20 +293,21 @@ xfs_inobt_verify( switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): case cpu_to_be32(XFS_FIBT_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): case cpu_to_be32(XFS_FIBT_MAGIC): break; default: - return 0; + return NULL; } /* level verification */ level = be16_to_cpu(block->bb_level); if (level >= mp->m_in_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); } @@ -293,25 +316,30 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_inobt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_inobt_write_verify( struct xfs_buf *bp) { - if (!xfs_inobt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_inobt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -322,6 +350,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = { .name = "xfs_inobt", .verify_read = xfs_inobt_read_verify, .verify_write = xfs_inobt_write_verify, + .verify_struct = xfs_inobt_verify, }; STATIC int @@ -372,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, .alloc_block = xfs_finobt_alloc_block, - .free_block = xfs_inobt_free_block, + .free_block = xfs_finobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b9c0bf80669c..4fe17b368316 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -115,8 +115,7 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -384,7 +383,7 @@ xfs_log_dinode_to_disk( } } -bool +xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, @@ -393,53 +392,122 @@ xfs_dinode_verify( uint16_t mode; uint16_t flags; uint64_t flags2; + uint64_t di_size; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) - return false; + return __this_address; + + /* Verify v3 integrity information first */ + if (dip->di_version >= 3) { + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + return __this_address; + if (be64_to_cpu(dip->di_ino) != ino) + return __this_address; + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + } /* don't allow invalid i_size */ - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) - return false; + di_size = be64_to_cpu(dip->di_size); + if (di_size & (1ULL << 63)) + return __this_address; mode = be16_to_cpu(dip->di_mode); if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) - return false; + return __this_address; /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) - return false; + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) + return __this_address; + + /* Fork checks carried over from xfs_iformat_fork */ + if (mode && + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks)) + return __this_address; + + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) + return __this_address; + + flags = be16_to_cpu(dip->di_flags); + + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) + return __this_address; + + /* Do we have appropriate data fork formats for the mode? */ + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (dip->di_format != XFS_DINODE_FMT_DEV) + return __this_address; + break; + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (dip->di_format) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (S_ISREG(mode)) + return __this_address; + if (di_size > XFS_DFORK_DSIZE(dip, mp)) + return __this_address; + /* fall through */ + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + break; + case 0: + /* Uninitialized inode ok. */ + break; + default: + return __this_address; + } + + if (XFS_DFORK_Q(dip)) { + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + } /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) - return true; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF)) - return false; - if (be64_to_cpu(dip->di_ino) != ino) - return false; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return NULL; - flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && !xfs_sb_version_hasreflink(&mp->m_sb)) - return false; + return __this_address; + + /* only regular files get reflink */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) + return __this_address; /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) - return false; + return __this_address; /* don't let reflink and dax mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return false; + return __this_address; - return true; + return NULL; } void @@ -479,6 +547,7 @@ xfs_iread( { xfs_buf_t *bp; xfs_dinode_t *dip; + xfs_failaddr_t fa; int error; /* @@ -510,11 +579,10 @@ xfs_iread( return error; /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld", - __func__, ip->i_ino); - - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); + fa = xfs_dinode_verify(mp, ip->i_ino, dip); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, + sizeof(*dip), fa); error = -EFSCORRUPTED; goto out_brelse; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index a9c97a356c30..8a5e1da52d74 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -82,7 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, - struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_dinode *dip); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index c79a1616b79d..866d2861c625 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -35,6 +35,8 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; @@ -62,69 +64,11 @@ xfs_iformat_fork( int error = 0; xfs_fsize_t di_size; - if (unlikely(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks))) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", - (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents)), - (unsigned long long) - be64_to_cpu(dip->di_nblocks)); - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", - (unsigned long long)ip->i_ino, - dip->di_forkoff); - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && - !ip->i_mount->m_rtdev_targp)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, has realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, wrong file type for reflink.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, has reflink+realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - switch (inode->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } ip->i_d.di_size = 0; inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); break; @@ -134,32 +78,7 @@ xfs_iformat_fork( case S_IFDIR: switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (local format for regular file).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(4)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - di_size = be64_to_cpu(dip->di_size); - if (unlikely(di_size < 0 || - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %Ld for local inode).", - (unsigned long long) ip->i_ino, - (long long) di_size); - XFS_CORRUPTION_ERROR("xfs_iformat(5)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - size = (int)di_size; error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); break; @@ -170,28 +89,16 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); break; default: - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, - ip->i_mount); return -EFSCORRUPTED; } break; default: - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); return -EFSCORRUPTED; } if (error) return error; - /* Check inline dir contents. */ - if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { - error = xfs_dir2_sf_verify(ip); - if (error) { - xfs_idestroy_fork(ip, XFS_DATA_FORK); - return error; - } - } - if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -208,18 +115,6 @@ xfs_iformat_fork( atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad attr fork size %Ld).", - (unsigned long long) ip->i_ino, - (long long) size); - XFS_CORRUPTION_ERROR("xfs_iformat(8)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - error = -EFSCORRUPTED; - break; - } - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: @@ -403,6 +298,7 @@ xfs_iformat_btree( */ if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= XFS_IFORK_MAXEXT(ip, whichfork) || + nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || @@ -827,3 +723,45 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* Default fork content verifiers. */ +struct xfs_ifork_ops xfs_default_ifork_ops = { + .verify_attr = xfs_attr_shortform_verify, + .verify_dir = xfs_dir2_sf_verify, + .verify_symlink = xfs_symlink_shortform_verify, +}; + +/* Verify the inline contents of the data fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_data( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* Non-local data fork, we're done. */ + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return NULL; + + /* Check the inline data fork if there is one. */ + switch (VFS_I(ip)->i_mode & S_IFMT) { + case S_IFDIR: + return ops->verify_dir(ip); + case S_IFLNK: + return ops->verify_symlink(ip); + default: + return NULL; + } +} + +/* Verify the inline contents of the attr fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_attr( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* There has to be an attr fork allocated if aformat is local. */ + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) + return NULL; + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) + return __this_address; + return ops->verify_attr(ip); +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index b9f0098e33b8..dd8aba0dd119 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -186,4 +186,18 @@ extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); +typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); + +struct xfs_ifork_ops { + xfs_ifork_verifier_t verify_symlink; + xfs_ifork_verifier_t verify_dir; + xfs_ifork_verifier_t verify_attr; +}; +extern struct xfs_ifork_ops xfs_default_ifork_ops; + +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); + #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index c10597973333..cc4cbe290939 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res( * the maximum one in terms of the pre-calculated values which were done * at mount time. */ -STATIC void +void xfs_log_get_max_trans_res( struct xfs_mount *mp, struct xfs_trans_res *max_resp) diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index d69c772271cb..bb1b13a9b5f4 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -112,8 +112,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ -#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ @@ -153,8 +151,11 @@ typedef uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) -extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, const char *str); +extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, + uint flags); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); +extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, + xfs_dqid_t id, uint type); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index c40d26763075..bee68c23d612 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1696,3 +1696,22 @@ out_cursor: xfs_trans_brelse(tp, agbp); goto out_trans; } + +/* Is there a record covering a given extent? */ +int +xfs_refcount_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.rc.rc_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rc.rc_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index eafb9d1f3b37..2a731ac68fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; } +extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3c59dd3d58d7..8479769e470d 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys( be32_to_cpu(k2->refc.rc_startblock); } -STATIC bool +STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_refcount_level) - return false; + return __this_address; } else if (level >= mp->m_refc_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); } @@ -254,25 +256,30 @@ STATIC void xfs_refcountbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_refcountbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_refcountbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } STATIC void xfs_refcountbt_write_verify( struct xfs_buf *bp) { - if (!xfs_refcountbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_refcountbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -283,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { .name = "xfs_refcountbt", .verify_read = xfs_refcountbt_read_verify, .verify_write = xfs_refcountbt_write_verify, + .verify_struct = xfs_refcountbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 50db920ceeeb..79822cf6ebe3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2387,3 +2387,70 @@ xfs_rmap_compare( else return 0; } + +/* Is there a record covering a given extent? */ +int +xfs_rmap_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.r.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.r.rm_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} + +/* + * Is there a record for this owner completely covering a given physical + * extent? If so, *has_rmap will be set to true. If there is no record + * or the record only covers part of the range, we set *has_rmap to false. + * This function doesn't perform range lookups or offset checks, so it is + * not suitable for checking data fork blocks. + */ +int +xfs_rmap_record_exists( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool *has_rmap) +{ + uint64_t owner; + uint64_t offset; + unsigned int flags; + int has_record; + struct xfs_rmap_irec irec; + int error; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || + (flags & XFS_RMAP_BMBT_BLOCK)); + + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, + &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + error = xfs_rmap_get_rec(cur, &irec, &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && + irec.rm_startblock + irec.rm_blockcount >= bno + len); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 0fcd5b1ba729..380e53be98d5 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -233,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a, union xfs_btree_rec; int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); +int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); +int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, struct xfs_owner_info *oinfo, + bool *has_rmap); #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9d9c9192584c..e829c3e489ea 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys( return 0; } -static bool +static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -325,19 +326,20 @@ xfs_rmapbt_verify( * in this case. */ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) - return false; + return __this_address; } else if (level >= mp->m_rmap_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); } @@ -346,25 +348,30 @@ static void xfs_rmapbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_rmapbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_rmapbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_rmapbt_write_verify( struct xfs_buf *bp) { - if (!xfs_rmapbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_rmapbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -375,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { .name = "xfs_rmapbt", .verify_read = xfs_rmapbt_read_verify, .verify_write = xfs_rmapbt_write_verify, + .verify_struct = xfs_rmapbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 3fb29a5ea915..106be2d0bb88 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1097,3 +1097,24 @@ xfs_verify_rtbno( { return rtbno < mp->m_sb.sb_rblocks; } + +/* Is the given extent all free? */ +int +xfs_rtalloc_extent_is_free( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_rtblock_t start, + xfs_extlen_t len, + bool *is_free) +{ + xfs_rtblock_t end; + int matches; + int error; + + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + if (error) + return error; + + *is_free = matches; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 9b5aae2bcc0b..46af6aa60a8e 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,6 +40,8 @@ #include "xfs_rmap_btree.h" #include "xfs_bmap.h" #include "xfs_refcount_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -116,6 +118,9 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { + u32 agcount = 0; + u32 rem; + if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); return -EWRONGFS; @@ -226,6 +231,13 @@ xfs_mount_validate_sb( return -EINVAL; } + /* Compute agcount for this number of dblocks and agblocks */ + if (sbp->sb_agblocks) { + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); + if (rem) + agcount++; + } + /* * More sanity checking. Most of these were stolen directly from * xfs_repair. @@ -250,6 +262,10 @@ xfs_mount_validate_sb( sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || + agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || @@ -640,11 +656,10 @@ xfs_sb_read_verify( error = xfs_sb_verify(bp, true); out_error: - if (error) { + if (error == -EFSCORRUPTED || error == -EFSBADCRC) + xfs_verifier_error(bp, error, __this_address); + else if (error) xfs_buf_ioerror(bp, error); - if (error == -EFSCORRUPTED || error == -EFSBADCRC) - xfs_verifier_error(bp); - } } /* @@ -673,13 +688,12 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int error; error = xfs_sb_verify(bp, false); if (error) { - xfs_buf_ioerror(bp, error); - xfs_verifier_error(bp); + xfs_verifier_error(bp, error, __this_address); return; } @@ -876,3 +890,88 @@ xfs_sync_sb( xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } + +int +xfs_fs_geometry( + struct xfs_sb *sbp, + struct xfs_fsop_geom *geo, + int struct_version) +{ + memset(geo, 0, sizeof(struct xfs_fsop_geom)); + + geo->blocksize = sbp->sb_blocksize; + geo->rtextsize = sbp->sb_rextsize; + geo->agblocks = sbp->sb_agblocks; + geo->agcount = sbp->sb_agcount; + geo->logblocks = sbp->sb_logblocks; + geo->sectsize = sbp->sb_sectsize; + geo->inodesize = sbp->sb_inodesize; + geo->imaxpct = sbp->sb_imax_pct; + geo->datablocks = sbp->sb_dblocks; + geo->rtblocks = sbp->sb_rblocks; + geo->rtextents = sbp->sb_rextents; + geo->logstart = sbp->sb_logstart; + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); + + if (struct_version < 2) + return 0; + + geo->sunit = sbp->sb_unit; + geo->swidth = sbp->sb_width; + + if (struct_version < 3) + return 0; + + geo->version = XFS_FSOP_GEOM_VERSION; + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2; + if (xfs_sb_version_hasattr(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; + if (xfs_sb_version_hasquota(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; + if (xfs_sb_version_hasalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; + if (xfs_sb_version_hasdalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; + if (xfs_sb_version_hasextflgbit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; + if (xfs_sb_version_hassector(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; + if (xfs_sb_version_hasasciici(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; + if (xfs_sb_version_haslazysbcount(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; + if (xfs_sb_version_hasattr2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; + if (xfs_sb_version_hasprojid32bit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; + if (xfs_sb_version_hascrc(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; + if (xfs_sb_version_hasftype(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; + if (xfs_sb_version_hasfinobt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; + if (xfs_sb_version_hassparseinodes(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; + if (xfs_sb_version_hasrmapbt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; + if (xfs_sb_version_hasreflink(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hassector(sbp)) + geo->logsectsize = sbp->sb_logsectsize; + else + geo->logsectsize = BBSIZE; + geo->rtsectsize = sbp->sb_blocksize; + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); + + if (struct_version < 4) + return 0; + + if (xfs_sb_version_haslogv2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; + + geo->logsunit = sbp->sb_logsunit; + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 961e6475a309..63dcd2a1a657 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -34,4 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, + int struct_version); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c6f4eb46fe26..d0b84da0cb1e 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -76,6 +76,9 @@ struct xfs_log_item_desc { int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); int xfs_log_calc_minimum_size(struct xfs_mount *); +struct xfs_trans_res; +void xfs_log_get_max_trans_res(struct xfs_mount *mp, + struct xfs_trans_res *max_resp); /* * Values for t_flags. @@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); +xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c484877129a0..5ef5f354587e 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -98,7 +98,7 @@ xfs_symlink_hdr_ok( return true; } -static bool +static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { @@ -106,22 +106,22 @@ xfs_symlink_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) - return false; + return __this_address; if (be32_to_cpu(dsl->sl_offset) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) - return false; + return __this_address; if (dsl->sl_owner == 0) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) - return false; + return __this_address; - return true; + return NULL; } static void @@ -129,18 +129,19 @@ xfs_symlink_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_symlink_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -148,15 +149,16 @@ xfs_symlink_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_symlink_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = { .name = "xfs_symlink", .verify_read = xfs_symlink_read_verify, .verify_write = xfs_symlink_write_verify, + .verify_struct = xfs_symlink_verify, }; void @@ -207,3 +210,37 @@ xfs_symlink_local_to_remote( xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + ifp->if_bytes - 1); } + +/* Verify the consistency of an inline symlink. */ +xfs_failaddr_t +xfs_symlink_shortform_verify( + struct xfs_inode *ip) +{ + char *sfp; + char *endp; + struct xfs_ifork *ifp; + int size; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (char *)ifp->if_u1.if_data; + size = ifp->if_bytes; + endp = sfp + size; + + /* Zero length symlinks can exist while we're deleting a remote one. */ + if (size == 0) + return NULL; + + /* No negative sizes or overly long symlink targets. */ + if (size < 0 || size > XFS_SYMLINK_MAXLEN) + return __this_address; + + /* No NULLs in the target either. */ + if (memchr(sfp, 0, size - 1)) + return __this_address; + + /* We /did/ null-terminate the buffer, right? */ + if (*endp != 0) + return __this_address; + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e2..5f17641f040f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -34,6 +34,9 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" +#define _ALLOC true +#define _FREE false + /* * A buffer has a format structure overhead in the log in addition * to the data, so we need to take this into account when reserving @@ -132,43 +135,77 @@ xfs_calc_inode_res( } /* - * The free inode btree is a conditional feature and the log reservation - * requirements differ slightly from that of the traditional inode allocation - * btree. The finobt tracks records for inode chunks with at least one free - * inode. A record can be removed from the tree for an inode allocation - * or free and thus the finobt reservation is unconditional across: + * Inode btree record insertion/removal modifies the inode btree and free space + * btrees (since the inobt does not use the agfl). This requires the following + * reservation: * - * - inode allocation - * - inode free - * - inode chunk allocation + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size * - * The 'modify' param indicates to include the record modification scenario. The - * 'alloc' param indicates to include the reservation for free space btree - * modifications on behalf of finobt modifications. This is required only for - * transactions that do not already account for free space btree modifications. + * The caller must account for SB and AG header modifications, etc. + */ +STATIC uint +xfs_calc_inobt_res( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * The free inode btree is a conditional feature. The behavior differs slightly + * from that of the traditional inode btree in that the finobt tracks records + * for inode chunks with at least one free inode. A record can be removed from + * the tree during individual inode allocation. Therefore the finobt + * reservation is unconditional for both the inode chunk allocation and + * individual inode allocation (modify) cases. * - * the free inode btree: max depth * block size - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the free inode btree entry: block size + * Behavior aside, the reservation for finobt modification is equivalent to the + * traditional inobt: cover a full finobt shape change plus block allocation. */ STATIC uint xfs_calc_finobt_res( - struct xfs_mount *mp, - int alloc, - int modify) + struct xfs_mount *mp) { - uint res; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); - if (alloc) - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); - if (modify) - res += (uint)XFS_FSB_TO_B(mp, 1); + return xfs_calc_inobt_res(mp); +} +/* + * Calculate the reservation required to allocate or free an inode chunk. This + * includes: + * + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk: m_ialloc_blks * N + * + * The size N of the inode chunk reservation depends on whether it is for + * allocation or free and which type of create transaction is in use. An inode + * chunk free always invalidates the buffers and only requires reservation for + * headers (N == 0). An inode chunk allocation requires a chunk sized + * reservation on v4 and older superblocks to initialize the chunk. No chunk + * reservation is required for allocation on v5 supers, which use ordered + * buffers to initialize. + */ +STATIC uint +xfs_calc_inode_chunk_res( + struct xfs_mount *mp, + bool alloc) +{ + uint res, size = 0; + + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (alloc) { + /* icreate tx uses ordered buffers */ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return res; + size = XFS_FSB_TO_B(mp, 1); + } + + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); return res; } @@ -232,8 +269,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* @@ -282,13 +312,14 @@ xfs_calc_rename_reservation( * For removing an inode from unlinked list at first, we can modify: * the agi hash list and counters: sector size * the on disk inode before ours in the agi hash list: inode cluster size + * the on disk inode in the agi hash list: inode cluster size */ STATIC uint xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -320,13 +351,13 @@ xfs_calc_link_reservation( /* * For adding an inode to unlinked list we can modify: * the agi hash list: sector size - * the unlinked inode: inode size + * the on disk inode: inode cluster size */ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 1); + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -379,45 +410,16 @@ xfs_calc_create_resv_modify( xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 1, 1); -} - -/* - * For create we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -STATIC uint -xfs_calc_create_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -STATIC uint -__xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_create_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); + xfs_calc_finobt_res(mp); } /* * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the finobt (record insertion) + * the inode chunk (allocation, optional init) + * the inobt (record insertion) + * the finobt (optional, record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 0); + xfs_calc_inode_chunk_res(mp, _ALLOC) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } STATIC uint @@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp) } STATIC uint -xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return xfs_calc_icreate_reservation(mp); - return __xfs_calc_create_reservation(mp); - -} - -STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { uint res = XFS_DQUOT_LOGRES(mp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - res += xfs_calc_icreate_resv_alloc(mp); - else - res += xfs_calc_create_resv_alloc(mp); - + res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); } @@ -470,7 +457,7 @@ STATIC uint xfs_calc_mkdir_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp); + return xfs_calc_icreate_reservation(mp); } @@ -483,20 +470,24 @@ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp) + + return xfs_calc_icreate_reservation(mp) + xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); } /* * In freeing an inode we can modify: * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the super block free inode counter, AGF and AGFL: sector size + * the on disk inode (agi unlinked list removal) + * the inode chunk (invalidated, headers only) + * the inode btree * the finobt (record insertion, removal or modification) + * + * Note that the inode chunk res. includes an allocfree res. for freeing of the + * inode chunk. This is technically extraneous because the inode chunk free is + * deferred (it occurs after a transaction roll). Include the extra reservation + * anyways since we've had reports of ifree transaction overruns due to too many + * agfl fixups during inode chunk frees. */ STATIC uint xfs_calc_ifree_reservation( @@ -504,15 +495,11 @@ xfs_calc_ifree_reservation( { return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 1); + xfs_calc_inode_chunk_res(mp, _FREE) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } /* @@ -842,7 +829,7 @@ xfs_trans_resv_calc( resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 2a9b4f9e93c6..fd975524f460 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -32,30 +32,17 @@ #include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" +#include "xfs_rmap.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" /* - * Set up scrub to check all the static metadata in each AG. - * This means the SB, AGF, AGI, and AGFL headers. + * Walk all the blocks in the AGFL. The fn function can return any negative + * error code or XFS_BTREE_QUERY_RANGE_ABORT. */ int -xfs_scrub_setup_ag_header( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = sc->mp; - - if (sc->sm->sm_agno >= mp->m_sb.sb_agcount || - sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; - return xfs_scrub_setup_fs(sc, ip); -} - -/* Walk all the blocks in the AGFL. */ -int xfs_scrub_walk_agfl( struct xfs_scrub_context *sc, int (*fn)(struct xfs_scrub_context *, @@ -115,6 +102,36 @@ xfs_scrub_walk_agfl( /* Superblock */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_superblock_xref( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sm->sm_agno; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_SB_BLOCK(mp); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ +} + /* * Scrub the filesystem superblock. * @@ -143,6 +160,22 @@ xfs_scrub_superblock( error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); + /* + * The superblock verifier can return several different error codes + * if it thinks the superblock doesn't look right. For a mount these + * would all get bounced back to userspace, but if we're here then the + * fs mounted successfully, which means that this secondary superblock + * is simply incorrect. Treat all these codes the same way we treat + * any corruption. + */ + switch (error) { + case -EINVAL: /* also -EWRONGFS */ + case -ENOSYS: + case -EFBIG: + error = -EFSCORRUPTED; + default: + break; + } if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) return error; @@ -387,11 +420,175 @@ xfs_scrub_superblock( BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) xfs_scrub_block_set_corrupt(sc, bp); + xfs_scrub_superblock_xref(sc, bp); + return error; } /* AGF */ +/* Tally freespace record lengths. */ +STATIC int +xfs_scrub_agf_record_bno_lengths( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + xfs_extlen_t *blocks = priv; + + (*blocks) += rec->ar_blockcount; + return 0; +} + +/* Check agf_freeblks */ +static inline void +xfs_scrub_agf_xref_freeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_extlen_t blocks = 0; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_query_all(sc->sa.bno_cur, + xfs_scrub_agf_record_bno_lengths, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_freeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Cross reference the AGF with the cntbt (freespace by length btree) */ +static inline void +xfs_scrub_agf_xref_cntbt( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t agbno; + xfs_extlen_t blocks; + int have; + int error; + + if (!sc->sa.cnt_cur) + return; + + /* Any freespace at all? */ + error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have) { + if (agf->agf_freeblks != be32_to_cpu(0)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + return; + } + + /* Check agf_longest */ + error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have || blocks != be32_to_cpu(agf->agf_longest)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Check the btree block counts in the AGF against the btrees. */ +STATIC void +xfs_scrub_agf_xref_btreeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t blocks; + xfs_agblock_t btreeblks; + int error; + + /* Check agf_rmap_blocks; set up for agf_btreeblks check */ + if (sc->sa.rmap_cur) { + error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + btreeblks = blocks - 1; + if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + } else { + btreeblks = 0; + } + + /* + * No rmap cursor; we can't xref if we have the rmapbt feature. + * We also can't do it if we're missing the free space btree cursors. + */ + if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || + !sc->sa.bno_cur || !sc->sa.cnt_cur) + return; + + /* Check agf_btreeblks */ + error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + btreeblks += blocks - 1; + + error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + btreeblks += blocks - 1; + + if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Check agf_refcount_blocks against tree size */ +static inline void +xfs_scrub_agf_xref_refcblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t blocks; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agf_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGF_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_agf_xref_freeblks(sc); + xfs_scrub_agf_xref_cntbt(sc); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_agf_xref_btreeblks(sc); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_agf_xref_refcblks(sc); + + /* scrub teardown will take care of sc->sa for us */ +} + /* Scrub the AGF. */ int xfs_scrub_agf( @@ -414,6 +611,7 @@ xfs_scrub_agf( &sc->sa.agf_bp, &sc->sa.agfl_bp); if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); @@ -470,6 +668,7 @@ xfs_scrub_agf( if (agfl_count != 0 && fl_count != agfl_count) xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xfs_scrub_agf_xref(sc); out: return error; } @@ -477,11 +676,28 @@ out: /* AGFL */ struct xfs_scrub_agfl_info { + struct xfs_owner_info oinfo; unsigned int sz_entries; unsigned int nr_entries; xfs_agblock_t *entries; }; +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agfl_block_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + struct xfs_owner_info *oinfo) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); +} + /* Scrub an AGFL block. */ STATIC int xfs_scrub_agfl_block( @@ -499,6 +715,8 @@ xfs_scrub_agfl_block( else xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); + xfs_scrub_agfl_block_xref(sc, agbno, priv); + return 0; } @@ -513,6 +731,37 @@ xfs_scrub_agblock_cmp( return (int)*a - (int)*b; } +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agfl_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGFL_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* + * Scrub teardown will take care of sc->sa for us. Leave sc->sa + * active so that the agfl block xref can use it too. + */ +} + /* Scrub the AGFL. */ int xfs_scrub_agfl( @@ -532,6 +781,12 @@ xfs_scrub_agfl( goto out; if (!sc->sa.agf_bp) return -EFSCORRUPTED; + xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); + + xfs_scrub_agfl_xref(sc); + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); @@ -548,6 +803,7 @@ xfs_scrub_agfl( } /* Check the blocks in the AGFL. */ + xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); if (error) goto out_free; @@ -575,6 +831,56 @@ out: /* AGI */ +/* Check agi_count/agi_freecount */ +static inline void +xfs_scrub_agi_xref_icounts( + struct xfs_scrub_context *sc) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + xfs_agino_t icount; + xfs_agino_t freecount; + int error; + + if (!sc->sa.ino_cur) + return; + + error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + return; + if (be32_to_cpu(agi->agi_count) != icount || + be32_to_cpu(agi->agi_freecount) != freecount) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agi_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGI_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_agi_xref_icounts(sc); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ +} + /* Scrub the AGI. */ int xfs_scrub_agi( @@ -598,6 +904,7 @@ xfs_scrub_agi( &sc->sa.agf_bp, &sc->sa.agfl_bp); if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); @@ -653,6 +960,7 @@ xfs_scrub_agi( if (agi->agi_pad32 != cpu_to_be32(0)) xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xfs_scrub_agi_xref(sc); out: return error; } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 059663e13414..517c079d3f68 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -31,6 +31,7 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" +#include "xfs_alloc.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -49,6 +50,64 @@ xfs_scrub_setup_ag_allocbt( } /* Free space btree scrubber. */ +/* + * Ensure there's a corresponding cntbt/bnobt record matching this + * bnobt/cntbt record, respectively. + */ +STATIC void +xfs_scrub_allocbt_xref_other( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_btree_cur **pcur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int has_otherrec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) + pcur = &sc->sa.cnt_cur; + else + pcur = &sc->sa.bno_cur; + if (!*pcur) + return; + + error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + if (fbno != agbno || flen != len) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_allocbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_allocbt_xref_other(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_xref_has_no_owner(sc, agbno, len); + xfs_scrub_xref_is_not_shared(sc, agbno, len); +} /* Scrub a bnobt/cntbt record. */ STATIC int @@ -70,6 +129,8 @@ xfs_scrub_allocbt_rec( !xfs_verify_agbno(mp, agno, bno + len - 1)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_allocbt_xref(bs->sc, bno, len); + return error; } @@ -100,3 +161,23 @@ xfs_scrub_cntbt( { return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); } + +/* xref check that the extent is not free */ +void +xfs_scrub_xref_is_used_space( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool is_freesp; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (is_freesp) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); +} diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 42fec0bcd9e1..d00282130492 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -37,6 +37,7 @@ #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -99,6 +100,201 @@ struct xfs_scrub_bmap_info { int whichfork; }; +/* Look for a corresponding rmap for this irec. */ +static inline bool +xfs_scrub_bmap_get_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno, + uint64_t owner, + struct xfs_rmap_irec *rmap) +{ + xfs_fileoff_t offset; + unsigned int rflags = 0; + int has_rmap; + int error; + + if (info->whichfork == XFS_ATTR_FORK) + rflags |= XFS_RMAP_ATTR_FORK; + + /* + * CoW staging extents are owned (on disk) by the refcountbt, so + * their rmaps do not have offsets. + */ + if (info->whichfork == XFS_COW_FORK) + offset = 0; + else + offset = irec->br_startoff; + + /* + * If the caller thinks this could be a shared bmbt extent (IOWs, + * any data fork extent of a reflink inode) then we have to use the + * range rmap lookup to make sure we get the correct owner/offset. + */ + if (info->is_shared) { + error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, + owner, offset, rflags, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + goto out; + } + + /* + * Otherwise, use the (faster) regular lookup. + */ + error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, + offset, rflags, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + if (!has_rmap) + goto out; + + error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + +out: + if (!has_rmap) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return has_rmap; +} + +/* Make sure that we have rmapbt records for this extent. */ +STATIC void +xfs_scrub_bmap_xref_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno) +{ + struct xfs_rmap_irec rmap; + unsigned long long rmap_end; + uint64_t owner; + + if (!info->sc->sa.rmap_cur) + return; + + if (info->whichfork == XFS_COW_FORK) + owner = XFS_RMAP_OWN_COW; + else + owner = info->sc->ip->i_ino; + + /* Find the rmap record for this irec. */ + if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) + return; + + /* Check the rmap. */ + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; + if (rmap.rm_startblock > agbno || + agbno + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check the logical offsets if applicable. CoW staging extents + * don't track logical offsets since the mappings only exist in + * memory. + */ + if (info->whichfork != XFS_COW_FORK) { + rmap_end = (unsigned long long)rmap.rm_offset + + rmap.rm_blockcount; + if (rmap.rm_offset > irec->br_startoff || + irec->br_startoff + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, + info->whichfork, irec->br_startoff); + } + + if (rmap.rm_owner != owner) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check for discrepancies between the unwritten flag in the irec and + * the rmap. Note that the (in-memory) CoW fork distinguishes between + * unwritten and written extents, but we don't track that in the rmap + * records because the blocks are owned (on-disk) by the refcountbt, + * which doesn't track unwritten state. + */ + if (owner != XFS_RMAP_OWN_COW && + irec->br_state == XFS_EXT_UNWRITTEN && + !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (info->whichfork == XFS_ATTR_FORK && + !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); +} + +/* Cross-reference a single rtdev extent record. */ +STATIC void +xfs_scrub_bmap_rt_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, + irec->br_blockcount); +} + +/* Cross-reference a single datadev extent record. */ +STATIC void +xfs_scrub_bmap_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = info->sc->mp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t len; + int error; + + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); + len = irec->br_blockcount; + + error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); + if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, + irec->br_startoff, &error)) + return; + + xfs_scrub_xref_is_used_space(info->sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); + xfs_scrub_bmap_xref_rmap(info, irec, agbno); + switch (info->whichfork) { + case XFS_DATA_FORK: + if (xfs_is_reflink_inode(info->sc->ip)) + break; + /* fall through */ + case XFS_ATTR_FORK: + xfs_scrub_xref_is_not_shared(info->sc, agbno, + irec->br_blockcount); + break; + case XFS_COW_FORK: + xfs_scrub_xref_is_cow_staging(info->sc, agbno, + irec->br_blockcount); + break; + } + + xfs_scrub_ag_free(info->sc, &info->sc->sa); +} + /* Scrub a single extent record. */ STATIC int xfs_scrub_bmap_extent( @@ -109,6 +305,7 @@ xfs_scrub_bmap_extent( { struct xfs_mount *mp = info->sc->mp; struct xfs_buf *bp = NULL; + xfs_filblks_t end; int error = 0; if (cur) @@ -136,19 +333,23 @@ xfs_scrub_bmap_extent( irec->br_startoff); /* Make sure the extent points to a valid place. */ + if (irec->br_blockcount > MAXEXTLEN) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && (!xfs_verify_rtbno(mp, irec->br_startblock) || - !xfs_verify_rtbno(mp, irec->br_startblock + - irec->br_blockcount - 1))) + !xfs_verify_rtbno(mp, end))) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && (!xfs_verify_fsbno(mp, irec->br_startblock) || - !xfs_verify_fsbno(mp, irec->br_startblock + - irec->br_blockcount - 1))) + !xfs_verify_fsbno(mp, end) || + XFS_FSB_TO_AGNO(mp, irec->br_startblock) != + XFS_FSB_TO_AGNO(mp, end))) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); @@ -158,6 +359,11 @@ xfs_scrub_bmap_extent( xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + if (info->is_rt) + xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); + else + xfs_scrub_bmap_extent_xref(info, ip, cur, irec); + info->lastoff = irec->br_startoff + irec->br_blockcount; return error; } @@ -235,7 +441,6 @@ xfs_scrub_bmap( struct xfs_ifork *ifp; xfs_fileoff_t endoff; struct xfs_iext_cursor icur; - bool found; int error = 0; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -314,9 +519,7 @@ xfs_scrub_bmap( /* Scrub extent records. */ info.lastoff = 0; ifp = XFS_IFORK_PTR(ip, whichfork); - for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); - found != 0; - found = xfs_iext_next_extent(ifp, &icur, &irec)) { + for_each_xfs_iext(ifp, &icur, &irec) { if (xfs_scrub_should_terminate(sc, &error)) break; if (isnullstartblock(irec.br_startblock)) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index df0766132ace..54218168c8f9 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -42,12 +42,14 @@ * Check for btree operation errors. See the section about handling * operational errors in common.c. */ -bool -xfs_scrub_btree_process_error( +static bool +__xfs_scrub_btree_process_error( struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { if (*error == 0) return true; @@ -60,36 +62,80 @@ xfs_scrub_btree_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, - *error, __return_address); + *error, ret_ip); else trace_xfs_scrub_btree_op_error(sc, cur, level, - *error, __return_address); + *error, ret_ip); break; } return false; } +bool +xfs_scrub_btree_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_btree_xref_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + /* Record btree block corruption. */ -void -xfs_scrub_btree_set_corrupt( +static void +__xfs_scrub_btree_set_corrupt( struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, - int level) + int level, + __u32 errflag, + void *ret_ip) { - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xfs_scrub_ifork_btree_error(sc, cur, level, - __return_address); + ret_ip); else trace_xfs_scrub_btree_error(sc, cur, level, - __return_address); + ret_ip); +} + +void +xfs_scrub_btree_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, + __return_address); +} + +void +xfs_scrub_btree_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, + __return_address); } /* @@ -268,6 +314,8 @@ xfs_scrub_btree_block_check_sibling( pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) goto out; + if (pbp) + xfs_scrub_buffer_recheck(bs->sc, pbp); if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) xfs_scrub_btree_set_corrupt(bs->sc, cur, level); @@ -315,6 +363,97 @@ out: return error; } +struct check_owner { + struct list_head list; + xfs_daddr_t daddr; + int level; +}; + +/* + * Make sure this btree block isn't in the free list and that there's + * an rmap record for it. + */ +STATIC int +xfs_scrub_btree_check_block_owner( + struct xfs_scrub_btree *bs, + int level, + xfs_daddr_t daddr) +{ + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_btnum_t btnum; + bool init_sa; + int error = 0; + + if (!bs->cur) + return 0; + + btnum = bs->cur->bc_btnum; + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); + + init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; + if (init_sa) { + error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, + level, &error)) + return error; + } + + xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); + /* + * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we + * have to nullify it (to shut down further block owner checks) if + * self-xref encounters problems. + */ + if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) + bs->cur = NULL; + + xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); + if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) + bs->cur = NULL; + + if (init_sa) + xfs_scrub_ag_free(bs->sc, &bs->sc->sa); + + return error; +} + +/* Check the owner of a btree block. */ +STATIC int +xfs_scrub_btree_check_owner( + struct xfs_scrub_btree *bs, + int level, + struct xfs_buf *bp) +{ + struct xfs_btree_cur *cur = bs->cur; + struct check_owner *co; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) + return 0; + + /* + * We want to cross-reference each btree block with the bnobt + * and the rmapbt. We cannot cross-reference the bnobt or + * rmapbt while scanning the bnobt or rmapbt, respectively, + * because we cannot alter the cursor and we'd prefer not to + * duplicate cursors. Therefore, save the buffer daddr for + * later scanning. + */ + if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { + co = kmem_alloc(sizeof(struct check_owner), + KM_MAYFAIL | KM_NOFS); + if (!co) + return -ENOMEM; + co->level = level; + co->daddr = XFS_BUF_ADDR(bp); + list_add_tail(&co->list, &bs->to_check); + return 0; + } + + return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); +} + /* * Grab and scrub a btree block given a btree pointer. Returns block * and buffer pointers (if applicable) if they're ok to use. @@ -349,6 +488,16 @@ xfs_scrub_btree_get_block( xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); return 0; } + if (*pbp) + xfs_scrub_buffer_recheck(bs->sc, *pbp); + + /* + * Check the block's owner; this function absorbs error codes + * for us. + */ + error = xfs_scrub_btree_check_owner(bs, level, *pbp); + if (error) + return error; /* * Check the block's siblings; this function absorbs error codes @@ -421,6 +570,8 @@ xfs_scrub_btree( struct xfs_btree_block *block; int level; struct xfs_buf *bp; + struct check_owner *co; + struct check_owner *n; int i; int error = 0; @@ -512,5 +663,14 @@ xfs_scrub_btree( } out: + /* Process deferred owner checks on btree blocks. */ + list_for_each_entry_safe(co, n, &bs.to_check, list) { + if (!error && bs.cur) + error = xfs_scrub_btree_check_block_owner(&bs, + co->level, co->daddr); + list_del(&co->list); + kmem_free(co); + } + return error; } diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index 4de825a626d1..e2b868ede70b 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -26,10 +26,19 @@ bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level, int *error); +/* Check for btree xref operation errors. */ +bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level, + int *error); + /* Check for btree corruption. */ void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level); +/* Check for btree xref discrepancies. */ +void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level); + struct xfs_scrub_btree; typedef int (*xfs_scrub_btree_rec_fn)( struct xfs_scrub_btree *bs, diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index ac95fe911d96..8033ab9d8f47 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -78,12 +78,14 @@ */ /* Check for operational errors. */ -bool -xfs_scrub_process_error( +static bool +__xfs_scrub_process_error( struct xfs_scrub_context *sc, xfs_agnumber_t agno, xfs_agblock_t bno, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: @@ -95,24 +97,48 @@ xfs_scrub_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: trace_xfs_scrub_op_error(sc, agno, bno, *error, - __return_address); + ret_ip); break; } return false; } -/* Check for operational errors for a file offset. */ bool -xfs_scrub_fblock_process_error( +xfs_scrub_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_xref_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + +/* Check for operational errors for a file offset. */ +static bool +__xfs_scrub_fblock_process_error( struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: @@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, - __return_address); + ret_ip); break; } return false; } +bool +xfs_scrub_fblock_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_fblock_xref_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + /* * Handling scrub corruption/optimization/warning checks. * @@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt( trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); } +/* Record a corruption while cross-referencing. */ +void +xfs_scrub_block_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); +} + /* * Record a corrupt inode. The trace data will include the block given * by bp if bp is given; otherwise it will use the block location of the @@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt( trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); } +/* Record a corruption while cross-referencing with an inode. */ +void +xfs_scrub_ino_xref_set_corrupt( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); +} + /* Record corruption in a block indexed by a file fork. */ void xfs_scrub_fblock_set_corrupt( @@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt( trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); } +/* Record a corruption while cross-referencing a fork block. */ +void +xfs_scrub_fblock_xref_set_corrupt( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); +} + /* * Warn about inodes that need administrative review but is not * incorrect. @@ -245,6 +325,59 @@ xfs_scrub_set_incomplete( } /* + * rmap scrubbing -- compute the number of blocks with a given owner, + * at least according to the reverse mapping data. + */ + +struct xfs_scrub_rmap_ownedby_info { + struct xfs_owner_info *oinfo; + xfs_filblks_t *blocks; +}; + +STATIC int +xfs_scrub_count_rmap_ownedby_irec( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_rmap_ownedby_info *sroi = priv; + bool irec_attr; + bool oinfo_attr; + + irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; + oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; + + if (rec->rm_owner != sroi->oinfo->oi_owner) + return 0; + + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) + (*sroi->blocks) += rec->rm_blockcount; + + return 0; +} + +/* + * Calculate the number of blocks the rmap thinks are owned by something. + * The caller should pass us an rmapbt cursor. + */ +int +xfs_scrub_count_rmap_ownedby_ag( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks) +{ + struct xfs_scrub_rmap_ownedby_info sroi; + + sroi.oinfo = oinfo; + *blocks = 0; + sroi.blocks = blocks; + + return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, + &sroi); +} + +/* * AG scrubbing * * These helpers facilitate locking an allocation group's header @@ -302,7 +435,7 @@ xfs_scrub_ag_read_headers( error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) goto out; - + error = 0; out: return error; } @@ -472,7 +605,7 @@ xfs_scrub_setup_ag_btree( return error; } - error = xfs_scrub_setup_ag_header(sc, ip); + error = xfs_scrub_setup_fs(sc, ip); if (error) return error; @@ -503,18 +636,11 @@ xfs_scrub_get_inode( struct xfs_scrub_context *sc, struct xfs_inode *ip_in) { + struct xfs_imap imap; struct xfs_mount *mp = sc->mp; struct xfs_inode *ip = NULL; int error; - /* - * If userspace passed us an AG number or a generation number - * without an inode number, they haven't got a clue so bail out - * immediately. - */ - if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) - return -EINVAL; - /* We want to scan the inode we already had opened. */ if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { sc->ip = ip_in; @@ -526,10 +652,33 @@ xfs_scrub_get_inode( return -ENOENT; error = xfs_iget(mp, NULL, sc->sm->sm_ino, XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); - if (error == -ENOENT || error == -EINVAL) { - /* inode doesn't exist... */ - return -ENOENT; - } else if (error) { + switch (error) { + case -ENOENT: + /* Inode doesn't exist, just bail out. */ + return error; + case 0: + /* Got an inode, continue. */ + break; + case -EINVAL: + /* + * -EINVAL with IGET_UNTRUSTED could mean one of several + * things: userspace gave us an inode number that doesn't + * correspond to fs space, or doesn't have an inobt entry; + * or it could simply mean that the inode buffer failed the + * read verifiers. + * + * Try just the inode mapping lookup -- if it succeeds, then + * the inode buffer verifier failed and something needs fixing. + * Otherwise, we really couldn't find it so tell userspace + * that it no longer exists. + */ + error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); + if (error) + return -ENOENT; + error = -EFSCORRUPTED; + /* fall through */ + default: trace_xfs_scrub_op_error(sc, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), @@ -572,3 +721,61 @@ out: /* scrub teardown will unlock and release the inode for us */ return error; } + +/* + * Predicate that decides if we need to evaluate the cross-reference check. + * If there was an error accessing the cross-reference btree, just delete + * the cursor and skip the check. + */ +bool +xfs_scrub_should_check_xref( + struct xfs_scrub_context *sc, + int *error, + struct xfs_btree_cur **curpp) +{ + if (*error == 0) + return true; + + if (curpp) { + /* If we've already given up on xref, just bail out. */ + if (!*curpp) + return false; + + /* xref error, delete cursor and bail out. */ + xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); + *curpp = NULL; + } + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; + trace_xfs_scrub_xref_error(sc, *error, __return_address); + + /* + * Errors encountered during cross-referencing with another + * data structure should not cause this scrubber to abort. + */ + *error = 0; + return false; +} + +/* Run the structure verifiers on in-memory buffers to detect bad memory. */ +void +xfs_scrub_buffer_recheck( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + if (bp->b_ops == NULL) { + xfs_scrub_block_set_corrupt(sc, bp); + return; + } + if (bp->b_ops->verify_struct == NULL) { + xfs_scrub_set_incomplete(sc); + return; + } + fa = bp->b_ops->verify_struct(bp); + if (!fa) + return; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, fa); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 5c043855570e..ddb65d22c76a 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno, bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset, int *error); +bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, + xfs_agnumber_t agno, xfs_agblock_t bno, int *error); +bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset, int *error); + void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp); void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, @@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset); +void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_buf *bp); +void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, + struct xfs_buf *bp); +void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset); + void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, struct xfs_buf *bp); void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, @@ -76,10 +88,12 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); int xfs_scrub_checkpoint_log(struct xfs_mount *mp); +/* Are we set up for a cross-referencing check? */ +bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, + struct xfs_btree_cur **curpp); + /* Setup functions */ int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc, - struct xfs_inode *ip); int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, struct xfs_inode *ip); int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, @@ -134,11 +148,16 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc, int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, void *), void *priv); +int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks); int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, struct xfs_inode *ip, bool force_log); int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, struct xfs_inode *ip, unsigned int resblks); +void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index d94edd93cba8..bffdb7dc09bf 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify( return; } } +static void * +xfs_scrub_da_btree_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + bp->b_ops = &xfs_da3_node_buf_ops; + return bp->b_ops->verify_struct(bp); + } +} static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { .name = "xfs_scrub_da_btree", .verify_read = xfs_scrub_da_btree_read_verify, .verify_write = xfs_scrub_da_btree_write_verify, + .verify_struct = xfs_scrub_da_btree_verify, }; /* Check a block's sibling. */ @@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling( xfs_scrub_da_set_corrupt(ds, level); return error; } + if (ds->state->altpath.blk[level].bp) + xfs_scrub_buffer_recheck(ds->sc, + ds->state->altpath.blk[level].bp); /* Compare upper level pointer to sibling pointer. */ if (ds->state->altpath.blk[level].blkno != sibling) @@ -358,6 +378,8 @@ xfs_scrub_da_btree_block( &xfs_scrub_da_btree_buf_ops); if (!xfs_scrub_da_process_error(ds, level, &error)) goto out_nobuf; + if (blk->bp) + xfs_scrub_buffer_recheck(ds->sc, blk->bp); /* * We didn't find a dir btree root block, which means that diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 69e1efdd4019..50b6a26b0299 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype( * inodes can trigger immediate inactive cleanup of the inode. */ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); - if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, + if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -200,6 +200,7 @@ xfs_scrub_dir_rec( struct xfs_inode *dp = ds->dargs.dp; struct xfs_dir2_data_entry *dent; struct xfs_buf *bp; + char *p, *endp; xfs_ino_t ino; xfs_dablk_t rec_bno; xfs_dir2_db_t db; @@ -237,9 +238,37 @@ xfs_scrub_dir_rec( xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out; } + xfs_scrub_buffer_recheck(ds->sc, bp); - /* Retrieve the entry, sanity check it, and compare hashes. */ dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); + + /* Make sure we got a real directory entry. */ + p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); + endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); + if (!endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + while (p < endp) { + struct xfs_dir2_data_entry *dep; + struct xfs_dir2_data_unused *dup; + + dup = (struct xfs_dir2_data_unused *)p; + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + p += be16_to_cpu(dup->length); + continue; + } + dep = (struct xfs_dir2_data_entry *)p; + if (dep == dent) + break; + p += mp->m_dir_inode_ops->data_entsize(dep->namelen); + } + if (p >= endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + + /* Retrieve the entry, sanity check it, and compare hashes. */ ino = be64_to_cpu(dent->inumber); hash = be32_to_cpu(ent->hashval); tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); @@ -324,6 +353,7 @@ xfs_scrub_directory_data_bestfree( } if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ @@ -361,13 +391,7 @@ xfs_scrub_directory_data_bestfree( /* Make sure the bestfrees are actually the best free spaces. */ ptr = (char *)d_ops->data_entry_p(bp->b_addr); - if (is_block) { - struct xfs_dir2_block_tail *btp; - - btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } else - endptr = (char *)bp->b_addr + BBTOB(bp->b_length); + endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); /* Iterate the entries, stopping when we hit or go past the end. */ while (ptr < endptr) { @@ -474,6 +498,7 @@ xfs_scrub_directory_leaf1_bestfree( error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); leaf = bp->b_addr; d_ops->leaf_hdr_from_disk(&leafhdr, leaf); @@ -559,6 +584,7 @@ xfs_scrub_directory_free_bestfree( error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 496d6f2fbb9e..63ab3f98430d 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -58,6 +58,56 @@ xfs_scrub_setup_ag_iallocbt( /* Inode btree scrubber. */ +/* + * If we're checking the finobt, cross-reference with the inobt. + * Otherwise we're checking the inobt; if there is an finobt, make sure + * we have a record or not depending on freecount. + */ +static inline void +xfs_scrub_iallocbt_chunk_xref_other( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino) +{ + struct xfs_btree_cur **pcur; + bool has_irec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) + pcur = &sc->sa.ino_cur; + else + pcur = &sc->sa.fino_cur; + if (!(*pcur)) + return; + error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (((irec->ir_freecount > 0 && !has_irec) || + (irec->ir_freecount == 0 && has_irec))) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_iallocbt_chunk_xref( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_owner_info oinfo; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, len); +} + /* Is this chunk worth checking? */ STATIC bool xfs_scrub_iallocbt_chunk( @@ -76,6 +126,8 @@ xfs_scrub_iallocbt_chunk( !xfs_verify_agbno(mp, agno, bno + len - 1)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); + return true; } @@ -190,8 +242,14 @@ xfs_scrub_iallocbt_check_freemask( } /* If any part of this is a hole, skip it. */ - if (ir_holemask) + if (ir_holemask) { + xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, + blks_per_cluster, &oinfo); continue; + } + + xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, + &oinfo); /* Grab the inode cluster buffer. */ imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, @@ -227,6 +285,7 @@ xfs_scrub_iallocbt_rec( union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; + xfs_filblks_t *inode_blocks = bs->private; struct xfs_inobt_rec_incore irec; uint64_t holes; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; @@ -264,6 +323,9 @@ xfs_scrub_iallocbt_rec( (agbno & (xfs_icluster_size_fsb(mp) - 1))) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + *inode_blocks += XFS_B_TO_FSB(mp, + irec.ir_count * mp->m_sb.sb_inodesize); + /* Handle non-sparse inodes */ if (!xfs_inobt_issparse(irec.ir_holemask)) { len = XFS_B_TO_FSB(mp, @@ -308,6 +370,72 @@ out: return error; } +/* + * Make sure the inode btrees are as large as the rmap thinks they are. + * Don't bother if we're missing btree cursors, as we're already corrupt. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_btreeblks( + struct xfs_scrub_context *sc, + int which) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + xfs_extlen_t inobt_blocks = 0; + xfs_extlen_t finobt_blocks = 0; + int error; + + if (!sc->sa.ino_cur || !sc->sa.rmap_cur || + (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur)) + return; + + /* Check that we saw as many inobt blocks as the rmap says. */ + error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); + if (!xfs_scrub_process_error(sc, 0, 0, &error)) + return; + + if (sc->sa.fino_cur) { + error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); + if (!xfs_scrub_process_error(sc, 0, 0, &error)) + return; + } + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inobt_blocks + finobt_blocks) + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); +} + +/* + * Make sure that the inobt records point to the same number of blocks as + * the rmap says are owned by inodes. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_inodes( + struct xfs_scrub_context *sc, + int which, + xfs_filblks_t inode_blocks) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many inode blocks as the rmap knows about. */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inode_blocks) + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); +} + /* Scrub the inode btrees for some AG. */ STATIC int xfs_scrub_iallocbt( @@ -316,10 +444,29 @@ xfs_scrub_iallocbt( { struct xfs_btree_cur *cur; struct xfs_owner_info oinfo; + xfs_filblks_t inode_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; - return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); + error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, + &inode_blocks); + if (error) + return error; + + xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); + + /* + * If we're scrubbing the inode btree, inode_blocks is the number of + * blocks pointed to by all the inode chunk records. Therefore, we + * should compare to the number of inode chunk blocks that the rmap + * knows about. We can't do this for the finobt since it only points + * to inode chunks with free inodes. + */ + if (which == XFS_BTNUM_INO) + xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); + + return error; } int @@ -335,3 +482,46 @@ xfs_scrub_finobt( { return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); } + +/* See if an inode btree has (or doesn't have) an inode chunk record. */ +static inline void +xfs_scrub_xref_inode_check( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + struct xfs_btree_cur **icur, + bool should_have_inodes) +{ + bool has_inodes; + int error; + + if (!(*icur)) + return; + + error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); + if (!xfs_scrub_should_check_xref(sc, &error, icur)) + return; + if (has_inodes != should_have_inodes) + xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); +} + +/* xref check that the extent is not covered by inodes */ +void +xfs_scrub_xref_is_not_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); +} + +/* xref check that the extent is covered by inodes */ +void +xfs_scrub_xref_is_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); +} diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index f120fb20452f..21297bef8df1 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -36,9 +36,13 @@ #include "xfs_ialloc.h" #include "xfs_da_format.h" #include "xfs_reflink.h" +#include "xfs_rmap.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/btree.h" #include "scrub/trace.h" /* @@ -64,7 +68,7 @@ xfs_scrub_setup_inode( break; case -EFSCORRUPTED: case -EFSBADCRC: - return 0; + return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); default: return error; } @@ -392,6 +396,14 @@ xfs_scrub_dinode( break; } + /* di_[amc]time.nsec */ + if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + /* * di_size. xfs_dinode_verify checks for things that screw up * the VFS such as the upper bit being set and zero-length @@ -495,6 +507,8 @@ xfs_scrub_dinode( } if (dip->di_version >= 3) { + if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, flags2); @@ -546,7 +560,7 @@ xfs_scrub_inode_map_raw( */ bp->b_ops = &xfs_inode_buf_ops; dip = xfs_buf_offset(bp, imap.im_boffset); - if (!xfs_dinode_verify(mp, ino, dip) || + if (xfs_dinode_verify(mp, ino, dip) != NULL || !xfs_dinode_good_version(mp, dip->di_version)) { xfs_scrub_ino_set_corrupt(sc, ino, bp); goto out_buf; @@ -567,18 +581,155 @@ out_buf: return error; } +/* + * Make sure the finobt doesn't think this inode is free. + * We don't have to check the inobt ourselves because we got the inode via + * IGET_UNTRUSTED, which checks the inobt for us. + */ +static void +xfs_scrub_inode_xref_finobt( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + struct xfs_inobt_rec_incore rec; + xfs_agino_t agino; + int has_record; + int error; + + if (!sc->sa.fino_cur) + return; + + agino = XFS_INO_TO_AGINO(sc->mp, ino); + + /* + * Try to get the finobt record. If we can't get it, then we're + * in good shape. + */ + error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, + &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + /* + * Otherwise, make sure this record either doesn't cover this inode, + * or that it does but it's marked present. + */ + if (rec.ir_startino > agino || + rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) + return; + + if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); +} + +/* Cross reference the inode fields with the forks. */ +STATIC void +xfs_scrub_inode_xref_bmap( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip) +{ + xfs_extnum_t nextents; + xfs_filblks_t count; + xfs_filblks_t acount; + int error; + + /* Walk all the extents to check nextents/naextents/nblocks. */ + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, + &nextents, &count); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents < be32_to_cpu(dip->di_nextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, + &nextents, &acount); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents != be16_to_cpu(dip->di_anextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + + /* Check nblocks against the inode. */ + if (count + acount != be64_to_cpu(dip->di_nblocks)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_inode_xref( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_dinode *dip) +{ + struct xfs_owner_info oinfo; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agno = XFS_INO_TO_AGNO(sc->mp, ino); + agbno = XFS_INO_TO_AGBNO(sc->mp, ino); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_inode_xref_finobt(sc, ino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_inode_xref_bmap(sc, dip); + + xfs_scrub_ag_free(sc, &sc->sa); +} + +/* + * If the reflink iflag disagrees with a scan for shared data fork extents, + * either flag an error (shared extents w/ no flag) or a preen (flag set w/o + * any shared extents). We already checked for reflink iflag set on a non + * reflink filesystem. + */ +static void +xfs_scrub_inode_check_reflink_iflag( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = sc->mp; + bool has_shared; + int error; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return; + + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, + &has_shared); + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), + XFS_INO_TO_AGBNO(mp, ino), &error)) + return; + if (xfs_is_reflink_inode(sc->ip) && !has_shared) + xfs_scrub_ino_set_preen(sc, ino, bp); + else if (!xfs_is_reflink_inode(sc->ip) && has_shared) + xfs_scrub_ino_set_corrupt(sc, ino, bp); +} + /* Scrub an inode. */ int xfs_scrub_inode( struct xfs_scrub_context *sc) { struct xfs_dinode di; - struct xfs_mount *mp = sc->mp; struct xfs_buf *bp = NULL; struct xfs_dinode *dip; xfs_ino_t ino; - - bool has_shared; int error = 0; /* Did we get the in-core inode, or are we doing this manually? */ @@ -603,19 +754,14 @@ xfs_scrub_inode( goto out; /* - * Does this inode have the reflink flag set but no shared extents? - * Set the preening flag if this is the case. + * Look for discrepancies between file's data blocks and the reflink + * iflag. We already checked the iflag against the file mode when + * we scrubbed the dinode. */ - if (xfs_is_reflink_inode(sc->ip)) { - error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, - &has_shared); - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), - XFS_INO_TO_AGBNO(mp, ino), &error)) - goto out; - if (!has_shared) - xfs_scrub_ino_set_preen(sc, ino, bp); - } + if (S_ISREG(VFS_I(sc->ip)->i_mode)) + xfs_scrub_inode_check_reflink_iflag(sc, ino, bp); + xfs_scrub_inode_xref(sc, ino, dip); out: if (bp) xfs_trans_brelse(sc->tp, bp); diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 63a25334fc83..0d3851410c74 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -169,9 +169,9 @@ xfs_scrub_parent_validate( * immediate inactive cleanup of the inode. */ error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; - if (dp == sc->ip) { + if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out_rele; } @@ -185,7 +185,7 @@ xfs_scrub_parent_validate( */ if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; if (nlink != expected_nlink) @@ -205,7 +205,7 @@ xfs_scrub_parent_validate( /* Go looking for our dentry. */ error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; /* Drop the parent lock, relock this inode. */ diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 3d9037eceaf1..51daa4ae2627 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -67,13 +67,6 @@ xfs_scrub_setup_quota( { uint dqtype; - /* - * If userspace gave us an AG number or inode data, they don't - * know what they're doing. Get out. - */ - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; - dqtype = xfs_scrub_quota_to_dqtype(sc); if (dqtype == 0) return -EINVAL; diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 2f88a8d44bd0..400f1561cd3d 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -31,6 +31,7 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -50,6 +51,307 @@ xfs_scrub_setup_ag_refcountbt( /* Reference count btree scrubber. */ +/* + * Confirming Reference Counts via Reverse Mappings + * + * We want to count the reverse mappings overlapping a refcount record + * (bno, len, refcount), allowing for the possibility that some of the + * overlap may come from smaller adjoining reverse mappings, while some + * comes from single extents which overlap the range entirely. The + * outer loop is as follows: + * + * 1. For all reverse mappings overlapping the refcount extent, + * a. If a given rmap completely overlaps, mark it as seen. + * b. Otherwise, record the fragment (in agbno order) for later + * processing. + * + * Once we've seen all the rmaps, we know that for all blocks in the + * refcount record we want to find $refcount owners and we've already + * visited $seen extents that overlap all the blocks. Therefore, we + * need to find ($refcount - $seen) owners for every block in the + * extent; call that quantity $target_nr. Proceed as follows: + * + * 2. Pull the first $target_nr fragments from the list; all of them + * should start at or before the start of the extent. + * Call this subset of fragments the working set. + * 3. Until there are no more unprocessed fragments, + * a. Find the shortest fragments in the set and remove them. + * b. Note the block number of the end of these fragments. + * c. Pull the same number of fragments from the list. All of these + * fragments should start at the block number recorded in the + * previous step. + * d. Put those fragments in the set. + * 4. Check that there are $target_nr fragments remaining in the list, + * and that they all end at or beyond the end of the refcount extent. + * + * If the refcount is correct, all the check conditions in the algorithm + * should always hold true. If not, the refcount is incorrect. + */ +struct xfs_scrub_refcnt_frag { + struct list_head list; + struct xfs_rmap_irec rm; +}; + +struct xfs_scrub_refcnt_check { + struct xfs_scrub_context *sc; + struct list_head fragments; + + /* refcount extent we're examining */ + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; + + /* number of owners seen */ + xfs_nlink_t seen; +}; + +/* + * Decide if the given rmap is large enough that we can redeem it + * towards refcount verification now, or if it's a fragment, in + * which case we'll hang onto it in the hopes that we'll later + * discover that we've collected exactly the correct number of + * fragments as the refcountbt says we should have. + */ +STATIC int +xfs_scrub_refcountbt_rmap_check( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_refcnt_check *refchk = priv; + struct xfs_scrub_refcnt_frag *frag; + xfs_agblock_t rm_last; + xfs_agblock_t rc_last; + int error = 0; + + if (xfs_scrub_should_terminate(refchk->sc, &error)) + return error; + + rm_last = rec->rm_startblock + rec->rm_blockcount - 1; + rc_last = refchk->bno + refchk->len - 1; + + /* Confirm that a single-owner refc extent is a CoW stage. */ + if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { + xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); + return 0; + } + + if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { + /* + * The rmap overlaps the refcount record, so we can confirm + * one refcount owner seen. + */ + refchk->seen++; + } else { + /* + * This rmap covers only part of the refcount record, so + * save the fragment for later processing. If the rmapbt + * is healthy each rmap_irec we see will be in agbno order + * so we don't need insertion sort here. + */ + frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), + KM_MAYFAIL | KM_NOFS); + if (!frag) + return -ENOMEM; + memcpy(&frag->rm, rec, sizeof(frag->rm)); + list_add_tail(&frag->list, &refchk->fragments); + } + + return 0; +} + +/* + * Given a bunch of rmap fragments, iterate through them, keeping + * a running tally of the refcount. If this ever deviates from + * what we expect (which is the refcountbt's refcount minus the + * number of extents that totally covered the refcountbt extent), + * we have a refcountbt error. + */ +STATIC void +xfs_scrub_refcountbt_process_rmap_fragments( + struct xfs_scrub_refcnt_check *refchk) +{ + struct list_head worklist; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + xfs_agblock_t bno; + xfs_agblock_t rbno; + xfs_agblock_t next_rbno; + xfs_nlink_t nr; + xfs_nlink_t target_nr; + + target_nr = refchk->refcount - refchk->seen; + if (target_nr == 0) + return; + + /* + * There are (refchk->rc.rc_refcount - refchk->nr refcount) + * references we haven't found yet. Pull that many off the + * fragment list and figure out where the smallest rmap ends + * (and therefore the next rmap should start). All the rmaps + * we pull off should start at or before the beginning of the + * refcount record's range. + */ + INIT_LIST_HEAD(&worklist); + rbno = NULLAGBLOCK; + nr = 1; + + /* Make sure the fragments actually /are/ in agbno order. */ + bno = 0; + list_for_each_entry(frag, &refchk->fragments, list) { + if (frag->rm.rm_startblock < bno) + goto done; + bno = frag->rm.rm_startblock; + } + + /* + * Find all the rmaps that start at or before the refc extent, + * and put them on the worklist. + */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + if (frag->rm.rm_startblock > refchk->bno) + goto done; + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno < rbno) + rbno = bno; + list_move_tail(&frag->list, &worklist); + if (nr == target_nr) + break; + nr++; + } + + /* + * We should have found exactly $target_nr rmap fragments starting + * at or before the refcount extent. + */ + if (nr != target_nr) + goto done; + + while (!list_empty(&refchk->fragments)) { + /* Discard any fragments ending at rbno from the worklist. */ + nr = 0; + next_rbno = NULLAGBLOCK; + list_for_each_entry_safe(frag, n, &worklist, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno != rbno) { + if (bno < next_rbno) + next_rbno = bno; + continue; + } + list_del(&frag->list); + kmem_free(frag); + nr++; + } + + /* Try to add nr rmaps starting at rbno to the worklist. */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (frag->rm.rm_startblock != rbno) + goto done; + list_move_tail(&frag->list, &worklist); + if (next_rbno > bno) + next_rbno = bno; + nr--; + if (nr == 0) + break; + } + + /* + * If we get here and nr > 0, this means that we added fewer + * items to the worklist than we discarded because the fragment + * list ran out of items. Therefore, we cannot maintain the + * required refcount. Something is wrong, so we're done. + */ + if (nr) + goto done; + + rbno = next_rbno; + } + + /* + * Make sure the last extent we processed ends at or beyond + * the end of the refcount extent. + */ + if (rbno < refchk->bno + refchk->len) + goto done; + + /* Actually record us having seen the remaining refcount. */ + refchk->seen = refchk->refcount; +done: + /* Delete fragments and work list. */ + list_for_each_entry_safe(frag, n, &worklist, list) { + list_del(&frag->list); + kmem_free(frag); + } + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + +/* Use the rmap entries covering this extent to verify the refcount. */ +STATIC void +xfs_scrub_refcountbt_xref_rmap( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + struct xfs_scrub_refcnt_check refchk = { + .sc = sc, + .bno = bno, + .len = len, + .refcount = refcount, + .seen = 0, + }; + struct xfs_rmap_irec low; + struct xfs_rmap_irec high; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Cross-reference with the rmapbt to confirm the refcount. */ + memset(&low, 0, sizeof(low)); + low.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rm_startblock = bno + len - 1; + + INIT_LIST_HEAD(&refchk.fragments); + error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, + &xfs_scrub_refcountbt_rmap_check, &refchk); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + goto out_free; + + xfs_scrub_refcountbt_process_rmap_fragments(&refchk); + if (refcount != refchk.seen) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + +out_free: + list_for_each_entry_safe(frag, n, &refchk.fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_refcountbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); +} + /* Scrub a refcountbt record. */ STATIC int xfs_scrub_refcountbt_rec( @@ -57,6 +359,7 @@ xfs_scrub_refcountbt_rec( union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agblock_t *cow_blocks = bs->private; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; xfs_agblock_t bno; xfs_extlen_t len; @@ -72,6 +375,8 @@ xfs_scrub_refcountbt_rec( has_cowflag = (bno & XFS_REFC_COW_START); if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + if (has_cowflag) + (*cow_blocks) += len; /* Check the extent. */ bno &= ~XFS_REFC_COW_START; @@ -83,17 +388,128 @@ xfs_scrub_refcountbt_rec( if (refcount == 0) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); + return error; } +/* Make sure we have as many refc blocks as the rmap says. */ +STATIC void +xfs_scrub_refcount_xref_rmap( + struct xfs_scrub_context *sc, + struct xfs_owner_info *oinfo, + xfs_filblks_t cow_blocks) +{ + xfs_extlen_t refcbt_blocks = 0; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many refcbt blocks as the rmap knows about. */ + error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); + if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) + return; + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != refcbt_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + + /* Check that we saw as many cow blocks as the rmap knows about. */ + xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != cow_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + /* Scrub the refcount btree for some AG. */ int xfs_scrub_refcountbt( struct xfs_scrub_context *sc) { struct xfs_owner_info oinfo; + xfs_agblock_t cow_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); - return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, - &oinfo, NULL); + error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, + &oinfo, &cow_blocks); + if (error) + return error; + + xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); + + return 0; +} + +/* xref check that a cow staging extent is marked in the refcountbt. */ +void +xfs_scrub_xref_is_cow_staging( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_refcount_irec rc; + bool has_cowflag; + int has_refcount; + int error; + + if (!sc->sa.refc_cur) + return; + + /* Find the CoW staging extent. */ + error = xfs_refcount_lookup_le(sc->sa.refc_cur, + agbno + XFS_REFC_COW_START, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + /* CoW flag must be set, refcount must be 1. */ + has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); + if (!has_cowflag || rc.rc_refcount != 1) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + + /* Must be at least as long as what was passed in */ + if (rc.rc_blockcount < len) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + +/* + * xref check that the extent is not shared. Only file data blocks + * can have multiple owners. + */ +void +xfs_scrub_xref_is_not_shared( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool shared; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (shared) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); } diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 97846c424690..8f2a7c3ff455 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -32,6 +32,7 @@ #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -51,6 +52,61 @@ xfs_scrub_setup_ag_rmapbt( /* Reverse-mapping scrubber. */ +/* Cross-reference a rmap against the refcount btree. */ +STATIC void +xfs_scrub_rmapbt_xref_refc( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + xfs_agblock_t fbno; + xfs_extlen_t flen; + bool non_inode; + bool is_bmbt; + bool is_attr; + bool is_unwritten; + int error; + + if (!sc->sa.refc_cur) + return; + + non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; + + /* If this is shared, must be a data fork extent. */ + error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, + irec->rm_blockcount, &fbno, &flen, false); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_rmapbt_xref( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + xfs_agblock_t agbno = irec->rm_startblock; + xfs_extlen_t len = irec->rm_blockcount; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_INODES) + xfs_scrub_xref_is_inode_chunk(sc, agbno, len); + else + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_COW) + xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, + irec->rm_blockcount); + else + xfs_scrub_rmapbt_xref_refc(sc, irec); +} + /* Scrub an rmapbt record. */ STATIC int xfs_scrub_rmapbt_rec( @@ -121,6 +177,8 @@ xfs_scrub_rmapbt_rec( irec.rm_owner > XFS_RMAP_OWN_FS) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); } + + xfs_scrub_rmapbt_xref(bs->sc, &irec); out: return error; } @@ -136,3 +194,68 @@ xfs_scrub_rmapbt( return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, &oinfo, NULL); } + +/* xref check that the extent is owned by a given owner */ +static inline void +xfs_scrub_xref_check_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool should_have_rmap) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, + &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap != should_have_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + +/* xref check that the extent is owned by a given owner */ +void +xfs_scrub_xref_is_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); +} + +/* xref check that the extent is not owned by a given owner */ +void +xfs_scrub_xref_is_not_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); +} + +/* xref check that the extent has no reverse mapping at all */ +void +xfs_scrub_xref_has_no_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index c6fedb698008..26390991369a 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -43,22 +43,14 @@ xfs_scrub_setup_rt( struct xfs_scrub_context *sc, struct xfs_inode *ip) { - struct xfs_mount *mp = sc->mp; - int error = 0; - - /* - * If userspace gave us an AG number or inode data, they don't - * know what they're doing. Get out. - */ - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; + int error; error = xfs_scrub_setup_fs(sc, ip); if (error) return error; sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; - sc->ip = mp->m_rbmip; + sc->ip = sc->mp->m_rbmip; xfs_ilock(sc->ip, sc->ilock_flags); return 0; @@ -106,3 +98,26 @@ xfs_scrub_rtsummary( /* XXX: implement this some day */ return -ENOENT; } + + +/* xref check that the extent is not free in the rtbitmap */ +void +xfs_scrub_xref_is_used_rt_space( + struct xfs_scrub_context *sc, + xfs_rtblock_t fsbno, + xfs_extlen_t len) +{ + bool is_free; + int error; + + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len, + &is_free); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + goto out_unlock; + if (is_free) + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino, + NULL); +out_unlock: + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index ab3aef2ae823..26c75967a072 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -110,6 +110,16 @@ * structure itself is corrupt, the CORRUPT flag will be set. If * the metadata is correct but otherwise suboptimal, the PREEN flag * will be set. + * + * We perform secondary validation of filesystem metadata by + * cross-referencing every record with all other available metadata. + * For example, for block mapping extents, we verify that there are no + * records in the free space and inode btrees corresponding to that + * space extent and that there is a corresponding entry in the reverse + * mapping btree. Inconsistent metadata is noted by setting the + * XCORRUPT flag; btree query function errors are noted by setting the + * XFAIL flag and deleting the cursor to prevent further attempts to + * cross-reference with a defective btree. */ /* @@ -128,8 +138,6 @@ xfs_scrub_probe( { int error = 0; - if (sc->sm->sm_ino || sc->sm->sm_agno) - return -EINVAL; if (xfs_scrub_should_terminate(sc, &error)) return error; @@ -151,7 +159,8 @@ xfs_scrub_teardown( sc->tp = NULL; } if (sc->ip) { - xfs_iunlock(sc->ip, sc->ilock_flags); + if (sc->ilock_flags) + xfs_iunlock(sc->ip, sc->ilock_flags); if (sc->ip != ip_in && !xfs_internal_inum(sc->mp, sc->ip->i_ino)) iput(VFS_I(sc->ip)); @@ -167,106 +176,130 @@ xfs_scrub_teardown( /* Scrubbing dispatch. */ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { - { /* ioctl presence test */ + [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ + .type = ST_NONE, .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_probe, }, - { /* superblock */ - .setup = xfs_scrub_setup_ag_header, + [XFS_SCRUB_TYPE_SB] = { /* superblock */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_superblock, }, - { /* agf */ - .setup = xfs_scrub_setup_ag_header, + [XFS_SCRUB_TYPE_AGF] = { /* agf */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agf, }, - { /* agfl */ - .setup = xfs_scrub_setup_ag_header, + [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agfl, }, - { /* agi */ - .setup = xfs_scrub_setup_ag_header, + [XFS_SCRUB_TYPE_AGI] = { /* agi */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agi, }, - { /* bnobt */ + [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_bnobt, }, - { /* cntbt */ + [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_cntbt, }, - { /* inobt */ + [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_inobt, }, - { /* finobt */ + [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_finobt, .has = xfs_sb_version_hasfinobt, }, - { /* rmapbt */ + [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_rmapbt, .scrub = xfs_scrub_rmapbt, .has = xfs_sb_version_hasrmapbt, }, - { /* refcountbt */ + [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_refcountbt, .scrub = xfs_scrub_refcountbt, .has = xfs_sb_version_hasreflink, }, - { /* inode record */ + [XFS_SCRUB_TYPE_INODE] = { /* inode record */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode, .scrub = xfs_scrub_inode, }, - { /* inode data fork */ + [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_data, }, - { /* inode attr fork */ + [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_attr, }, - { /* inode CoW fork */ + [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_cow, }, - { /* directory */ + [XFS_SCRUB_TYPE_DIR] = { /* directory */ + .type = ST_INODE, .setup = xfs_scrub_setup_directory, .scrub = xfs_scrub_directory, }, - { /* extended attributes */ + [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ + .type = ST_INODE, .setup = xfs_scrub_setup_xattr, .scrub = xfs_scrub_xattr, }, - { /* symbolic link */ + [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ + .type = ST_INODE, .setup = xfs_scrub_setup_symlink, .scrub = xfs_scrub_symlink, }, - { /* parent pointers */ + [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ + .type = ST_INODE, .setup = xfs_scrub_setup_parent, .scrub = xfs_scrub_parent, }, - { /* realtime bitmap */ + [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ + .type = ST_FS, .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtbitmap, .has = xfs_sb_version_hasrealtime, }, - { /* realtime summary */ + [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ + .type = ST_FS, .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtsummary, .has = xfs_sb_version_hasrealtime, }, - { /* user quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, - { /* group quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, - { /* project quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, }; @@ -284,44 +317,56 @@ xfs_scrub_experimental_warning( "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); } -/* Dispatch metadata scrubbing. */ -int -xfs_scrub_metadata( - struct xfs_inode *ip, +static int +xfs_scrub_validate_inputs( + struct xfs_mount *mp, struct xfs_scrub_metadata *sm) { - struct xfs_scrub_context sc; - struct xfs_mount *mp = ip->i_mount; + int error; const struct xfs_scrub_meta_ops *ops; - bool try_harder = false; - int error = 0; - - trace_xfs_scrub_start(ip, sm, error); - - /* Forbidden if we are shut down or mounted norecovery. */ - error = -ESHUTDOWN; - if (XFS_FORCED_SHUTDOWN(mp)) - goto out; - error = -ENOTRECOVERABLE; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) - goto out; - /* Check our inputs. */ error = -EINVAL; + /* Check our inputs. */ sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) goto out; + /* sm_reserved[] must be zero */ if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) goto out; - /* Do we know about this type of metadata? */ error = -ENOENT; + /* Do we know about this type of metadata? */ if (sm->sm_type >= XFS_SCRUB_TYPE_NR) goto out; ops = &meta_scrub_ops[sm->sm_type]; - if (ops->scrub == NULL) + if (ops->setup == NULL || ops->scrub == NULL) goto out; + /* Does this fs even support this type of metadata? */ + if (ops->has && !ops->has(&mp->m_sb)) + goto out; + + error = -EINVAL; + /* restricting fields must be appropriate for type */ + switch (ops->type) { + case ST_NONE: + case ST_FS: + if (sm->sm_ino || sm->sm_gen || sm->sm_agno) + goto out; + break; + case ST_PERAG: + if (sm->sm_ino || sm->sm_gen || + sm->sm_agno >= mp->m_sb.sb_agcount) + goto out; + break; + case ST_INODE: + if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) + goto out; + break; + default: + goto out; + } + error = -EOPNOTSUPP; /* * We won't scrub any filesystem that doesn't have the ability * to record unwritten extents. The option was made default in @@ -331,20 +376,46 @@ xfs_scrub_metadata( * We also don't support v1-v3 filesystems, which aren't * mountable. */ - error = -EOPNOTSUPP; if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) goto out; - /* Does this fs even support this type of metadata? */ - error = -ENOENT; - if (ops->has && !ops->has(&mp->m_sb)) - goto out; - /* We don't know how to repair anything yet. */ - error = -EOPNOTSUPP; if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) goto out; + error = 0; +out: + return error; +} + +/* Dispatch metadata scrubbing. */ +int +xfs_scrub_metadata( + struct xfs_inode *ip, + struct xfs_scrub_metadata *sm) +{ + struct xfs_scrub_context sc; + struct xfs_mount *mp = ip->i_mount; + bool try_harder = false; + int error = 0; + + BUILD_BUG_ON(sizeof(meta_scrub_ops) != + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); + + trace_xfs_scrub_start(ip, sm, error); + + /* Forbidden if we are shut down or mounted norecovery. */ + error = -ESHUTDOWN; + if (XFS_FORCED_SHUTDOWN(mp)) + goto out; + error = -ENOTRECOVERABLE; + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + goto out; + + error = xfs_scrub_validate_inputs(mp, sm); + if (error) + goto out; + xfs_scrub_experimental_warning(mp); retry_op: @@ -352,7 +423,7 @@ retry_op: memset(&sc, 0, sizeof(sc)); sc.mp = ip->i_mount; sc.sm = sm; - sc.ops = ops; + sc.ops = &meta_scrub_ops[sm->sm_type]; sc.try_harder = try_harder; sc.sa.agno = NULLAGNUMBER; error = sc.ops->setup(&sc, ip); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e9ec041cf713..0d92af86f67a 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -22,6 +22,14 @@ struct xfs_scrub_context; +/* Type info and names for the scrub types. */ +enum xfs_scrub_type { + ST_NONE = 1, /* disabled */ + ST_PERAG, /* per-AG metadata */ + ST_FS, /* per-FS metadata */ + ST_INODE, /* per-inode metadata */ +}; + struct xfs_scrub_meta_ops { /* Acquire whatever resources are needed for the operation. */ int (*setup)(struct xfs_scrub_context *, @@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops { /* Decide if we even have this piece of metadata. */ bool (*has)(struct xfs_sb *); + + /* type describing required/allowed inputs */ + enum xfs_scrub_type type; }; /* Buffer pointers and btree cursors for an entire AG. */ @@ -112,4 +123,30 @@ xfs_scrub_quota(struct xfs_scrub_context *sc) } #endif +/* cross-referencing helpers */ +void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); +#ifdef CONFIG_XFS_RT +void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, + xfs_rtblock_t rtbno, xfs_extlen_t len); +#else +# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) +#endif + #endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index c4ebfb5c1ee8..4dc896852bf0 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class, __entry->flags = sm->sm_flags; __entry->error = error; ), - TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", + TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, @@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, __entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, __entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, @@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, __entry->offset = offset; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete, __entry->type = sc->sm->sm_type; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u ret_ip %pF", + TP_printk("dev %d:%d type %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->ret_ip) @@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->btnum, @@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -393,7 +393,7 @@ TRACE_EVENT(xfs_scrub_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->btnum, @@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \ DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); +TRACE_EVENT(xfs_scrub_xref_error, + TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), + TP_ARGS(sc, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u xref error %d ret_ip %pF", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->error, + __entry->ret_ip) +); + #endif /* _TRACE_XFS_SCRUB_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4fc526a27a94..9c6a830da0ee 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -390,6 +390,19 @@ xfs_map_blocks( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + /* + * Truncate can race with writeback since writeback doesn't take the + * iolock and truncate decreases the file size before it starts + * truncating the pages between new_size and old_size. Therefore, we + * can end up in the situation where writeback gets a CoW fork mapping + * but the truncate makes the mapping invalid and we end up in here + * trying to get a new mapping. Bail out here so that we simply never + * get a valid mapping and so we drop the write altogether. The page + * truncation will kill the contents anyway. + */ + if (type == XFS_IO_COW && offset > i_size_read(inode)) + return 0; + ASSERT(type != XFS_IO_COW); if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; @@ -791,7 +804,7 @@ xfs_aops_discard_page( goto out_invalidate; xfs_alert(ip->i_mount, - "page discard on page %p, inode 0x%llx, offset %llu.", + "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", page, ip->i_ino, offset); xfs_ilock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 6d37ab43195f..c83f549dc17b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1872,7 +1872,7 @@ xfs_swap_extents( */ lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); lock_flags = XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); + xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { @@ -1919,7 +1919,7 @@ xfs_swap_extents( * Lock and join the inodes to the tansaction so that transaction commit * or cancel will unlock the inodes from this point onwards. */ - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, tip, 0); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4c6e86d861fd..d1da2ee9e6db 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -236,6 +236,7 @@ _xfs_buf_alloc( init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); + INIT_LIST_HEAD(&bp->b_li_list); sema_init(&bp->b_sema, 0); /* held, no waiters */ spin_lock_init(&bp->b_lock); XB_SET_OWNER(bp); @@ -585,7 +586,7 @@ _xfs_buf_find( * returning a specific error on buffer lookup failures. */ xfs_alert(btp->bt_mount, - "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", + "%s: daddr 0x%llx out of range, EOFS 0x%llx", __func__, cmap.bm_bn, eofs); WARN_ON(1); return NULL; @@ -1180,13 +1181,14 @@ xfs_buf_ioend_async( } void -xfs_buf_ioerror( +__xfs_buf_ioerror( xfs_buf_t *bp, - int error) + int error, + xfs_failaddr_t failaddr) { ASSERT(error <= 0 && error >= -1000); bp->b_error = error; - trace_xfs_buf_ioerror(bp, error, _RET_IP_); + trace_xfs_buf_ioerror(bp, error, failaddr); } void @@ -1195,8 +1197,9 @@ xfs_buf_ioerror_alert( const char *func) { xfs_alert(bp->b_target->bt_mount, -"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", - (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); +"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", + func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, + -bp->b_error); } int @@ -1378,9 +1381,10 @@ _xfs_buf_ioapply( */ if (xfs_sb_version_hascrc(&mp->m_sb)) { xfs_warn(mp, - "%s: no ops on block 0x%llx/0x%x", + "%s: no buf ops on daddr 0x%llx len %d", __func__, bp->b_bn, bp->b_length); - xfs_hex_dump(bp->b_addr, 64); + xfs_hex_dump(bp->b_addr, + XFS_CORRUPTION_DUMP_LEN); dump_stack(); } } @@ -1671,7 +1675,7 @@ xfs_wait_buftarg( list_del_init(&bp->b_lru); if (bp->b_flags & XBF_WRITE_FAIL) { xfs_alert(btp->bt_mount, -"Corruption Alert: Buffer at block 0x%llx had permanent write failures!", +"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", (long long)bp->b_bn); xfs_alert(btp->bt_mount, "Please run xfs_repair to determine the extent of the problem."); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index f873bb786824..2f4c91452861 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -140,6 +140,7 @@ struct xfs_buf_ops { char *name; void (*verify_read)(struct xfs_buf *); void (*verify_write)(struct xfs_buf *); + xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); }; typedef struct xfs_buf { @@ -175,7 +176,8 @@ typedef struct xfs_buf { struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ - void *b_fspriv; + void *b_log_item; + struct list_head b_li_list; /* Log items list head */ struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ @@ -315,7 +317,9 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); extern void xfs_buf_ioend(struct xfs_buf *bp); -extern void xfs_buf_ioerror(xfs_buf_t *, int); +extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); +#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); extern void xfs_buf_submit(struct xfs_buf *bp); extern int xfs_buf_submit_wait(struct xfs_buf *bp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e0a0af0946f2..270ddb4d2313 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -61,14 +61,14 @@ xfs_buf_log_format_size( */ STATIC void xfs_buf_item_size_segment( - struct xfs_buf_log_item *bip, - struct xfs_buf_log_format *blfp, - int *nvecs, - int *nbytes) + struct xfs_buf_log_item *bip, + struct xfs_buf_log_format *blfp, + int *nvecs, + int *nbytes) { - struct xfs_buf *bp = bip->bli_buf; - int next_bit; - int last_bit; + struct xfs_buf *bp = bip->bli_buf; + int next_bit; + int last_bit; last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); if (last_bit == -1) @@ -218,12 +218,12 @@ xfs_buf_item_format_segment( uint offset, struct xfs_buf_log_format *blfp) { - struct xfs_buf *bp = bip->bli_buf; - uint base_size; - int first_bit; - int last_bit; - int next_bit; - uint nbits; + struct xfs_buf *bp = bip->bli_buf; + uint base_size; + int first_bit; + int last_bit; + int next_bit; + uint nbits; /* copy the flags across from the base format item */ blfp->blf_flags = bip->__bli_format.blf_flags; @@ -406,12 +406,12 @@ xfs_buf_item_unpin( int remove) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - xfs_buf_t *bp = bip->bli_buf; - struct xfs_ail *ailp = lip->li_ailp; - int stale = bip->bli_flags & XFS_BLI_STALE; - int freed; + xfs_buf_t *bp = bip->bli_buf; + struct xfs_ail *ailp = lip->li_ailp; + int stale = bip->bli_flags & XFS_BLI_STALE; + int freed; - ASSERT(bp->b_fspriv == bip); + ASSERT(bp->b_log_item == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_buf_item_unpin(bip); @@ -456,13 +456,14 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; + bp->b_log_item = NULL; + list_del_init(&bp->b_li_list); bp->b_iodone = NULL; } else { spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(bp->b_log_item == NULL); } xfs_buf_relse(bp); } else if (freed && remove) { @@ -722,18 +723,15 @@ xfs_buf_item_free_format( /* * Allocate a new buf log item to go with the given buffer. - * Set the buffer's b_fsprivate field to point to the new - * buf log item. If there are other item's attached to the - * buffer (see xfs_buf_attach_iodone() below), then put the - * buf log item at the front. + * Set the buffer's b_log_item field to point to the new + * buf log item. */ int xfs_buf_item_init( struct xfs_buf *bp, struct xfs_mount *mp) { - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_buf_log_item *bip; + struct xfs_buf_log_item *bip = bp->b_log_item; int chunks; int map_size; int error; @@ -741,13 +739,14 @@ xfs_buf_item_init( /* * Check to see if there is already a buf log item for - * this buffer. If there is, it is guaranteed to be - * the first. If we do already have one, there is + * this buffer. If we do already have one, there is * nothing to do here so return. */ ASSERT(bp->b_target->bt_mount == mp); - if (lip != NULL && lip->li_type == XFS_LI_BUF) + if (bip != NULL) { + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); return 0; + } bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); @@ -781,13 +780,7 @@ xfs_buf_item_init( bip->bli_formats[i].blf_map_size = map_size; } - /* - * Put the buf item into the list of items attached to the - * buffer at the front. - */ - if (bp->b_fspriv) - bip->bli_item.li_bio_list = bp->b_fspriv; - bp->b_fspriv = bip; + bp->b_log_item = bip; xfs_buf_hold(bp); return 0; } @@ -880,7 +873,7 @@ xfs_buf_item_log_segment( */ void xfs_buf_item_log( - xfs_buf_log_item_t *bip, + struct xfs_buf_log_item *bip, uint first, uint last) { @@ -943,7 +936,7 @@ xfs_buf_item_dirty_format( STATIC void xfs_buf_item_free( - xfs_buf_log_item_t *bip) + struct xfs_buf_log_item *bip) { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); @@ -961,13 +954,13 @@ void xfs_buf_item_relse( xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); - bp->b_fspriv = bip->bli_item.li_bio_list; - if (bp->b_fspriv == NULL) + bp->b_log_item = NULL; + if (list_empty(&bp->b_li_list)) bp->b_iodone = NULL; xfs_buf_rele(bp); @@ -980,9 +973,7 @@ xfs_buf_item_relse( * to be called when the buffer's I/O completes. If it is not set * already, set the buffer's b_iodone() routine to be * xfs_buf_iodone_callbacks() and link the log item into the list of - * items rooted at b_fsprivate. Items are always added as the second - * entry in the list if there is a first, because the buf item code - * assumes that the buf log item is first. + * items rooted at b_li_list. */ void xfs_buf_attach_iodone( @@ -990,18 +981,10 @@ xfs_buf_attach_iodone( void (*cb)(xfs_buf_t *, xfs_log_item_t *), xfs_log_item_t *lip) { - xfs_log_item_t *head_lip; - ASSERT(xfs_buf_islocked(bp)); lip->li_cb = cb; - head_lip = bp->b_fspriv; - if (head_lip) { - lip->li_bio_list = head_lip->li_bio_list; - head_lip->li_bio_list = lip; - } else { - bp->b_fspriv = lip; - } + list_add_tail(&lip->li_bio_list, &bp->b_li_list); ASSERT(bp->b_iodone == NULL || bp->b_iodone == xfs_buf_iodone_callbacks); @@ -1011,12 +994,12 @@ xfs_buf_attach_iodone( /* * We can have many callbacks on a buffer. Running the callbacks individually * can cause a lot of contention on the AIL lock, so we allow for a single - * callback to be able to scan the remaining lip->li_bio_list for other items - * of the same type and callback to be processed in the first call. + * callback to be able to scan the remaining items in bp->b_li_list for other + * items of the same type and callback to be processed in the first call. * * As a result, the loop walking the callback list below will also modify the * list. it removes the first item from the list and then runs the callback. - * The loop then restarts from the new head of the list. This allows the + * The loop then restarts from the new first item int the list. This allows the * callback to scan and modify the list attached to the buffer and we don't * have to care about maintaining a next item pointer. */ @@ -1024,18 +1007,26 @@ STATIC void xfs_buf_do_callbacks( struct xfs_buf *bp) { + struct xfs_buf_log_item *blip = bp->b_log_item; struct xfs_log_item *lip; - while ((lip = bp->b_fspriv) != NULL) { - bp->b_fspriv = lip->li_bio_list; - ASSERT(lip->li_cb != NULL); + /* If there is a buf_log_item attached, run its callback */ + if (blip) { + lip = &blip->bli_item; + lip->li_cb(bp, lip); + } + + while (!list_empty(&bp->b_li_list)) { + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, + li_bio_list); + /* - * Clear the next pointer so we don't have any + * Remove the item from the list, so we don't have any * confusion if the item is added to another buf. * Don't touch the log item after calling its * callback, because it could have freed itself. */ - lip->li_bio_list = NULL; + list_del_init(&lip->li_bio_list); lip->li_cb(bp, lip); } } @@ -1052,13 +1043,22 @@ STATIC void xfs_buf_do_callbacks_fail( struct xfs_buf *bp) { - struct xfs_log_item *next; - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_ail *ailp = lip->li_ailp; + struct xfs_log_item *lip; + struct xfs_ail *ailp; + /* + * Buffer log item errors are handled directly by xfs_buf_item_push() + * and xfs_buf_iodone_callback_error, and they have no IO error + * callbacks. Check only for items in b_li_list. + */ + if (list_empty(&bp->b_li_list)) + return; + + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, + li_bio_list); + ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); - for (; lip; lip = next) { - next = lip->li_bio_list; + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_ops->iop_error) lip->li_ops->iop_error(lip, bp); } @@ -1069,13 +1069,23 @@ static bool xfs_buf_iodone_callback_error( struct xfs_buf *bp) { - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_mount *mp = lip->li_mountp; + struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_log_item *lip; + struct xfs_mount *mp; static ulong lasttime; static xfs_buftarg_t *lasttarg; struct xfs_error_cfg *cfg; /* + * The failed buffer might not have a buf_log_item attached or the + * log_item list might be empty. Get the mp from the available + * xfs_log_item + */ + lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, + li_bio_list); + mp = lip ? lip->li_mountp : bip->bli_item.li_mountp; + + /* * If we've already decided to shutdown the filesystem because of * I/O errors, there's no point in giving this a retry. */ @@ -1183,7 +1193,8 @@ xfs_buf_iodone_callbacks( bp->b_first_retry_time = 0; xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; + bp->b_log_item = NULL; + list_del_init(&bp->b_li_list); bp->b_iodone = NULL; xfs_buf_ioend(bp); } @@ -1228,10 +1239,9 @@ xfs_buf_iodone( bool xfs_buf_resubmit_failed_buffers( struct xfs_buf *bp, - struct xfs_log_item *lip, struct list_head *buffer_list) { - struct xfs_log_item *next; + struct xfs_log_item *lip; /* * Clear XFS_LI_FAILED flag from all items before resubmit @@ -1239,10 +1249,8 @@ xfs_buf_resubmit_failed_buffers( * XFS_LI_FAILED set/clear is protected by xa_lock, caller this * function already have it acquired */ - for (; lip; lip = next) { - next = lip->li_bio_list; + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) xfs_clear_li_failed(lip); - } /* Add this buffer back to the delayed write list */ return xfs_buf_delwri_queue(bp, buffer_list); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 9690ce62c9a7..643f53dcfe51 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -50,7 +50,7 @@ struct xfs_buf_log_item; * needed to log buffers. It tracks how many times the lock has been * locked, and which 128 byte chunks of the buffer are dirty. */ -typedef struct xfs_buf_log_item { +struct xfs_buf_log_item { xfs_log_item_t bli_item; /* common item structure */ struct xfs_buf *bli_buf; /* real buffer pointer */ unsigned int bli_flags; /* misc flags */ @@ -59,11 +59,11 @@ typedef struct xfs_buf_log_item { int bli_format_count; /* count of headers */ struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ struct xfs_buf_log_format __bli_format; /* embedded in-log header */ -} xfs_buf_log_item_t; +}; int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); -void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); +void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, void(*)(struct xfs_buf *, xfs_log_item_t *), @@ -71,7 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, - struct xfs_log_item *, struct list_head *); extern kmem_zone_t *xfs_buf_item_zone; diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 0c58918bc0ad..b6ae3597bfb0 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -152,7 +152,6 @@ xfs_dir2_block_getdents( struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_data_hdr_t *hdr; /* block header */ struct xfs_buf *bp; /* buffer for block */ - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_dir2_data_unused_t *dup; /* block unused entry */ char *endptr; /* end of the data entries */ @@ -185,9 +184,8 @@ xfs_dir2_block_getdents( /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(geo, hdr); /* * Loop over the data portion of the block. diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index f248708c10ff..43572f8a1b8e 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -399,52 +399,6 @@ error0: return error; } -STATIC int -xfs_qm_dqrepair( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_dquot *dqp, - xfs_dqid_t firstid, - struct xfs_buf **bpp) -{ - int error; - struct xfs_disk_dquot *ddq; - struct xfs_dqblk *d; - int i; - - /* - * Read the buffer without verification so we get the corrupted - * buffer returned to us. make sure we verify it on write, though. - */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0, bpp, NULL); - - if (error) { - ASSERT(*bpp == NULL); - return error; - } - (*bpp)->b_ops = &xfs_dquot_buf_ops; - - ASSERT(xfs_buf_islocked(*bpp)); - d = (struct xfs_dqblk *)(*bpp)->b_addr; - - /* Do the actual repair of dquots in this buffer */ - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { - ddq = &d[i].dd_diskdq; - error = xfs_dqcheck(mp, ddq, firstid + i, - dqp->dq_flags & XFS_DQ_ALLTYPES, - XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); - if (error) { - /* repair failed, we're screwed */ - xfs_trans_brelse(tp, *bpp); - return -EIO; - } - } - - return 0; -} - /* * Maps a dquot to the buffer containing its on-disk version. * This returns a ptr to the buffer containing the on-disk dquot @@ -526,14 +480,6 @@ xfs_qm_dqtobp( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); - - if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { - xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * - mp->m_quotainfo->qi_dqperchunk; - ASSERT(bp == NULL); - error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); - } - if (error) { ASSERT(bp == NULL); return error; @@ -1010,6 +956,7 @@ xfs_qm_dqflush( struct xfs_mount *mp = dqp->q_mount; struct xfs_buf *bp; struct xfs_disk_dquot *ddqp; + xfs_failaddr_t fa; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -1056,9 +1003,10 @@ xfs_qm_dqflush( /* * A simple sanity check in case we got a corrupted dquot.. */ - error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); - if (error) { + fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0); + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", + be32_to_cpu(ddqp->d_id), fa); xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 664dea105e76..96eaa6933709 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -150,10 +150,7 @@ xfs_dquot_item_error( struct xfs_log_item *lip, struct xfs_buf *bp) { - struct xfs_dquot *dqp; - - dqp = DQUOT_ITEM(lip)->qli_dquot; - ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush)); xfs_set_li_failed(lip, bp); } @@ -179,7 +176,7 @@ xfs_qm_dquot_logitem_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); @@ -212,7 +209,7 @@ xfs_qm_dquot_logitem_push( error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", + xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT, __func__, error, dqp); } else { if (!xfs_buf_delwri_queue(bp, buffer_list)) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 4c9f35d983b2..ccf520f0b00d 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -24,6 +24,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_sysfs.h" +#include "xfs_inode.h" #ifdef DEBUG @@ -314,12 +315,12 @@ xfs_error_report( struct xfs_mount *mp, const char *filename, int linenum, - void *ra) + xfs_failaddr_t failaddr) { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, "Internal error %s at line %d of file %s. Caller %pS", - tag, linenum, filename, ra); + tag, linenum, filename, failaddr); xfs_stack_trace(); } @@ -333,11 +334,11 @@ xfs_corruption_error( void *p, const char *filename, int linenum, - void *ra) + xfs_failaddr_t failaddr) { if (level <= xfs_error_level) - xfs_hex_dump(p, 64); - xfs_error_report(tag, level, mp, filename, linenum, ra); + xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN); + xfs_error_report(tag, level, mp, filename, linenum, failaddr); xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } @@ -347,19 +348,62 @@ xfs_corruption_error( */ void xfs_verifier_error( - struct xfs_buf *bp) + struct xfs_buf *bp, + int error, + xfs_failaddr_t failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; + + fa = failaddr ? failaddr : __return_address; + __xfs_buf_ioerror(bp, error, fa); xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", - __return_address, bp->b_ops->name, bp->b_bn); + fa, bp->b_ops->name, bp->b_bn); xfs_alert(mp, "Unmount and run xfs_repair"); if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); - xfs_hex_dump(xfs_buf_offset(bp, 0), 64); + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", + XFS_CORRUPTION_DUMP_LEN); + xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN); + } + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} + +/* + * Warnings for inode corruption problems. Don't bother with the stack + * trace unless the error level is turned up high. + */ +void +xfs_inode_verifier_error( + struct xfs_inode *ip, + int error, + const char *name, + void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_failaddr_t fa; + int sz; + + fa = failaddr ? failaddr : __return_address; + + xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s", + error == -EFSBADCRC ? "CRC error" : "corruption", + fa, ip->i_ino, name); + + xfs_alert(mp, "Unmount and run xfs_repair"); + + if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) { + sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", + sz); + xfs_hex_dump(buf, sz); } if (xfs_error_level >= XFS_ERRLEVEL_HIGH) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index ea816c1bf8db..7e728c5a46b8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -21,11 +21,16 @@ struct xfs_mount; extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, - const char *filename, int linenum, void *ra); + const char *filename, int linenum, + xfs_failaddr_t failaddr); extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, - int linenum, void *ra); -extern void xfs_verifier_error(struct xfs_buf *bp); + int linenum, xfs_failaddr_t failaddr); +extern void xfs_verifier_error(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); +extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, + const char *name, void *buf, size_t bufsz, + xfs_failaddr_t failaddr); #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) @@ -37,6 +42,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERRLEVEL_LOW 1 #define XFS_ERRLEVEL_HIGH 5 +/* Dump 128 bytes of any corrupt buffer */ +#define XFS_CORRUPTION_DUMP_LEN (128) + /* * Macros to set EFSCORRUPTED & return/branch. */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 60a2e128cb6a..8b4545623e25 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -49,83 +49,6 @@ * File system operations */ -int -xfs_fs_geometry( - xfs_mount_t *mp, - xfs_fsop_geom_t *geo, - int new_version) -{ - - memset(geo, 0, sizeof(*geo)); - - geo->blocksize = mp->m_sb.sb_blocksize; - geo->rtextsize = mp->m_sb.sb_rextsize; - geo->agblocks = mp->m_sb.sb_agblocks; - geo->agcount = mp->m_sb.sb_agcount; - geo->logblocks = mp->m_sb.sb_logblocks; - geo->sectsize = mp->m_sb.sb_sectsize; - geo->inodesize = mp->m_sb.sb_inodesize; - geo->imaxpct = mp->m_sb.sb_imax_pct; - geo->datablocks = mp->m_sb.sb_dblocks; - geo->rtblocks = mp->m_sb.sb_rblocks; - geo->rtextents = mp->m_sb.sb_rextents; - geo->logstart = mp->m_sb.sb_logstart; - ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid)); - memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); - if (new_version >= 2) { - geo->sunit = mp->m_sb.sb_unit; - geo->swidth = mp->m_sb.sb_width; - } - if (new_version >= 3) { - geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | - XFS_FSOP_GEOM_FLAGS_DIRV2 | - (xfs_sb_version_hasattr(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasquota(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | - (xfs_sb_version_hasalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | - (xfs_sb_version_hasdalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | - (xfs_sb_version_hasextflgbit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | - (xfs_sb_version_hassector(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | - (xfs_sb_version_hasasciici(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | - (xfs_sb_version_haslazysbcount(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | - (xfs_sb_version_hasattr2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | - (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | - (xfs_sb_version_hascrc(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | - (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | - (xfs_sb_version_hasfinobt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | - (xfs_sb_version_hassparseinodes(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | - (xfs_sb_version_hasrmapbt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | - (xfs_sb_version_hasreflink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_REFLINK : 0); - geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? - mp->m_sb.sb_logsectsize : BBSIZE; - geo->rtsectsize = mp->m_sb.sb_blocksize; - geo->dirblocksize = mp->m_dir_geo->blksize; - } - if (new_version >= 4) { - geo->flags |= - (xfs_sb_version_haslogv2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); - geo->logsunit = mp->m_sb.sb_logsunit; - } - return 0; -} - static struct xfs_buf * xfs_growfs_get_hdr_buf( struct xfs_mount *mp, @@ -955,7 +878,7 @@ xfs_do_force_shutdown( if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { xfs_notice(mp, - "%s(0x%x) called from line %d of file %s. Return address = 0x%p", + "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT, __func__, flags, lnnum, fname, __return_address); } /* diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 2954c13a3acd..20484ed5e919 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -18,7 +18,6 @@ #ifndef __XFS_FSOPS_H__ #define __XFS_FSOPS_H__ -extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion); extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3bcb8fd2a826..d53a316162d6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -296,6 +296,7 @@ xfs_reinit_inode( uint32_t generation = inode->i_generation; uint64_t version = inode_peek_iversion(inode); umode_t mode = inode->i_mode; + dev_t dev = inode->i_rdev; error = inode_init_always(mp->m_super, inode); @@ -303,6 +304,7 @@ xfs_reinit_inode( inode->i_generation = generation; inode_set_iversion_queried(inode, version); inode->i_mode = mode; + inode->i_rdev = dev; return error; } @@ -474,6 +476,11 @@ xfs_iget_cache_miss( if (error) goto out_destroy; + if (!xfs_inode_verify_forks(ip)) { + error = -EFSCORRUPTED; + goto out_destroy; + } + trace_xfs_iget_miss(ip); if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) { @@ -1651,28 +1658,15 @@ xfs_inode_clear_eofblocks_tag( } /* - * Automatic CoW Reservation Freeing - * - * These functions automatically garbage collect leftover CoW reservations - * that were made on behalf of a cowextsize hint when we start to run out - * of quota or when the reservations sit around for too long. If the file - * has dirty pages or is undergoing writeback, its CoW reservations will - * be retained. - * - * The actual garbage collection piggybacks off the same code that runs - * the speculative EOF preallocation garbage collector. + * Set ourselves up to free CoW blocks from this file. If it's already clean + * then we can bail out quickly, but otherwise we must back off if the file + * is undergoing some kind of write. */ -STATIC int -xfs_inode_free_cowblocks( +static bool +xfs_prep_free_cowblocks( struct xfs_inode *ip, - int flags, - void *args) + struct xfs_ifork *ifp) { - int ret; - struct xfs_eofblocks *eofb = args; - int match; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. @@ -1680,7 +1674,7 @@ xfs_inode_free_cowblocks( if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); - return 0; + return false; } /* @@ -1691,6 +1685,35 @@ xfs_inode_free_cowblocks( mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) + return false; + + return true; +} + +/* + * Automatic CoW Reservation Freeing + * + * These functions automatically garbage collect leftover CoW reservations + * that were made on behalf of a cowextsize hint when we start to run out + * of quota or when the reservations sit around for too long. If the file + * has dirty pages or is undergoing writeback, its CoW reservations will + * be retained. + * + * The actual garbage collection piggybacks off the same code that runs + * the speculative EOF preallocation garbage collector. + */ +STATIC int +xfs_inode_free_cowblocks( + struct xfs_inode *ip, + int flags, + void *args) +{ + struct xfs_eofblocks *eofb = args; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + int match; + int ret = 0; + + if (!xfs_prep_free_cowblocks(ip, ifp)) return 0; if (eofb) { @@ -1711,7 +1734,12 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); + /* + * Check again, nobody else should be able to dirty blocks or change + * the reflink iflag now that we have the first two locks held. + */ + if (xfs_prep_free_cowblocks(ip, ifp)) + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9f424e0aef1f..604ee384a00a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -547,23 +547,36 @@ again: /* * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we - * lock more than one at a time, lockdep will report false positives saying we - * have violated locking orders. + * the mmaplock or the ilock, but not more than one type at a time. If we lock + * more than one at a time, lockdep will report false positives saying we have + * violated locking orders. The iolock must be double-locked separately since + * we use i_rwsem for that. We now support taking one lock EXCL and the other + * SHARED. */ void xfs_lock_two_inodes( - xfs_inode_t *ip0, - xfs_inode_t *ip1, - uint lock_mode) + struct xfs_inode *ip0, + uint ip0_mode, + struct xfs_inode *ip1, + uint ip1_mode) { - xfs_inode_t *temp; + struct xfs_inode *temp; + uint mode_temp; int attempts = 0; xfs_log_item_t *lp; - ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(hweight32(ip0_mode) == 1); + ASSERT(hweight32(ip1_mode) == 1); + ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); @@ -571,10 +584,13 @@ xfs_lock_two_inodes( temp = ip0; ip0 = ip1; ip1 = temp; + mode_temp = ip0_mode; + ip0_mode = ip1_mode; + ip1_mode = mode_temp; } again: - xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); /* * If the first lock we have locked is in the AIL, we must TRY to get @@ -583,18 +599,17 @@ xfs_lock_two_inodes( */ lp = (xfs_log_item_t *)ip0->i_itemp; if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { - if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { - xfs_iunlock(ip0, lock_mode); + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { + xfs_iunlock(ip0, ip0_mode); if ((++attempts % 5) == 0) delay(1); /* Don't just spin the CPU */ goto again; } } else { - xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); + xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1)); } } - void __xfs_iflock( struct xfs_inode *ip) @@ -1422,7 +1437,7 @@ xfs_link( if (error) goto std_return; - xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); @@ -2215,7 +2230,7 @@ xfs_ifree_cluster( xfs_buf_t *bp; xfs_inode_t *ip; xfs_inode_log_item_t *iip; - xfs_log_item_t *lip; + struct xfs_log_item *lip; struct xfs_perag *pag; xfs_ino_t inum; @@ -2273,8 +2288,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. */ - lip = bp->b_fspriv; - while (lip) { + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); @@ -2284,7 +2298,6 @@ xfs_ifree_cluster( &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); } - lip = lip->li_bio_list; } @@ -2452,6 +2465,7 @@ xfs_ifree( VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; + ip->i_d.di_flags2 = 0; ip->i_d.di_dmevmask = 0; ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; @@ -2587,7 +2601,7 @@ xfs_remove( goto std_return; } - xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -3480,6 +3494,36 @@ abort_out: return error; } +/* + * If there are inline format data / attr forks attached to this inode, + * make sure they're not corrupt. + */ +bool +xfs_inode_verify_forks( + struct xfs_inode *ip) +{ + struct xfs_ifork *ifp; + xfs_failaddr_t fa; + + fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); + if (fa) { + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", + ifp->if_u1.if_data, ifp->if_bytes, fa); + return false; + } + + fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); + if (fa) { + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", + ifp ? ifp->if_u1.if_data : NULL, + ifp ? ifp->if_bytes : 0, fa); + return false; + } + return true; +} + STATIC int xfs_iflush_int( struct xfs_inode *ip, @@ -3502,7 +3546,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", + "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } @@ -3512,7 +3556,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_IFLUSH_3)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad regular inode %Lu, ptr 0x%p", + "%s: Bad regular inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3523,7 +3567,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), mp, XFS_ERRTAG_IFLUSH_4)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad directory inode %Lu, ptr 0x%p", + "%s: Bad directory inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3532,7 +3576,7 @@ xfs_iflush_int( ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " - "total extents = %d, nblocks = %Ld, ptr 0x%p", + "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_nextents + ip->i_d.di_anextents, ip->i_d.di_nblocks, ip); @@ -3541,7 +3585,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, mp, XFS_ERRTAG_IFLUSH_6)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", + "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } @@ -3558,10 +3602,8 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; - /* Check the inline directory data. */ - if (S_ISDIR(VFS_I(ip)->i_mode) && - ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && - xfs_dir2_sf_verify(ip)) + /* Check the inline fork data before we write out. */ + if (!xfs_inode_verify_forks(ip)) goto corrupt_out; /* @@ -3624,7 +3666,7 @@ xfs_iflush_int( /* generate the checksum. */ xfs_dinode_calc_crc(mp, dip); - ASSERT(bp->b_fspriv != NULL); + ASSERT(!list_empty(&bp->b_li_list)); ASSERT(bp->b_iodone != NULL); return 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d383e392ec9d..3e8dc990d41c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -423,7 +423,8 @@ void xfs_iunpin_wait(xfs_inode_t *); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) int xfs_iflush(struct xfs_inode *, struct xfs_buf **); -void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); +void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, + struct xfs_inode *ip1, uint ip1_mode); xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); @@ -491,4 +492,6 @@ extern struct kmem_zone *xfs_inode_zone; /* The default CoW extent size hint. */ #define XFS_DEFAULT_COWEXTSZ_HINT 32 +bool xfs_inode_verify_forks(struct xfs_inode *ip); + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7571abf5dfb3..d5037f060d6f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -522,7 +522,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); @@ -713,37 +713,23 @@ xfs_iflush_done( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip; - struct xfs_log_item *blip; - struct xfs_log_item *next; - struct xfs_log_item *prev; + struct xfs_log_item *blip, *n; struct xfs_ail *ailp = lip->li_ailp; int need_ail = 0; + LIST_HEAD(tmp); /* * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. */ - blip = bp->b_fspriv; - prev = NULL; - while (blip != NULL) { - if (blip->li_cb != xfs_iflush_done) { - prev = blip; - blip = blip->li_bio_list; - continue; - } - /* remove from list */ - next = blip->li_bio_list; - if (!prev) { - bp->b_fspriv = next; - } else { - prev->li_bio_list = next; - } + list_add_tail(&lip->li_bio_list, &tmp); - /* add to current list */ - blip->li_bio_list = lip->li_bio_list; - lip->li_bio_list = blip; + list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { + if (lip->li_cb != xfs_iflush_done) + continue; + list_move_tail(&blip->li_bio_list, &tmp); /* * while we have the item, do the unlocked check for needing * the AIL lock. @@ -752,8 +738,6 @@ xfs_iflush_done( if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || (blip->li_flags & XFS_LI_FAILED)) need_ail++; - - blip = next; } /* make sure we capture the state of the initial inode. */ @@ -776,7 +760,7 @@ xfs_iflush_done( /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->xa_lock); - for (blip = lip; blip; blip = blip->li_bio_list) { + list_for_each_entry(blip, &tmp, li_bio_list) { if (INODE_ITEM(blip)->ili_logged && blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) mlip_changed |= xfs_ail_delete_one(ailp, blip); @@ -802,15 +786,14 @@ xfs_iflush_done( * ili_last_fields bits now that we know that the data corresponding to * them is safely on disk. */ - for (blip = lip; blip; blip = next) { - next = blip->li_bio_list; - blip->li_bio_list = NULL; - + list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { + list_del_init(&blip->li_bio_list); iip = INODE_ITEM(blip); iip->ili_logged = 0; iip->ili_last_fields = 0; xfs_ifunlock(iip->ili_inode); } + list_del(&tmp); } /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 20dc65fef6a4..89fb1eb80aae 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -45,6 +45,7 @@ #include <linux/fsmap.h> #include "xfs_fsmap.h" #include "scrub/xfs_scrub.h" +#include "xfs_sb.h" #include <linux/capability.h> #include <linux/cred.h> @@ -809,7 +810,7 @@ xfs_ioc_fsgeometry_v1( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 3); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); if (error) return error; @@ -831,7 +832,7 @@ xfs_ioc_fsgeometry( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 4); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4); if (error) return error; diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 35c79e246fde..10fbde359649 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -37,6 +37,7 @@ #include "xfs_ioctl.h" #include "xfs_ioctl32.h" #include "xfs_trace.h" +#include "xfs_sb.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) @@ -66,7 +67,7 @@ xfs_compat_ioc_fsgeometry_v1( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 3); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); if (error) return error; /* The 32-bit variant simply has some padding at the end */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 99562ec0de56..bee51a14a906 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -285,8 +285,22 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) #define XFS_IS_REALTIME_INODE(ip) \ (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ (ip)->i_mount->m_rtdev_targp) +#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0) #else #define XFS_IS_REALTIME_INODE(ip) (0) +#define XFS_IS_REALTIME_MOUNT(mp) (0) +#endif + +/* + * Starting in Linux 4.15, the %p (raw pointer value) printk modifier + * prints a hashed version of the pointer to avoid leaking kernel + * pointers into dmesg. If we're trying to debug the kernel we want the + * raw values, so override this behavior as best we can. + */ +#ifdef DEBUG +# define PTR_FMT "%px" +#else +# define PTR_FMT "%p" #endif #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a503af96d780..3e5ba1ecc080 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1047,6 +1047,7 @@ xfs_log_item_init( INIT_LIST_HEAD(&item->li_ail); INIT_LIST_HEAD(&item->li_cil); + INIT_LIST_HEAD(&item->li_bio_list); } /* @@ -1242,7 +1243,7 @@ xlog_space_left( static void xlog_iodone(xfs_buf_t *bp) { - struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog_in_core *iclog = bp->b_log_item; struct xlog *l = iclog->ic_log; int aborted = 0; @@ -1773,7 +1774,7 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog_in_core *iclog = bp->b_log_item; xfs_buf_lock(bp); if (iclog->ic_state & XLOG_STATE_IOERROR) { @@ -1919,7 +1920,7 @@ xlog_sync( } bp->b_io_length = BTOBB(count); - bp->b_fspriv = iclog; + bp->b_log_item = iclog; bp->b_flags &= ~XBF_FLUSH; bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); @@ -1958,7 +1959,7 @@ xlog_sync( XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ xfs_buf_associate_memory(bp, (char *)&iclog->ic_header + count, split); - bp->b_fspriv = iclog; + bp->b_log_item = iclog; bp->b_flags &= ~XBF_FLUSH; bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); @@ -2117,7 +2118,9 @@ xlog_print_trans( /* dump core transaction and ticket info */ xfs_warn(mp, "transaction summary:"); - xfs_warn(mp, " flags = 0x%x", tp->t_flags); + xfs_warn(mp, " log res = %d", tp->t_log_res); + xfs_warn(mp, " log count = %d", tp->t_log_count); + xfs_warn(mp, " flags = 0x%x", tp->t_flags); xlog_print_tic_res(mp, tp->t_ticket); @@ -2242,7 +2245,7 @@ xlog_write_setup_ophdr( break; default: xfs_warn(log->l_mp, - "Bad XFS transaction clientid 0x%x in ticket 0x%p", + "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT, ophdr->oh_clientid, ticket); return NULL; } @@ -3924,7 +3927,7 @@ xlog_verify_iclog( } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) xfs_warn(log->l_mp, - "%s: invalid clientid %d op 0x%p offset 0x%lx", + "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx", __func__, clientid, ophead, (unsigned long)field_offset); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 28d1abfe835e..00240c9ee72e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -400,9 +400,9 @@ xlog_recover_iodone( * On v5 supers, a bli could be attached to update the metadata LSN. * Clean it up. */ - if (bp->b_fspriv) + if (bp->b_log_item) xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(bp->b_log_item == NULL); bp->b_iodone = NULL; xfs_buf_ioend(bp); @@ -2218,7 +2218,7 @@ xlog_recover_do_inode_buffer( next_unlinked_offset - reg_buf_offset; if (unlikely(*logged_nextp == 0)) { xfs_alert(mp, - "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " "Trying to replay bad (0) inode di_next_unlinked field.", item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", @@ -2630,7 +2630,7 @@ xlog_recover_validate_buf_type( ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); bp->b_iodone = xlog_recover_iodone; xfs_buf_item_init(bp, mp); - bip = bp->b_fspriv; + bip = bp->b_log_item; bip->bli_item.li_lsn = current_lsn; } } @@ -2652,7 +2652,7 @@ xlog_recover_do_reg_buffer( int i; int bit; int nbits; - int error; + xfs_failaddr_t fa; trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); @@ -2687,7 +2687,7 @@ xlog_recover_do_reg_buffer( * the first dquot in the buffer should do. XXXThis is * probably a good thing to do for other buf types also. */ - error = 0; + fa = NULL; if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { if (item->ri_buf[i].i_addr == NULL) { @@ -2701,11 +2701,14 @@ xlog_recover_do_reg_buffer( item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, - -1, 0, XFS_QMOPT_DOWARN, - "dquot_buf_recover"); - if (error) + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, + -1, 0, 0); + if (fa) { + xfs_alert(mp, + "dquot corrupt at %pS trying to replay into block 0x%llx", + fa, bp->b_bn); goto next; + } } memcpy(xfs_buf_offset(bp, @@ -2957,6 +2960,10 @@ xfs_recover_inode_owner_change( if (error) goto out_free_ip; + if (!xfs_inode_verify_forks(ip)) { + error = -EFSCORRUPTED; + goto out_free_ip; + } if (in_f->ilf_fields & XFS_ILOG_DOWNER) { ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); @@ -3042,7 +3049,7 @@ xlog_recover_inode_pass2( */ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { xfs_alert(mp, - "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", + "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); @@ -3052,7 +3059,7 @@ xlog_recover_inode_pass2( ldip = item->ri_buf[1].i_addr; if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", + "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); @@ -3110,8 +3117,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad regular inode log record, rec ptr 0x%p, " - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + "%s: Bad regular inode log record, rec ptr "PTR_FMT", " + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3123,8 +3130,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad dir inode log record, rec ptr 0x%p, " - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + "%s: Bad dir inode log record, rec ptr "PTR_FMT", " + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3134,8 +3141,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " - "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " + "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_nextents + ldip->di_anextents, ldip->di_nblocks); @@ -3146,8 +3153,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " - "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " + "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); error = -EFSCORRUPTED; goto out_release; @@ -3157,7 +3164,7 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record length %d, rec ptr 0x%p", + "%s: Bad inode log record length %d, rec ptr "PTR_FMT, __func__, item->ri_buf[1].i_len, item); error = -EFSCORRUPTED; goto out_release; @@ -3303,6 +3310,7 @@ xlog_recover_dquot_pass2( xfs_mount_t *mp = log->l_mp; xfs_buf_t *bp; struct xfs_disk_dquot *ddq, *recddq; + xfs_failaddr_t fa; int error; xfs_dq_logformat_t *dq_f; uint type; @@ -3345,10 +3353,12 @@ xlog_recover_dquot_pass2( */ dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); - error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2 (log copy)"); - if (error) + fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0); + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", + dq_f->qlf_id, fa); return -EIO; + } ASSERT(dq_f->qlf_len == 1); /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c879b517cc94..98fd41cbb9e1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -162,6 +162,7 @@ xfs_free_perag( ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); xfs_buf_hash_destroy(pag); + mutex_destroy(&pag->pag_ici_reclaim_lock); call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -248,6 +249,7 @@ xfs_initialize_perag( out_hash_destroy: xfs_buf_hash_destroy(pag); out_free_pag: + mutex_destroy(&pag->pag_ici_reclaim_lock); kmem_free(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ @@ -256,6 +258,7 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); + mutex_destroy(&pag->pag_ici_reclaim_lock); kmem_free(pag); } return error; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b897b11afb2c..5b848f4b637f 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -162,7 +162,7 @@ xfs_qm_dqpurge( */ error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", + xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed", __func__, dqp); } else { error = xfs_bwrite(bp); @@ -291,8 +291,7 @@ xfs_qm_dqattach_one( * exist on disk and we didn't ask it to allocate; ESRCH if quotas got * turned off suddenly. */ - error = xfs_qm_dqget(ip->i_mount, ip, id, type, - doalloc | XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp); if (error) return error; @@ -481,7 +480,7 @@ xfs_qm_dquot_isolate( error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", + xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed", __func__, dqp); goto out_unlock_dirty; } @@ -574,7 +573,7 @@ xfs_qm_set_defquota( struct xfs_def_quota *defq; int error; - error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqread(mp, 0, type, 0, &dqp); if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -652,7 +651,7 @@ xfs_qm_init_quotainfo( XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : XFS_DQ_PROJ), - XFS_QMOPT_DOWARN, &dqp); + 0, &dqp); if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -843,6 +842,7 @@ xfs_qm_reset_dqcounts( { struct xfs_dqblk *dqb; int j; + xfs_failaddr_t fa; trace_xfs_reset_dqcounts(bp, _RET_IP_); @@ -864,10 +864,13 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_dqcheck. + * find uninitialised dquot blks. See comment in + * xfs_dquot_verify. */ - xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); + fa = xfs_dquot_verify(mp, ddq, id + j, type, 0); + if (fa) + xfs_dquot_repair(mp, ddq, id + j, type); + /* * Reset type in case we are reusing group quota file for * project quotas or vice versa @@ -1074,8 +1077,7 @@ xfs_qm_quotacheck_dqadjust( struct xfs_dquot *dqp; int error; - error = xfs_qm_dqget(mp, ip, id, type, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp); if (error) { /* * Shouldn't be able to turn off quotas here. @@ -1696,8 +1698,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, uid, XFS_DQ_USER, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &uq); if (error) { ASSERT(error != -ENOENT); @@ -1723,8 +1724,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &gq); if (error) { ASSERT(error != -ENOENT); @@ -1743,8 +1743,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &pq); if (error) { ASSERT(error != -ENOENT); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 47aea2e82c26..270246943a06 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -464,6 +464,13 @@ retry: error = xfs_trans_commit(tp); if (error) return error; + + /* + * Allocation succeeded but the requested range was not even partially + * satisfied? Bail out! + */ + if (nimaps == 0) + return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, &dfops); @@ -599,10 +606,6 @@ xfs_reflink_cancel_cow_blocks( del.br_startblock, del.br_blockcount, NULL); - /* Update quota accounting */ - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)del.br_blockcount); - /* Roll the transaction */ xfs_defer_ijoin(&dfops, ip); error = xfs_defer_finish(tpp, &dfops); @@ -613,6 +616,13 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + + /* Remove the quota reservation */ + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -(long)del.br_blockcount, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + break; } else { /* Didn't do anything, push cursor back. */ xfs_iext_prev(ifp, &icur); @@ -795,6 +805,10 @@ xfs_reflink_end_cow( if (error) goto out_defer; + /* Charge this new data fork mapping to the on-disk quota. */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, + (long)del.br_blockcount); + /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); @@ -944,7 +958,7 @@ xfs_reflink_set_inode_flag( if (src->i_ino == dest->i_ino) xfs_ilock(src, XFS_ILOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); if (!xfs_is_reflink_inode(src)) { trace_xfs_reflink_set_inode_flag(src); @@ -1202,13 +1216,16 @@ xfs_reflink_remap_blocks( /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ while (len) { + uint lock_mode; + trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, dest, destoff); + /* Read extent from the source file */ nimaps = 1; - xfs_ilock(src, XFS_ILOCK_EXCL); + lock_mode = xfs_ilock_data_map_shared(src); error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); - xfs_iunlock(src, XFS_ILOCK_EXCL); + xfs_iunlock(src, lock_mode); if (error) goto err; ASSERT(nimaps == 1); @@ -1245,6 +1262,50 @@ err: } /* + * Grab the exclusive iolock for a data copy from src to dest, making + * sure to abide vfs locking order (lowest pointer value goes first) and + * breaking the pnfs layout leases on dest before proceeding. The loop + * is needed because we cannot call the blocking break_layout() with the + * src iolock held, and therefore have to back out both locks. + */ +static int +xfs_iolock_two_inodes_and_break_layout( + struct inode *src, + struct inode *dest) +{ + int error; + +retry: + if (src < dest) { + inode_lock_shared(src); + inode_lock_nested(dest, I_MUTEX_NONDIR2); + } else { + /* src >= dest */ + inode_lock(dest); + } + + error = break_layout(dest, false); + if (error == -EWOULDBLOCK) { + inode_unlock(dest); + if (src < dest) + inode_unlock_shared(src); + error = break_layout(dest, true); + if (error) + return error; + goto retry; + } + if (error) { + inode_unlock(dest); + if (src < dest) + inode_unlock_shared(src); + return error; + } + if (src > dest) + inode_lock_shared_nested(src, I_MUTEX_NONDIR2); + return 0; +} + +/* * Link a range of blocks from one file to another. */ int @@ -1274,11 +1335,14 @@ xfs_reflink_remap_range( return -EIO; /* Lock both files against IO */ - lock_two_nondirectories(inode_in, inode_out); + ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); + if (ret) + return ret; if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, + XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; @@ -1295,6 +1359,11 @@ xfs_reflink_remap_range( if (ret <= 0) goto out_unlock; + /* Attach dquots to dest inode before changing block map */ + ret = xfs_qm_dqattach(dest, 0); + if (ret) + goto out_unlock; + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* @@ -1341,10 +1410,12 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); if (!same_inode) - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - unlock_two_nondirectories(inode_in, inode_out); + inode_unlock_shared(inode_in); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 3f30f846d7f2..dfee3c991155 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -139,6 +139,9 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, void *priv); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, + bool *is_free); #else # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) @@ -148,6 +151,7 @@ bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); # define xfs_rtalloc_query_all(t,f,p) (ENOSYS) # define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) # define xfs_verify_rtbno(m, r) (false) +# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1dacccc367f8..f3e0001f9992 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1153,6 +1153,14 @@ xfs_fs_statfs( ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) xfs_qm_statvfs(ip, statp); + + if (XFS_IS_REALTIME_MOUNT(mp) && + (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { + statp->f_blocks = sbp->sb_rblocks; + statp->f_bavail = statp->f_bfree = + sbp->sb_frextents * sbp->sb_rextsize; + } + return 0; } @@ -1660,7 +1668,7 @@ xfs_fs_fill_super( } if (xfs_sb_version_hasreflink(&mp->m_sb)) xfs_alert(mp, - "DAX and reflink have not been tested together!"); + "DAX and reflink cannot be used together!"); } if (mp->m_flags & XFS_MOUNT_DISCARD) { @@ -1684,10 +1692,6 @@ xfs_fs_fill_super( "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); } - if (xfs_sb_version_hasreflink(&mp->m_sb)) - xfs_alert(mp, - "EXPERIMENTAL reflink feature enabled. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index d718a10c2271..945de08af7ba 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, __entry->flags = ctx->flags; ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s", + "alist %p size %u count %u firstu %u flags %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->hashval, @@ -119,7 +119,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class, __entry->refcount = refcount; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u refcount %d caller %ps", + TP_printk("dev %d:%d agno %u refcount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, @@ -200,7 +200,7 @@ TRACE_EVENT(xfs_attr_list_node_descend, __entry->bt_before = be32_to_cpu(btree->before); ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s " + "alist %p size %u count %u firstu %u flags %d %s " "node hashval %u, node before %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, @@ -251,8 +251,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __entry->bmap_state = state; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " - "offset %lld block %lld count %lld flag %d caller %ps", + TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " + "offset %lld block %lld count %lld flag %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), @@ -301,7 +301,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " - "lock %d flags %s caller %ps", + "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->nblks, @@ -370,7 +370,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %ps", + "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, @@ -390,7 +390,7 @@ DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); TRACE_EVENT(xfs_buf_ioerror, - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), + TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), TP_ARGS(bp, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -401,7 +401,7 @@ TRACE_EVENT(xfs_buf_ioerror, __field(int, pincount) __field(unsigned, lockval) __field(int, error) - __field(unsigned long, caller_ip) + __field(xfs_failaddr_t, caller_ip) ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; @@ -415,7 +415,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d error %d flags %s caller %ps", + "lock %d error %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, @@ -460,7 +460,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " "lock %d flags %s recur %d refcount %d bliflags %s " - "lidesc 0x%p liflags %s", + "lidesc %p liflags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->buf_bno, __entry->buf_len, @@ -579,7 +579,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class, __entry->lock_flags = lock_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), @@ -697,7 +697,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, @@ -1028,7 +1028,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class, __entry->flags = lip->li_flags; __entry->lsn = lip->li_lsn; ), - TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", + TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), @@ -1049,7 +1049,7 @@ TRACE_EVENT(xfs_log_force, __entry->lsn = lsn; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d lsn 0x%llx caller %ps", + TP_printk("dev %d:%d lsn 0x%llx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lsn, (void *)__entry->caller_ip) ) @@ -1082,7 +1082,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class, __entry->old_lsn = old_lsn; __entry->new_lsn = new_lsn; ), - TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", + TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), @@ -1403,7 +1403,7 @@ TRACE_EVENT(xfs_bunmap, __entry->flags = flags; ), TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" - "flags %s caller %ps", + "flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1517,7 +1517,7 @@ TRACE_EVENT(xfs_agf, ), TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " "levels b %u c %u flfirst %u fllast %u flcount %u " - "freeblks %u longest %u caller %ps", + "freeblks %u longest %u caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), @@ -2014,7 +2014,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class, __entry->count = item->ri_cnt; __entry->total = item->ri_total; ), - TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, " + TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, " "item type %s item region count/total %d/%d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, @@ -2486,7 +2486,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u error %d caller %ps", + TP_printk("dev %d:%d agno %u error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->error, @@ -2977,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino %llx error %d caller %ps", + TP_printk("dev %d:%d ino %llx error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error, @@ -3313,6 +3313,32 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); +TRACE_EVENT(xfs_trans_resv_calc, + TP_PROTO(struct xfs_mount *mp, unsigned int type, + struct xfs_trans_res *res), + TP_ARGS(mp, type, res), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(uint, logres) + __field(int, logcount) + __field(int, logflags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->type = type; + __entry->logres = res->tr_logres; + __entry->logcount = res->tr_logcount; + __entry->logflags = res->tr_logflags; + ), + TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->logres, + __entry->logcount, + __entry->logflags) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a87f657f59c9..86f92df32c42 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -35,6 +35,27 @@ kmem_zone_t *xfs_trans_zone; kmem_zone_t *xfs_log_item_desc_zone; +#if defined(CONFIG_TRACEPOINTS) +static void +xfs_trans_trace_reservations( + struct xfs_mount *mp) +{ + struct xfs_trans_res resv; + struct xfs_trans_res *res; + struct xfs_trans_res *end_res; + int i; + + res = (struct xfs_trans_res *)M_RES(mp); + end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); + for (i = 0; res < end_res; i++, res++) + trace_xfs_trans_resv_calc(mp, i, res); + xfs_log_get_max_trans_res(mp, &resv); + trace_xfs_trans_resv_calc(mp, -1, &resv); +} +#else +# define xfs_trans_trace_reservations(mp) +#endif + /* * Initialize the precomputed transaction reservation values * in the mount structure. @@ -44,6 +65,7 @@ xfs_trans_init( struct xfs_mount *mp) { xfs_trans_resv_calc(mp, M_RES(mp)); + xfs_trans_trace_reservations(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 815b53d20e26..9d542dfe0052 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -50,7 +50,7 @@ typedef struct xfs_log_item { uint li_type; /* item type */ uint li_flags; /* misc flags */ struct xfs_buf *li_buf; /* real buffer pointer */ - struct xfs_log_item *li_bio_list; /* buffer item list */ + struct list_head li_bio_list; /* buffer item list */ void (*li_cb)(struct xfs_buf *, struct xfs_log_item *); /* buffer item iodone */ diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3ba7a96a8abd..653ce379d36b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -82,12 +82,12 @@ _xfs_trans_bjoin( ASSERT(bp->b_transp == NULL); /* - * The xfs_buf_log_item pointer is stored in b_fsprivate. If + * The xfs_buf_log_item pointer is stored in b_log_item. If * it doesn't have one yet, then allocate one and initialize it. * The checks to see if one is there are in xfs_buf_item_init(). */ xfs_buf_item_init(bp, tp->t_mountp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); @@ -118,7 +118,7 @@ xfs_trans_bjoin( struct xfs_buf *bp) { _xfs_trans_bjoin(tp, bp, 0); - trace_xfs_trans_bjoin(bp->b_fspriv); + trace_xfs_trans_bjoin(bp->b_log_item); } /* @@ -139,7 +139,7 @@ xfs_trans_get_buf_map( xfs_buf_flags_t flags) { xfs_buf_t *bp; - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; if (!tp) return xfs_buf_get_map(target, map, nmaps, flags); @@ -159,7 +159,7 @@ xfs_trans_get_buf_map( } ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -175,7 +175,7 @@ xfs_trans_get_buf_map( ASSERT(!bp->b_error); _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_get_buf(bp->b_fspriv); + trace_xfs_trans_get_buf(bp->b_log_item); return bp; } @@ -188,12 +188,13 @@ xfs_trans_get_buf_map( * mount structure. */ xfs_buf_t * -xfs_trans_getsb(xfs_trans_t *tp, - struct xfs_mount *mp, - int flags) +xfs_trans_getsb( + xfs_trans_t *tp, + struct xfs_mount *mp, + int flags) { xfs_buf_t *bp; - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; /* * Default to just trying to lock the superblock buffer @@ -210,7 +211,7 @@ xfs_trans_getsb(xfs_trans_t *tp, */ bp = mp->m_sb_bp; if (bp->b_transp == tp) { - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -223,7 +224,7 @@ xfs_trans_getsb(xfs_trans_t *tp, return NULL; _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_getsb(bp->b_fspriv); + trace_xfs_trans_getsb(bp->b_log_item); return bp; } @@ -266,7 +267,7 @@ xfs_trans_read_buf_map( if (bp) { ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); - ASSERT(bp->b_fspriv != NULL); + ASSERT(bp->b_log_item != NULL); ASSERT(!bp->b_error); ASSERT(bp->b_flags & XBF_DONE); @@ -279,7 +280,7 @@ xfs_trans_read_buf_map( return -EIO; } - bip = bp->b_fspriv; + bip = bp->b_log_item; bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -329,7 +330,7 @@ xfs_trans_read_buf_map( if (tp) { _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_read_buf(bp->b_fspriv); + trace_xfs_trans_read_buf(bp->b_log_item); } *bpp = bp; return 0; @@ -352,10 +353,11 @@ xfs_trans_read_buf_map( * brelse() call. */ void -xfs_trans_brelse(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_brelse( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; int freed; /* @@ -368,7 +370,7 @@ xfs_trans_brelse(xfs_trans_t *tp, } ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); @@ -456,10 +458,11 @@ xfs_trans_brelse(xfs_trans_t *tp, */ /* ARGSUSED */ void -xfs_trans_bhold(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_bhold( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -476,10 +479,11 @@ xfs_trans_bhold(xfs_trans_t *tp, * for this transaction. */ void -xfs_trans_bhold_release(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_bhold_release( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -500,7 +504,7 @@ xfs_trans_dirty_buf( struct xfs_trans *tp, struct xfs_buf *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -557,7 +561,7 @@ xfs_trans_log_buf( uint first, uint last) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(first <= last && last < BBTOB(bp->b_length)); ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); @@ -600,10 +604,10 @@ xfs_trans_log_buf( */ void xfs_trans_binval( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int i; ASSERT(bp->b_transp == tp); @@ -655,10 +659,10 @@ xfs_trans_binval( */ void xfs_trans_inode_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -679,10 +683,10 @@ xfs_trans_inode_buf( */ void xfs_trans_stale_inode_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -704,10 +708,10 @@ xfs_trans_stale_inode_buf( /* ARGSUSED */ void xfs_trans_inode_alloc_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -729,7 +733,7 @@ xfs_trans_ordered_buf( struct xfs_trans *tp, struct xfs_buf *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -759,7 +763,7 @@ xfs_trans_buf_set_type( struct xfs_buf *bp, enum xfs_blft type) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!tp) return; @@ -776,8 +780,8 @@ xfs_trans_buf_copy_type( struct xfs_buf *dst_bp, struct xfs_buf *src_bp) { - struct xfs_buf_log_item *sbip = src_bp->b_fspriv; - struct xfs_buf_log_item *dbip = dst_bp->b_fspriv; + struct xfs_buf_log_item *sbip = src_bp->b_log_item; + struct xfs_buf_log_item *dbip = dst_bp->b_log_item; enum xfs_blft type; type = xfs_blft_from_flags(&sbip->__bli_format); @@ -797,11 +801,11 @@ xfs_trans_buf_copy_type( /* ARGSUSED */ void xfs_trans_dquot_buf( - xfs_trans_t *tp, - xfs_buf_t *bp, - uint type) + xfs_trans_t *tp, + xfs_buf_t *bp, + uint type) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f6654c21711..2a815560fda0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -748,6 +748,11 @@ static inline void inode_lock_nested(struct inode *inode, unsigned subclass) down_write_nested(&inode->i_rwsem, subclass); } +static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) +{ + down_read_nested(&inode->i_rwsem, subclass); +} + void lock_two_nondirectories(struct inode *, struct inode*); void unlock_two_nondirectories(struct inode *, struct inode*); @@ -2980,6 +2985,7 @@ enum { }; void dio_end_io(struct bio *bio); +void dio_warn_stale_pagecache(struct file *filp); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, |