diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 12:42:56 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 12:42:56 -0700 |
commit | bba7d682277c09373b56b0461b0abbd0b3d1e872 (patch) | |
tree | 68b2697be391fadc6321274427af4770dc2acdbb /fs/xfs | |
parent | a64a325bf6313aa5cde7ecd691927e92892d1b7f (diff) | |
parent | 2a09b575074ff3ed23907b6f6f3da87af41f592b (diff) |
Merge tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"This cycle we've worked on fixing bugs and improving XFS' memory
footprint.
The most notable fixes include: fixing a corruption warning (and free
space accounting skew) if copy on write fails; fixing slab cache
misuse if SLOB is enabled, which apparently was broken for years
without anybody noticing; and fixing a potential race with online
shrinkfs.
Otherwise, the bulk of the changes here involve setting up separate
slab caches for frequently used items such as btree cursors and log
intent items, and compacting the structures to reduce memory usage of
those items substantially. This also sets us up to support larger
btrees in future kernels. We also switch parts of online fsck to
allocate scrub context information from the heap instead of using
stack space.
Summary:
- Bug fixes and cleanups for kernel memory allocation usage, this
time without touching the mm code.
- Refactor the log recovery mechanism that preserves held resources
across a transaction roll so that it uses the exact same mechanism
that we use for that during regular runtime.
- Fix bugs and tighten checking around btree heights.
- Remove more old typedefs.
- Fix perag reference leaks when racing with growfs.
- Remove unused fields from xfs_btree_cur.
- Allocate various scrub structures on the heap to reduce stack
usage.
- Pack xfs_btree_cur fields and rearrange to support arbitrary
heights.
- Compute maximum possible heights for each btree height, and use
that to set up slab caches for each btree type.
- Finally remove kmem_zone_t, since these have always been struct
kmem_cache on Linux.
- Compact the structures used to coordinate work intent items.
- Set up slab caches for each work intent item type.
- Rename the "bmap_add_free" function to "free_extent_later", which
more accurately describes what it does.
- Fix corruption warning on unmount when a CoW preallocation covers a
data fork delalloc reservation but then the CoW fails.
- Add some more minor code improvements"
* tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (45 commits)
xfs: use swap() to make code cleaner
xfs: Remove duplicated include in xfs_super
xfs: punch out data fork delalloc blocks on COW writeback failure
xfs: remove unused parameter from refcount code
xfs: reduce the size of struct xfs_extent_free_item
xfs: rename xfs_bmap_add_free to xfs_free_extent_later
xfs: create slab caches for frequently-used deferred items
xfs: compact deferred intent item structures
xfs: rename _zone variables to _cache
xfs: remove kmem_zone typedef
xfs: use separate btree cursor cache for each btree type
xfs: compute absolute maximum nlevels for each btree type
xfs: kill XFS_BTREE_MAXLEVELS
xfs: compute the maximum height of the rmap btree when reflink enabled
xfs: clean up xfs_btree_{calc_size,compute_maxlevels}
xfs: compute maximum AG btree height for critical reservation calculation
xfs: rename m_ag_maxlevels to m_allocbt_maxlevels
xfs: dynamically allocate cursors based on maxlevels
xfs: encode the max btree height in the cursor
xfs: refactor btree cursor allocation function
...
Diffstat (limited to 'fs/xfs')
88 files changed, 1649 insertions, 900 deletions
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 54da6d717a06..b987dc2c6851 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) /* * Zone interfaces */ - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - static inline struct page * kmem_to_page(void *addr) { diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 005abfd9fd34..d7d875cef07a 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -850,7 +850,7 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true); + __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4c6f9045baca..3f597cad2c33 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag); /* * Perag iteration APIs - * - * XXX: for_each_perag_range() usage really needs an iterator to clean up when - * we terminate at end_agno because we may have taken a reference to the perag - * beyond end_agno. Right now callers have to be careful to catch and clean that - * up themselves. This is not necessary for the callers of for_each_perag() and - * for_each_perag_from() because they terminate at sb_agcount where there are - * no perag structures in tree beyond end_agno. */ -#define for_each_perag_range(mp, next_agno, end_agno, pag) \ - for ((pag) = xfs_perag_get((mp), (next_agno)); \ - (pag) != NULL && (next_agno) <= (end_agno); \ - (next_agno) = (pag)->pag_agno + 1, \ - xfs_perag_put(pag), \ - (pag) = xfs_perag_get((mp), (next_agno))) +static inline struct xfs_perag * +xfs_perag_next( + struct xfs_perag *pag, + xfs_agnumber_t *agno, + xfs_agnumber_t end_agno) +{ + struct xfs_mount *mp = pag->pag_mount; + + *agno = pag->pag_agno + 1; + xfs_perag_put(pag); + if (*agno > end_agno) + return NULL; + return xfs_perag_get(mp, *agno); +} + +#define for_each_perag_range(mp, agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (agno)); \ + (pag) != NULL; \ + (pag) = xfs_perag_next((pag), &(agno), (end_agno))) -#define for_each_perag_from(mp, next_agno, pag) \ - for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) +#define for_each_perag_from(mp, agno, pag) \ + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) #define for_each_perag(mp, agno, pag) \ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 2aa2b3484c28..fe94058d4e9e 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -91,7 +91,8 @@ xfs_ag_resv_critical( trace_xfs_ag_resv_critical(pag, type, avail); /* Critically low if less than 10% or max btree height remains. */ - return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, + return XFS_TEST_ERROR(avail < orig / 10 || + avail < pag->pag_mount->m_agbtree_maxlevels, pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 95157f5a5a6c..353e53b892e6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,7 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" -extern kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; @@ -426,8 +426,8 @@ xfs_alloc_fix_len( */ STATIC int /* error code */ xfs_alloc_fixup_trees( - xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ - xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */ + struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */ xfs_agblock_t fbno, /* starting block of free extent */ xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ @@ -488,8 +488,8 @@ xfs_alloc_fixup_trees( struct xfs_btree_block *bnoblock; struct xfs_btree_block *cntblock; - bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp); if (XFS_IS_CORRUPT(mp, bnoblock->bb_numrecs != @@ -1200,8 +1200,8 @@ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ - xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */ + struct xfs_btree_cur *cnt_cur;/* by count btree cursor */ int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ @@ -1512,7 +1512,7 @@ xfs_alloc_ag_vextent_lastblock( * than minlen. */ if (*len || args->alignment > 1) { - acur->cnt->bc_ptrs[0] = 1; + acur->cnt->bc_levels[0].ptr = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); if (error) @@ -1658,8 +1658,8 @@ xfs_alloc_ag_vextent_size( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + struct xfs_btree_cur *bno_cur; /* cursor for bno btree */ + struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */ int error; /* error result */ xfs_agblock_t fbno; /* start of found freespace */ xfs_extlen_t flen; /* length of found freespace */ @@ -2190,14 +2190,15 @@ xfs_free_ag_extent( */ /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( xfs_mount_t *mp) /* file system mount structure */ { - mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, + mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, (mp->m_sb.sb_agblocks + 1) / 2); + ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk()); } /* @@ -2255,14 +2256,14 @@ xfs_alloc_min_freelist( const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; unsigned int min_free; - ASSERT(mp->m_ag_maxlevels > 0); + ASSERT(mp->m_alloc_maxlevels > 0); /* space needed by-bno freespace btree */ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed by-size freespace btree */ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, @@ -2439,7 +2440,7 @@ xfs_agfl_reset( /* * Defer an AGFL block free. This is effectively equivalent to - * xfs_bmap_add_free() with some special handling particular to AGFL blocks. + * xfs_free_extent_later() with some special handling particular to AGFL blocks. * * Deferring AGFL frees helps prevent log reservation overruns due to too many * allocation operations in a transaction. AGFL frees are prone to this problem @@ -2458,21 +2459,74 @@ xfs_defer_agfl_block( struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *new; /* new element */ - ASSERT(xfs_bmap_free_item_zone != NULL); + ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_alloc(xfs_bmap_free_item_zone, + new = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; - new->xefi_oinfo = *oinfo; - new->xefi_skip_discard = false; + new->xefi_owner = oinfo->oi_owner; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); } +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). + */ +void +__xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + bool skip_discard) +{ + struct xfs_extent_free_item *new; /* new element */ +#ifdef DEBUG + struct xfs_mount *mp = tp->t_mountp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_extfree_item_cache != NULL); + + new = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); + new->xefi_startblock = bno; + new->xefi_blockcount = (xfs_extlen_t)len; + if (skip_discard) + new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { + ASSERT(oinfo->oi_offset == 0); + + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) + new->xefi_flags |= XFS_EFI_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) + new->xefi_flags |= XFS_EFI_BMBT_BLOCK; + new->xefi_owner = oinfo->oi_owner; + } else { + new->xefi_owner = XFS_RMAP_OWN_NULL; + } + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); +} + #ifdef DEBUG /* * Check if an AGF has a free extent record whose length is equal to @@ -2903,13 +2957,16 @@ xfs_agf_verify( if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > mp->m_ag_maxlevels || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > + mp->m_alloc_maxlevels || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > + mp->m_alloc_maxlevels) return __this_address; if (xfs_has_rmapbt(mp) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels)) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > + mp->m_rmap_maxlevels)) return __this_address; if (xfs_has_rmapbt(mp) && @@ -3495,3 +3552,20 @@ xfs_agfl_walk( return 0; } + +int __init +xfs_extfree_intent_init_cache(void) +{ + xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); + + return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_extfree_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_extfree_item_cache); + xfs_extfree_item_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index df4aefaf0046..1c14a0b1abea 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -98,7 +98,7 @@ unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( @@ -248,4 +248,40 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } +void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, + bool skip_discard); + +/* + * List of extents to be free "later". + * The list is kept sorted on xbf_startblock. + */ +struct xfs_extent_free_item { + struct list_head xefi_list; + uint64_t xefi_owner; + xfs_fsblock_t xefi_startblock;/* starting fs block number */ + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + unsigned int xefi_flags; +}; + +#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ +#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ +#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ + +static inline void +xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo) +{ + __xfs_free_extent_later(tp, bno, len, oinfo, false); +} + + +extern struct kmem_cache *xfs_extfree_item_cache; + +int __init xfs_extfree_intent_init_cache(void); +void xfs_extfree_intent_destroy_cache(void); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6746fd735550..8c9f73cc0bee 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_allocbt_cur_cache; STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( @@ -316,7 +317,7 @@ xfs_allocbt_verify( if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[btnum]) return __this_address; - } else if (level >= mp->m_ag_maxlevels) + } else if (level >= mp->m_alloc_maxlevels) return __this_address; return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -477,12 +478,8 @@ xfs_allocbt_init_common( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, mp->m_alloc_maxlevels, + xfs_allocbt_cur_cache); cur->bc_ag.abt.active = false; if (btnum == XFS_BTNUM_CNT) { @@ -571,6 +568,17 @@ xfs_allocbt_commit_staged_btree( } } +/* Calculate number of records in an alloc btree block. */ +static inline unsigned int +xfs_allocbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} + /* * Calculate number of records in an alloc btree block. */ @@ -581,10 +589,26 @@ xfs_allocbt_maxrecs( int leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); + return xfs_allocbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_alloc_rec_t); - return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +/* Free space btrees are at their largest when every other block is free. */ +#define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2) + +/* Compute the max possible height for free space btrees. */ +unsigned int +xfs_allocbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS); } /* Calculate the freespace btree size for some records. */ @@ -595,3 +619,22 @@ xfs_allocbt_calc_size( { return xfs_btree_calc_size(mp->m_alloc_mnr, len); } + +int __init +xfs_allocbt_init_cur_cache(void) +{ + xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur", + xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_allocbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_allocbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_allocbt_cur_cache); + xfs_allocbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 2f6b816aaf9f..45df893ef6bb 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -60,4 +60,9 @@ extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_allocbt_maxlevels_ondisk(void); + +int __init xfs_allocbt_init_cur_cache(void); +void xfs_allocbt_destroy_cur_cache(void); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e1d11e314228..014daa8c542d 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -770,7 +770,7 @@ xfs_attr_fork_remove( ASSERT(ip->i_afp->if_nextents == 0); xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; ip->i_forkoff = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b48230f1a361..4dccd4d90622 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -37,8 +37,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" - -kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_bmap_intent_cache; /* * Miscellaneous helper functions @@ -93,6 +92,7 @@ xfs_bmap_compute_maxlevels( maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; } mp->m_bm_maxlevels[whichfork] = level; + ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk()); } unsigned int @@ -239,11 +239,11 @@ xfs_bmap_get_bp( if (!cur) return NULL; - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - if (!cur->bc_bufs[i]) + for (i = 0; i < cur->bc_maxlevels; i++) { + if (!cur->bc_levels[i].bp) break; - if (xfs_buf_daddr(cur->bc_bufs[i]) == bno) - return cur->bc_bufs[i]; + if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno) + return cur->bc_levels[i].bp; } /* Chase down all the log items to see if the bp is there */ @@ -316,7 +316,7 @@ xfs_check_block( */ STATIC void xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ + struct xfs_btree_cur *cur, /* btree cursor or null */ xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { @@ -522,56 +522,6 @@ xfs_bmap_validate_ret( #endif /* DEBUG */ /* - * bmap free list manipulation functions - */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). - */ -void -__xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_extent_free_item *new; /* new element */ -#ifdef DEBUG - struct xfs_mount *mp = tp->t_mountp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - - new = kmem_cache_alloc(xfs_bmap_free_item_zone, - GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; - if (oinfo) - new->xefi_oinfo = *oinfo; - else - new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - new->xefi_skip_discard = skip_discard; - trace_xfs_bmap_free_defer(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); -} - -/* * Inode fork format manipulation functions */ @@ -625,12 +575,12 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); - if (cur->bc_bufs[0] == cbp) - cur->bc_bufs[0] = NULL; + if (cur->bc_levels[0].bp == cbp) + cur->bc_levels[0].bp = NULL; xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); ifp->if_format = XFS_DINODE_FMT_EXTENTS; @@ -925,7 +875,7 @@ xfs_bmap_add_attrfork_btree( int *flags) /* inode logging flags */ { struct xfs_btree_block *block = ip->i_df.if_broot; - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ xfs_mount_t *mp; /* file system mount struct */ int stat; /* newroot status */ @@ -968,7 +918,7 @@ xfs_bmap_add_attrfork_extents( struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ int error; /* error return value */ if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <= @@ -1988,11 +1938,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ int whichfork, struct xfs_iext_cursor *icur, - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ int i; /* temp state */ struct xfs_ifork *ifp; /* inode fork pointer */ @@ -5045,7 +4995,7 @@ xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ struct xfs_iext_cursor *icur, - xfs_btree_cur_t *cur, /* if null, not a btree */ + struct xfs_btree_cur *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ @@ -5296,7 +5246,7 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_bmap_add_free(tp, del->br_startblock, + __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); @@ -6189,7 +6139,7 @@ __xfs_bmap_add( bmap->br_blockcount, bmap->br_state); - bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); + bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; @@ -6300,3 +6250,20 @@ xfs_bmap_validate_extent( return __this_address; return NULL; } + +int __init +xfs_bmap_intent_init_cache(void) +{ + xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent", + sizeof(struct xfs_bmap_intent), + 0, 0, NULL); + + return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_bmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_bmap_intent_cache); + xfs_bmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 67641f669918..03d9aaf87413 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -13,8 +13,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; -extern kmem_zone_t *xfs_bmap_free_item_zone; - /* * Argument structure for xfs_bmap_alloc. */ @@ -44,19 +42,6 @@ struct xfs_bmalloca { int flags; }; -/* - * List of extents to be free "later". - * The list is kept sorted on xbf_startblock. - */ -struct xfs_extent_free_item -{ - xfs_fsblock_t xefi_startblock;/* starting fs block number */ - xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ - bool xefi_skip_discard; - struct list_head xefi_list; - struct xfs_owner_info xefi_oinfo; /* extent owner */ -}; - #define XFS_BMAP_MAX_NMAP 4 /* @@ -189,9 +174,6 @@ unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); -void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, - xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); @@ -239,16 +221,6 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); -static inline void -xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo) -{ - __xfs_bmap_add_free(tp, bno, len, oinfo, false); -} - enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, XFS_BMAP_UNMAP, @@ -257,8 +229,8 @@ enum xfs_bmap_intent_type { struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; - struct xfs_inode *bi_owner; int bi_whichfork; + struct xfs_inode *bi_owner; struct xfs_bmbt_irec bi_bmap; }; @@ -290,4 +262,9 @@ int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, int flags); +extern struct kmem_cache *xfs_bmap_intent_cache; + +int __init xfs_bmap_intent_init_cache(void); +void xfs_bmap_intent_destroy_cache(void); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 72444b8b38a6..453309fc85f2 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_trace.h" #include "xfs_rmap.h" +static struct kmem_cache *xfs_bmbt_cur_cache; + /* * Convert on-disk form of btree root to in-memory form. */ @@ -286,7 +288,7 @@ xfs_bmbt_free_block( struct xfs_owner_info oinfo; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -552,13 +554,9 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP, + mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache); cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - cur->bc_btnum = XFS_BTNUM_BMAP; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); cur->bc_ops = &xfs_bmbt_ops; @@ -575,6 +573,17 @@ xfs_bmbt_init_cursor( return cur; } +/* Calculate number of records in a block mapping btree block. */ +static inline unsigned int +xfs_bmbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + /* * Calculate number of records in a bmap btree block. */ @@ -585,10 +594,24 @@ xfs_bmbt_maxrecs( int leaf) { blocklen -= XFS_BMBT_BLOCK_LEN(mp); + return xfs_bmbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_bmbt_rec_t); - return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +/* Compute the max possible height for block mapping btrees. */ +unsigned int +xfs_bmbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_bmbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; + + /* One extra level for the inode root. */ + return xfs_btree_compute_maxlevels(minrecs, MAXEXTNUM) + 1; } /* @@ -654,3 +677,22 @@ xfs_bmbt_calc_size( { return xfs_btree_calc_size(mp->m_bmap_dmnr, len); } + +int __init +xfs_bmbt_init_cur_cache(void) +{ + xfs_bmbt_cur_cache = kmem_cache_create("xfs_bmbt_cur", + xfs_btree_cur_sizeof(xfs_bmbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_bmbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_bmbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_bmbt_cur_cache); + xfs_bmbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 729e3bc569be..3e7a40a83835 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -110,4 +110,9 @@ extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp, unsigned long long len); +unsigned int xfs_bmbt_maxlevels_ondisk(void); + +int __init xfs_bmbt_init_cur_cache(void); +void xfs_bmbt_destroy_cur_cache(void); + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 298395481713..b4e19aacb9de 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -22,11 +22,11 @@ #include "xfs_log.h" #include "xfs_btree_staging.h" #include "xfs_ag.h" - -/* - * Cursor allocation zone. - */ -kmem_zone_t *xfs_btree_cur_zone; +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount_btree.h" /* * Btree magic numbers. @@ -367,8 +367,8 @@ xfs_btree_del_cursor( * way we won't have initialized all the entries down to 0. */ for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); + if (cur->bc_levels[i].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp); else if (!error) break; } @@ -379,7 +379,7 @@ xfs_btree_del_cursor( kmem_free(cur->bc_ops); if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) xfs_perag_put(cur->bc_ag.pag); - kmem_cache_free(xfs_btree_cur_zone, cur); + kmem_cache_free(cur->bc_cache, cur); } /* @@ -388,14 +388,14 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur) /* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur) /* output cursor */ { struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ - xfs_btree_cur_t *new; /* new cursor value */ + struct xfs_btree_cur *new; /* new cursor value */ xfs_trans_t *tp; /* transaction pointer, can be NULL */ tp = cur->bc_tp; @@ -415,9 +415,9 @@ xfs_btree_dup_cursor( * For each level current, re-get the buffer and copy the ptr value. */ for (i = 0; i < new->bc_nlevels; i++) { - new->bc_ptrs[i] = cur->bc_ptrs[i]; - new->bc_ra[i] = cur->bc_ra[i]; - bp = cur->bc_bufs[i]; + new->bc_levels[i].ptr = cur->bc_levels[i].ptr; + new->bc_levels[i].ra = cur->bc_levels[i].ra; + bp = cur->bc_levels[i].bp; if (bp) { error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, xfs_buf_daddr(bp), mp->m_bsize, @@ -429,7 +429,7 @@ xfs_btree_dup_cursor( return error; } } - new->bc_bufs[i] = bp; + new->bc_levels[i].bp = bp; } *ncur = new; return 0; @@ -681,7 +681,7 @@ xfs_btree_get_block( return xfs_btree_get_iroot(cur); } - *bpp = cur->bc_bufs[level]; + *bpp = cur->bc_levels[level].bp; return XFS_BUF_TO_BLOCK(*bpp); } @@ -691,7 +691,7 @@ xfs_btree_get_block( */ STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -711,7 +711,7 @@ xfs_btree_firstrec( /* * Set the ptr value to 1, that's the first record/key. */ - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; return 1; } @@ -721,7 +721,7 @@ xfs_btree_firstrec( */ STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -741,7 +741,7 @@ xfs_btree_lastrec( /* * Set the ptr value to numrecs, that's the last record/key. */ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); + cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs); return 1; } @@ -922,11 +922,11 @@ xfs_btree_readahead( (lev == cur->bc_nlevels - 1)) return 0; - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) return 0; - cur->bc_ra[lev] |= lr; - block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + cur->bc_levels[lev].ra |= lr; + block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return xfs_btree_readahead_lblock(cur, lr, block); @@ -985,28 +985,28 @@ xfs_btree_readahead_ptr( */ STATIC void xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - if (cur->bc_bufs[lev]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); - cur->bc_bufs[lev] = bp; - cur->bc_ra[lev] = 0; + if (cur->bc_levels[lev].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp); + cur->bc_levels[lev].bp = bp; + cur->bc_levels[lev].ra = 0; b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } else { if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } } @@ -1548,7 +1548,7 @@ xfs_btree_increment( #endif /* We're done if we remain in the block after the increment. */ - if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block)) goto out1; /* Fail if we just went off the right edge of the tree. */ @@ -1571,7 +1571,7 @@ xfs_btree_increment( goto error0; #endif - if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block)) break; /* Read-ahead the right block for the next loop. */ @@ -1598,14 +1598,14 @@ xfs_btree_increment( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = 1; + cur->bc_levels[lev].ptr = 1; } out1: *stat = 1; @@ -1641,7 +1641,7 @@ xfs_btree_decrement( xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); /* We're done if we remain in the block after the decrement. */ - if (--cur->bc_ptrs[level] > 0) + if (--cur->bc_levels[level].ptr > 0) goto out1; /* Get a pointer to the btree block. */ @@ -1665,7 +1665,7 @@ xfs_btree_decrement( * Stop when we don't go off the left edge of a block. */ for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) + if (--cur->bc_levels[lev].ptr > 0) break; /* Read-ahead the left block for the next loop. */ xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); @@ -1691,13 +1691,13 @@ xfs_btree_decrement( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block); } out1: *stat = 1; @@ -1735,7 +1735,7 @@ xfs_btree_lookup_get_block( * * Otherwise throw it away and get a new one. */ - bp = cur->bc_bufs[level]; + bp = cur->bc_levels[level].bp; error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); if (error) return error; @@ -1864,7 +1864,7 @@ xfs_btree_lookup( return -EFSCORRUPTED; } - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE; *stat = 0; return 0; } @@ -1916,7 +1916,7 @@ xfs_btree_lookup( if (error) goto error0; - cur->bc_ptrs[level] = keyno; + cur->bc_levels[level].ptr = keyno; } } @@ -1933,7 +1933,7 @@ xfs_btree_lookup( !xfs_btree_ptr_is_null(cur, &ptr)) { int i; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; @@ -1944,7 +1944,7 @@ xfs_btree_lookup( } } else if (dir == XFS_LOOKUP_LE && diff > 0) keyno--; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; /* Return if we succeeded or not. */ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) @@ -2104,7 +2104,7 @@ __xfs_btree_updkeys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; nlkey = xfs_btree_key_addr(cur, ptr, block); nhkey = xfs_btree_high_key_addr(cur, ptr, block); if (!force_all && @@ -2171,7 +2171,7 @@ xfs_btree_update_keys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; kp = xfs_btree_key_addr(cur, ptr, block); xfs_btree_copy_keys(cur, kp, &key, 1); xfs_btree_log_keys(cur, bp, ptr, ptr); @@ -2205,7 +2205,7 @@ xfs_btree_update( goto error0; #endif /* Get the address of the rec to be updated. */ - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; rp = xfs_btree_rec_addr(cur, ptr, block); /* Fill in the new contents and log them. */ @@ -2280,7 +2280,7 @@ xfs_btree_lshift( * If the cursor entry is the one that would be moved, don't * do it... it's too complicated. */ - if (cur->bc_ptrs[level] <= 1) + if (cur->bc_levels[level].ptr <= 1) goto out0; /* Set up the left neighbor as "left". */ @@ -2414,7 +2414,7 @@ xfs_btree_lshift( goto error0; /* Slide the cursor value left one. */ - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; *stat = 1; return 0; @@ -2476,7 +2476,7 @@ xfs_btree_rshift( * do it... it's too complicated. */ lrecs = xfs_btree_get_numrecs(left); - if (cur->bc_ptrs[level] >= lrecs) + if (cur->bc_levels[level].ptr >= lrecs) goto out0; /* Set up the right neighbor as "right". */ @@ -2664,7 +2664,7 @@ __xfs_btree_split( */ lrecs = xfs_btree_get_numrecs(left); rrecs = lrecs / 2; - if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1) rrecs++; src_index = (lrecs - rrecs + 1); @@ -2760,9 +2760,9 @@ __xfs_btree_split( * If it's just pointing past the last entry in left, then we'll * insert there, so don't change anything in that case. */ - if (cur->bc_ptrs[level] > lrecs + 1) { + if (cur->bc_levels[level].ptr > lrecs + 1) { xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= lrecs; + cur->bc_levels[level].ptr -= lrecs; } /* * If there are more levels, we'll need another cursor which refers @@ -2772,7 +2772,7 @@ __xfs_btree_split( error = xfs_btree_dup_cursor(cur, curp); if (error) goto error0; - (*curp)->bc_ptrs[level + 1]++; + (*curp)->bc_levels[level + 1].ptr++; } *ptrp = rptr; *stat = 1; @@ -2933,7 +2933,8 @@ xfs_btree_new_iroot( be16_add_cpu(&block->bb_level, 1); xfs_btree_set_numrecs(block, 1); cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); + cur->bc_levels[level + 1].ptr = 1; kp = xfs_btree_key_addr(cur, 1, block); ckp = xfs_btree_key_addr(cur, 1, cblock); @@ -3094,8 +3095,9 @@ xfs_btree_new_root( /* Fix up the cursor. */ xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_levels[cur->bc_nlevels].ptr = nptr; cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); *stat = 1; return 0; error0: @@ -3152,7 +3154,7 @@ xfs_btree_make_block_unfull( return error; if (*stat) { - *oindex = *index = cur->bc_ptrs[level]; + *oindex = *index = cur->bc_levels[level].ptr; return 0; } @@ -3167,7 +3169,7 @@ xfs_btree_make_block_unfull( return error; - *index = cur->bc_ptrs[level]; + *index = cur->bc_levels[level].ptr; return 0; } @@ -3214,7 +3216,7 @@ xfs_btree_insrec( } /* If we're off the left edge, return failure. */ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3557,7 +3559,7 @@ xfs_btree_kill_iroot( if (error) return error; - cur->bc_bufs[level - 1] = NULL; + cur->bc_levels[level - 1].bp = NULL; be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); @@ -3590,8 +3592,8 @@ xfs_btree_kill_root( if (error) return error; - cur->bc_bufs[level] = NULL; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = NULL; + cur->bc_levels[level].ra = 0; cur->bc_nlevels--; return 0; @@ -3650,7 +3652,7 @@ xfs_btree_delrec( tcur = NULL; /* Get the index of the entry being deleted, check for nothing there. */ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3960,7 +3962,7 @@ xfs_btree_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); tcur = NULL; if (level == 0) - cur->bc_ptrs[0]++; + cur->bc_levels[0].ptr++; *stat = 1; return 0; @@ -4097,9 +4099,9 @@ xfs_btree_delrec( * cursor to the left block, and fix up the index. */ if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = lbp; + cur->bc_levels[level].ptr += lrecs; + cur->bc_levels[level].ra = 0; } /* * If we joined with the right neighbor and there's a level above @@ -4119,16 +4121,16 @@ xfs_btree_delrec( * We can't use decrement because it would change the next level up. */ if (level > 0) - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; /* * We combined blocks, so we have to update the parent keys if the - * btree supports overlapped intervals. However, bc_ptrs[level + 1] - * points to the old block so that the caller knows which record to - * delete. Therefore, the caller must be savvy enough to call updkeys - * for us if we return stat == 2. The other exit points from this - * function don't require deletions further up the tree, so they can - * call updkeys directly. + * btree supports overlapped intervals. However, + * bc_levels[level + 1].ptr points to the old block so that the caller + * knows which record to delete. Therefore, the caller must be savvy + * enough to call updkeys for us if we return stat == 2. The other + * exit points from this function don't require deletions further up + * the tree, so they can call updkeys directly. */ /* Return value means the next level up has something to do. */ @@ -4182,7 +4184,7 @@ xfs_btree_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { + if (cur->bc_levels[level].ptr == 0) { error = xfs_btree_decrement(cur, level, &i); if (error) goto error0; @@ -4213,7 +4215,7 @@ xfs_btree_get_rec( int error; /* error return value */ #endif - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; block = xfs_btree_get_block(cur, 0, &bp); #ifdef DEBUG @@ -4512,21 +4514,76 @@ xfs_btree_sblock_verify( } /* - * Calculate the number of btree levels needed to store a given number of - * records in a short-format btree. + * For the given limits on leaf and keyptr records per block, calculate the + * height of the tree needed to index the number of leaf records. */ -uint +unsigned int xfs_btree_compute_maxlevels( - uint *limits, - unsigned long len) + const unsigned int *limits, + unsigned long long records) +{ + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned int height = 1; + + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + height++; + } + + return height; +} + +/* + * For the given limits on leaf and keyptr records per block, calculate the + * number of blocks needed to index the given number of leaf records. + */ +unsigned long long +xfs_btree_calc_size( + const unsigned int *limits, + unsigned long long records) +{ + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned long long blocks = level_blocks; + + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + blocks += level_blocks; + } + + return blocks; +} + +/* + * Given a number of available blocks for the btree to consume with records and + * pointers, calculate the height of the tree needed to index all the records + * that space can hold based on the number of pointers each interior node + * holds. + * + * We start by assuming a single level tree consumes a single block, then track + * the number of blocks each node level consumes until we no longer have space + * to store the next node level. At this point, we are indexing all the leaf + * blocks in the space, and there's no more free space to split the tree any + * further. That's our maximum btree height. + */ +unsigned int +xfs_btree_space_to_height( + const unsigned int *limits, + unsigned long long leaf_blocks) { - uint level; - unsigned long maxblocks; + unsigned long long node_blocks = limits[1]; + unsigned long long blocks_left = leaf_blocks - 1; + unsigned int height = 1; + + if (leaf_blocks < 1) + return 0; + + while (node_blocks < blocks_left) { + blocks_left -= node_blocks; + node_blocks *= limits[1]; + height++; + } - maxblocks = (len + limits[0] - 1) / limits[0]; - for (level = 1; maxblocks > 1; level++) - maxblocks = (maxblocks + limits[1] - 1) / limits[1]; - return level; + return height; } /* @@ -4661,23 +4718,25 @@ xfs_btree_overlapped_query_range( if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { pop_up: if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } if (level == 0) { /* Handle a leaf node. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, @@ -4700,14 +4759,15 @@ pop_up: /* Record is larger than high key; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* Handle an internal node. */ - lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); - hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); + lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); + hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, + block); + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); @@ -4730,13 +4790,13 @@ pop_up: if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; continue; } else if (hdiff < 0) { /* The low key is larger than the upper range; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; } out: @@ -4747,13 +4807,14 @@ out: * with a zero-results range query, so release the buffers if we * failed to return any results. */ - if (cur->bc_bufs[0] == NULL) { + if (cur->bc_levels[0].bp == NULL) { for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) { - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); - cur->bc_bufs[i] = NULL; - cur->bc_ptrs[i] = 0; - cur->bc_ra[i] = 0; + if (cur->bc_levels[i].bp) { + xfs_trans_brelse(cur->bc_tp, + cur->bc_levels[i].bp); + cur->bc_levels[i].bp = NULL; + cur->bc_levels[i].ptr = 0; + cur->bc_levels[i].ra = 0; } } } @@ -4816,29 +4877,6 @@ xfs_btree_query_all( return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); } -/* - * Calculate the number of blocks needed to store a given number of records - * in a short-format (per-AG metadata) btree. - */ -unsigned long long -xfs_btree_calc_size( - uint *limits, - unsigned long long len) -{ - int level; - int maxrecs; - unsigned long long rval; - - maxrecs = limits[0]; - for (level = 0, rval = 0; len > 1; level++) { - len += maxrecs - 1; - do_div(len, maxrecs); - maxrecs = limits[1]; - rval += len; - } - return rval; -} - static int xfs_btree_count_blocks_helper( struct xfs_btree_cur *cur, @@ -4915,7 +4953,7 @@ xfs_btree_has_more_records( block = xfs_btree_get_block(cur, 0, &bp); /* There are still records in this block. */ - if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) + if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block)) return true; /* There are more record blocks. */ @@ -4924,3 +4962,42 @@ xfs_btree_has_more_records( else return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); } + +/* Set up all the btree cursor caches. */ +int __init +xfs_btree_init_cur_caches(void) +{ + int error; + + error = xfs_allocbt_init_cur_cache(); + if (error) + return error; + error = xfs_inobt_init_cur_cache(); + if (error) + goto err; + error = xfs_bmbt_init_cur_cache(); + if (error) + goto err; + error = xfs_rmapbt_init_cur_cache(); + if (error) + goto err; + error = xfs_refcountbt_init_cur_cache(); + if (error) + goto err; + + return 0; +err: + xfs_btree_destroy_cur_caches(); + return error; +} + +/* Destroy all the btree cursor caches, if they've been allocated. */ +void +xfs_btree_destroy_cur_caches(void) +{ + xfs_allocbt_destroy_cur_cache(); + xfs_inobt_destroy_cur_cache(); + xfs_bmbt_destroy_cur_cache(); + xfs_rmapbt_destroy_cur_cache(); + xfs_refcountbt_destroy_cur_cache(); +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4eaf8517f850..22d9f411fde6 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -13,8 +13,6 @@ struct xfs_trans; struct xfs_ifork; struct xfs_perag; -extern kmem_zone_t *xfs_btree_cur_zone; - /* * Generic key, ptr and record wrapper structures. * @@ -92,8 +90,6 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); #define XFS_BTREE_STATS_ADD(cur, stat, val) \ XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) -#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ - struct xfs_btree_ops { /* size of the key and record structures */ size_t key_len; @@ -181,18 +177,18 @@ union xfs_btree_irec { /* Per-AG btree information. */ struct xfs_btree_cur_ag { - struct xfs_perag *pag; + struct xfs_perag *pag; union { struct xfs_buf *agbp; struct xbtree_afakeroot *afake; /* for staging cursor */ }; union { struct { - unsigned long nr_ops; /* # record updates */ - int shape_changes; /* # of extent splits */ + unsigned int nr_ops; /* # record updates */ + unsigned int shape_changes; /* # of extent splits */ } refc; struct { - bool active; /* allocation cursor state */ + bool active; /* allocation cursor state */ } abt; }; }; @@ -212,26 +208,35 @@ struct xfs_btree_cur_ino { #define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; +struct xfs_btree_level { + /* buffer pointer */ + struct xfs_buf *bp; + + /* key/record number */ + uint16_t ptr; + + /* readahead info */ +#define XFS_BTCUR_LEFTRA (1 << 0) /* left sibling has been read-ahead */ +#define XFS_BTCUR_RIGHTRA (1 << 1) /* right sibling has been read-ahead */ + uint16_t ra; +}; + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. */ -typedef struct xfs_btree_cur +struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ const struct xfs_btree_ops *bc_ops; - uint bc_flags; /* btree features - below */ + struct kmem_cache *bc_cache; /* cursor cache */ + unsigned int bc_flags; /* btree features - below */ + xfs_btnum_t bc_btnum; /* identifies which btree type */ union xfs_btree_irec bc_rec; /* current insert/search record value */ - struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ - int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ - uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ -#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ -#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ - uint8_t bc_nlevels; /* number of levels in the tree */ - uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ - xfs_btnum_t bc_btnum; /* identifies which btree type */ - int bc_statoff; /* offset of btre stats array */ + uint8_t bc_nlevels; /* number of levels in the tree */ + uint8_t bc_maxlevels; /* maximum levels for this btree type */ + int bc_statoff; /* offset of btree stats array */ /* * Short btree pointers need an agno to be able to turn the pointers @@ -243,7 +248,21 @@ typedef struct xfs_btree_cur struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; }; -} xfs_btree_cur_t; + + /* Must be at the end of the struct! */ + struct xfs_btree_level bc_levels[]; +}; + +/* + * Compute the size of a btree cursor that can handle a btree of a given + * height. The bc_levels array handles node and leaf blocks, so its size + * is exactly nlevels. + */ +static inline size_t +xfs_btree_cur_sizeof(unsigned int nlevels) +{ + return struct_size((struct xfs_btree_cur *)NULL, bc_levels, nlevels); +} /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ @@ -258,7 +277,6 @@ typedef struct xfs_btree_cur */ #define XFS_BTREE_STAGING (1<<5) - #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 @@ -309,7 +327,7 @@ xfs_btree_check_sptr( */ void xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int error); /* del because of error */ /* @@ -318,8 +336,8 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur);/* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur);/* output cursor */ /* * Compute first and last byte offsets for the fields given. @@ -460,8 +478,12 @@ xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); -uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); -unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); +unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits, + unsigned long long records); +unsigned long long xfs_btree_calc_size(const unsigned int *limits, + unsigned long long records); +unsigned int xfs_btree_space_to_height(const unsigned int *limits, + unsigned long long blocks); /* * Return codes for the query range iterator function are 0 to continue @@ -527,7 +549,7 @@ struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? */ static inline bool xfs_btree_islastblock( - xfs_btree_cur_t *cur, + struct xfs_btree_cur *cur, int level) { struct xfs_btree_block *block; @@ -558,4 +580,27 @@ void xfs_btree_copy_keys(struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, const union xfs_btree_key *src_key, int numkeys); +static inline struct xfs_btree_cur * +xfs_btree_alloc_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_btnum_t btnum, + uint8_t maxlevels, + struct kmem_cache *cache) +{ + struct xfs_btree_cur *cur; + + cur = kmem_cache_zalloc(cache, GFP_NOFS | __GFP_NOFAIL); + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = btnum; + cur->bc_maxlevels = maxlevels; + cur->bc_cache = cache; + + return cur; +} + +int __init xfs_btree_init_cur_caches(void); +void xfs_btree_destroy_cur_caches(void); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index ac9e80152b5c..dd75e208b543 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -657,12 +657,12 @@ xfs_btree_bload_compute_geometry( * checking levels 0 and 1 here, so set bc_nlevels such that the btree * code doesn't interpret either as the root level. */ - cur->bc_nlevels = XFS_BTREE_MAXLEVELS - 1; + cur->bc_nlevels = cur->bc_maxlevels - 1; xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0); xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); bbl->nr_records = nr_this_level = nr_records; - for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + for (cur->bc_nlevels = 1; cur->bc_nlevels <= cur->bc_maxlevels;) { uint64_t level_blocks; uint64_t dontcare64; unsigned int level = cur->bc_nlevels - 1; @@ -703,6 +703,7 @@ xfs_btree_bload_compute_geometry( * block-based btree level. */ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &level_blocks, &dontcare64); @@ -718,13 +719,14 @@ xfs_btree_bload_compute_geometry( /* Otherwise, we need another level of btree. */ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); } nr_blocks += level_blocks; nr_this_level = level_blocks; } - if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + if (cur->bc_nlevels > cur->bc_maxlevels) return -EOVERFLOW; bbl->btree_height = cur->bc_nlevels; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index c062e2c85178..dd7a2dbce1d1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -72,7 +72,7 @@ STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *save_blk); -kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ +struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */ /* * Allocate a dir-state structure. @@ -84,7 +84,7 @@ xfs_da_state_alloc( { struct xfs_da_state *state; - state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL); + state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); state->args = args; state->mp = args->dp->i_mount; return state; @@ -113,7 +113,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ - kmem_cache_free(xfs_da_state_zone, state); + kmem_cache_free(xfs_da_state_cache, state); } static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ad5dd324631a..0faf7d9ac241 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -9,7 +9,6 @@ struct xfs_inode; struct xfs_trans; -struct zone; /* * Directory/attribute geometry information. There will be one of these for each @@ -227,6 +226,6 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); -extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_cache *xfs_da_state_cache; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index eff4a127188e..0805ade2d300 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -18,6 +18,12 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "xfs_bmap.h" +#include "xfs_alloc.h" + +static struct kmem_cache *xfs_defer_pending_cache; /* * Deferred Operations in XFS @@ -232,23 +238,20 @@ xfs_defer_trans_abort( } } -/* Roll a transaction so we can do some deferred op processing. */ -STATIC int -xfs_defer_trans_roll( - struct xfs_trans **tpp) +/* + * Capture resources that the caller said not to release ("held") when the + * transaction commits. Caller is responsible for zero-initializing @dres. + */ +static int +xfs_defer_save_resources( + struct xfs_defer_resources *dres, + struct xfs_trans *tp) { - struct xfs_trans *tp = *tpp; struct xfs_buf_log_item *bli; struct xfs_inode_log_item *ili; struct xfs_log_item *lip; - struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; - struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; - unsigned int ordered = 0; /* bitmap */ - int bpcount = 0, ipcount = 0; - int i; - int error; - BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS); + BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS); list_for_each_entry(lip, &tp->t_items, li_trans) { switch (lip->li_type) { @@ -256,28 +259,29 @@ xfs_defer_trans_roll( bli = container_of(lip, struct xfs_buf_log_item, bli_item); if (bli->bli_flags & XFS_BLI_HOLD) { - if (bpcount >= XFS_DEFER_OPS_NR_BUFS) { + if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) { ASSERT(0); return -EFSCORRUPTED; } if (bli->bli_flags & XFS_BLI_ORDERED) - ordered |= (1U << bpcount); + dres->dr_ordered |= + (1U << dres->dr_bufs); else xfs_trans_dirty_buf(tp, bli->bli_buf); - bplist[bpcount++] = bli->bli_buf; + dres->dr_bp[dres->dr_bufs++] = bli->bli_buf; } break; case XFS_LI_INODE: ili = container_of(lip, struct xfs_inode_log_item, ili_item); if (ili->ili_lock_flags == 0) { - if (ipcount >= XFS_DEFER_OPS_NR_INODES) { + if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) { ASSERT(0); return -EFSCORRUPTED; } xfs_trans_log_inode(tp, ili->ili_inode, XFS_ILOG_CORE); - iplist[ipcount++] = ili->ili_inode; + dres->dr_ip[dres->dr_inos++] = ili->ili_inode; } break; default: @@ -285,7 +289,43 @@ xfs_defer_trans_roll( } } - trace_xfs_defer_trans_roll(tp, _RET_IP_); + return 0; +} + +/* Attach the held resources to the transaction. */ +static void +xfs_defer_restore_resources( + struct xfs_trans *tp, + struct xfs_defer_resources *dres) +{ + unsigned short i; + + /* Rejoin the joined inodes. */ + for (i = 0; i < dres->dr_inos; i++) + xfs_trans_ijoin(tp, dres->dr_ip[i], 0); + + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < dres->dr_bufs; i++) { + xfs_trans_bjoin(tp, dres->dr_bp[i]); + if (dres->dr_ordered & (1U << i)) + xfs_trans_ordered_buf(tp, dres->dr_bp[i]); + xfs_trans_bhold(tp, dres->dr_bp[i]); + } +} + +/* Roll a transaction so we can do some deferred op processing. */ +STATIC int +xfs_defer_trans_roll( + struct xfs_trans **tpp) +{ + struct xfs_defer_resources dres = { }; + int error; + + error = xfs_defer_save_resources(&dres, *tpp); + if (error) + return error; + + trace_xfs_defer_trans_roll(*tpp, _RET_IP_); /* * Roll the transaction. Rolling always given a new transaction (even @@ -295,22 +335,11 @@ xfs_defer_trans_roll( * happened. */ error = xfs_trans_roll(tpp); - tp = *tpp; - /* Rejoin the joined inodes. */ - for (i = 0; i < ipcount; i++) - xfs_trans_ijoin(tp, iplist[i], 0); - - /* Rejoin the buffers and dirty them so the log moves forward. */ - for (i = 0; i < bpcount; i++) { - xfs_trans_bjoin(tp, bplist[i]); - if (ordered & (1U << i)) - xfs_trans_ordered_buf(tp, bplist[i]); - xfs_trans_bhold(tp, bplist[i]); - } + xfs_defer_restore_resources(*tpp, &dres); if (error) - trace_xfs_defer_trans_roll_error(tp, error); + trace_xfs_defer_trans_roll_error(*tpp, error); return error; } @@ -342,7 +371,7 @@ xfs_defer_cancel_list( ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); } } @@ -439,7 +468,7 @@ xfs_defer_finish_one( /* Done with the dfp, free it. */ list_del(&dfp->dfp_list); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); @@ -573,8 +602,8 @@ xfs_defer_add( dfp = NULL; } if (!dfp) { - dfp = kmem_alloc(sizeof(struct xfs_defer_pending), - KM_NOFS); + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); dfp->dfp_type = type; dfp->dfp_intent = NULL; dfp->dfp_done = NULL; @@ -627,10 +656,11 @@ xfs_defer_move( */ static struct xfs_defer_capture * xfs_defer_ops_capture( - struct xfs_trans *tp, - struct xfs_inode *capture_ip) + struct xfs_trans *tp) { struct xfs_defer_capture *dfc; + unsigned short i; + int error; if (list_empty(&tp->t_dfops)) return NULL; @@ -654,27 +684,48 @@ xfs_defer_ops_capture( /* Preserve the log reservation size. */ dfc->dfc_logres = tp->t_log_res; + error = xfs_defer_save_resources(&dfc->dfc_held, tp); + if (error) { + /* + * Resource capture should never fail, but if it does, we + * still have to shut down the log and release things + * properly. + */ + xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE); + } + /* - * Grab an extra reference to this inode and attach it to the capture - * structure. + * Grab extra references to the inodes and buffers because callers are + * expected to release their held references after we commit the + * transaction. */ - if (capture_ip) { - ihold(VFS_I(capture_ip)); - dfc->dfc_capture_ip = capture_ip; + for (i = 0; i < dfc->dfc_held.dr_inos; i++) { + ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL)); + ihold(VFS_I(dfc->dfc_held.dr_ip[i])); } + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_hold(dfc->dfc_held.dr_bp[i]); + return dfc; } /* Release all resources that we used to capture deferred ops. */ void -xfs_defer_ops_release( +xfs_defer_ops_capture_free( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { + unsigned short i; + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); - if (dfc->dfc_capture_ip) - xfs_irele(dfc->dfc_capture_ip); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_relse(dfc->dfc_held.dr_bp[i]); + + for (i = 0; i < dfc->dfc_held.dr_inos; i++) + xfs_irele(dfc->dfc_held.dr_ip[i]); + kmem_free(dfc); } @@ -689,24 +740,21 @@ xfs_defer_ops_release( int xfs_defer_ops_capture_and_commit( struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list) { struct xfs_mount *mp = tp->t_mountp; struct xfs_defer_capture *dfc; int error; - ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL)); - /* If we don't capture anything, commit transaction and exit. */ - dfc = xfs_defer_ops_capture(tp, capture_ip); + dfc = xfs_defer_ops_capture(tp); if (!dfc) return xfs_trans_commit(tp); /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); return error; } @@ -724,17 +772,19 @@ void xfs_defer_ops_continue( struct xfs_defer_capture *dfc, struct xfs_trans *tp, - struct xfs_inode **captured_ipp) + struct xfs_defer_resources *dres) { ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); /* Lock and join the captured inode to the new transaction. */ - if (dfc->dfc_capture_ip) { - xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0); - } - *captured_ipp = dfc->dfc_capture_ip; + if (dfc->dfc_held.dr_inos == 2) + xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL, + dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL); + else if (dfc->dfc_held.dr_inos == 1) + xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL); + xfs_defer_restore_resources(tp, &dfc->dfc_held); + memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources)); /* Move captured dfops chain and state to the transaction. */ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); @@ -742,3 +792,82 @@ xfs_defer_ops_continue( kmem_free(dfc); } + +/* Release the resources captured and continued during recovery. */ +void +xfs_defer_resources_rele( + struct xfs_defer_resources *dres) +{ + unsigned short i; + + for (i = 0; i < dres->dr_inos; i++) { + xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL); + xfs_irele(dres->dr_ip[i]); + dres->dr_ip[i] = NULL; + } + + for (i = 0; i < dres->dr_bufs; i++) { + xfs_buf_relse(dres->dr_bp[i]); + dres->dr_bp[i] = NULL; + } + + dres->dr_inos = 0; + dres->dr_bufs = 0; + dres->dr_ordered = 0; +} + +static inline int __init +xfs_defer_init_cache(void) +{ + xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending", + sizeof(struct xfs_defer_pending), + 0, 0, NULL); + + return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM; +} + +static inline void +xfs_defer_destroy_cache(void) +{ + kmem_cache_destroy(xfs_defer_pending_cache); + xfs_defer_pending_cache = NULL; +} + +/* Set up caches for deferred work items. */ +int __init +xfs_defer_init_item_caches(void) +{ + int error; + + error = xfs_defer_init_cache(); + if (error) + return error; + error = xfs_rmap_intent_init_cache(); + if (error) + goto err; + error = xfs_refcount_intent_init_cache(); + if (error) + goto err; + error = xfs_bmap_intent_init_cache(); + if (error) + goto err; + error = xfs_extfree_intent_init_cache(); + if (error) + goto err; + + return 0; +err: + xfs_defer_destroy_item_caches(); + return error; +} + +/* Destroy all the deferred work item caches, if they've been allocated. */ +void +xfs_defer_destroy_item_caches(void) +{ + xfs_extfree_intent_destroy_cache(); + xfs_bmap_intent_destroy_cache(); + xfs_refcount_intent_destroy_cache(); + xfs_rmap_intent_destroy_cache(); + xfs_defer_destroy_cache(); +} diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 05472f71fffe..7bb8a31ad65b 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -65,6 +65,30 @@ extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; /* + * Deferred operation item relogging limits. + */ +#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ + +/* Resources that must be held across a transaction roll. */ +struct xfs_defer_resources { + /* held buffers */ + struct xfs_buf *dr_bp[XFS_DEFER_OPS_NR_BUFS]; + + /* inodes with no unlock flags */ + struct xfs_inode *dr_ip[XFS_DEFER_OPS_NR_INODES]; + + /* number of held buffers */ + unsigned short dr_bufs; + + /* bitmap of ordered buffers */ + unsigned short dr_ordered; + + /* number of held inodes */ + unsigned short dr_inos; +}; + +/* * This structure enables a dfops user to detach the chain of deferred * operations from a transaction so that they can be continued later. */ @@ -83,11 +107,7 @@ struct xfs_defer_capture { /* Log reservation saved from the transaction. */ unsigned int dfc_logres; - /* - * An inode reference that must be maintained to complete the deferred - * work. - */ - struct xfs_inode *dfc_capture_ip; + struct xfs_defer_resources dfc_held; }; /* @@ -95,9 +115,14 @@ struct xfs_defer_capture { * This doesn't normally happen except log recovery. */ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list); + struct list_head *capture_list); void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, - struct xfs_inode **captured_ipp); -void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); + struct xfs_defer_resources *dres); +void xfs_defer_ops_capture_free(struct xfs_mount *mp, + struct xfs_defer_capture *d); +void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + +int __init xfs_defer_init_item_caches(void); +void xfs_defer_destroy_item_caches(void); #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index deeb74becabc..15a362e2f5ea 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -22,7 +22,7 @@ xfs_calc_dquots_per_chunk( unsigned int nbblks) /* basic block units */ { ASSERT(nbblks > 0); - return BBTOB(nbblks) / sizeof(xfs_dqblk_t); + return BBTOB(nbblks) / sizeof(struct xfs_dqblk); } /* @@ -127,7 +127,7 @@ xfs_dqblk_repair( * Typically, a repair is only requested by quotacheck. */ ASSERT(id != -1); - memset(dqb, 0, sizeof(xfs_dqblk_t)); + memset(dqb, 0, sizeof(struct xfs_dqblk)); dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 2d7057b7984b..d665c04e69dd 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -184,7 +184,7 @@ typedef struct xfs_sb { * Superblock - on disk version. Must match the in core version above. * Must be padded to 64 bit alignment. */ -typedef struct xfs_dsb { +struct xfs_dsb { __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ __be32 sb_blocksize; /* logical block size, bytes */ __be64 sb_dblocks; /* number of data blocks */ @@ -263,7 +263,7 @@ typedef struct xfs_dsb { uuid_t sb_meta_uuid; /* metadata file system unique id */ /* must be padded to 64 bit alignment */ -} xfs_dsb_t; +}; /* * Misc. Flags - warning - these will be cleared by xfs_repair unless @@ -780,7 +780,7 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) * padding field for v3 inodes. */ #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -typedef struct xfs_dinode { +struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ @@ -825,7 +825,7 @@ typedef struct xfs_dinode { uuid_t di_uuid; /* UUID of the filesystem */ /* structure must be padded to 64 bit alignment */ -} xfs_dinode_t; +}; #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) @@ -1215,7 +1215,7 @@ struct xfs_disk_dquot { * This is what goes on disk. This is separated from the xfs_disk_dquot because * carrying the unnecessary padding would be a waste of memory. */ -typedef struct xfs_dqblk { +struct xfs_dqblk { struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */ char dd_fill[4];/* filling for posterity */ @@ -1225,7 +1225,7 @@ typedef struct xfs_dqblk { __be32 dd_crc; /* checksum */ __be64 dd_lsn; /* last modification in log */ uuid_t dd_uuid; /* location information */ -} xfs_dqblk_t; +}; #define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index bde2b4c64dbe..c43877c8a279 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -268,6 +268,8 @@ typedef struct xfs_fsop_resblks { */ #define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ #define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ +#define XFS_MAX_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_BLOCKSIZE) +#define XFS_MAX_CRC_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_CRC_BLOCKSIZE) /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 994ad783d407..b418fe0c0679 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1827,7 +1827,7 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES); return; @@ -1872,7 +1872,7 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, &XFS_RMAP_OINFO_INODES); /* reset range to current bit and carry on... */ @@ -2793,6 +2793,7 @@ xfs_ialloc_setup_geometry( inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, inodes); + ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk()); /* * Set the maximum inode count for this filesystem, being careful not diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 27190840c5d8..b2ad2fdc40f5 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -22,6 +22,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_inobt_cur_cache; + STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, @@ -432,10 +434,8 @@ xfs_inobt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, + M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); if (btnum == XFS_BTNUM_INO) { cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); cur->bc_ops = &xfs_inobt_ops; @@ -444,8 +444,6 @@ xfs_inobt_init_common( cur->bc_ops = &xfs_finobt_ops; } - cur->bc_blocklog = mp->m_sb.sb_blocklog; - if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -530,6 +528,17 @@ xfs_inobt_commit_staged_btree( } } +/* Calculate number of records in an inode btree block. */ +static inline unsigned int +xfs_inobt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} + /* * Calculate number of records in an inobt btree block. */ @@ -540,10 +549,54 @@ xfs_inobt_maxrecs( int leaf) { blocklen -= XFS_INOBT_BLOCK_LEN(mp); + return xfs_inobt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_inobt_rec_t); - return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +/* + * Maximum number of inode btree records per AG. Pretend that we can fill an + * entire AG completely full of inodes except for the AG headers. + */ +#define XFS_MAX_INODE_RECORDS \ + ((XFS_MAX_AG_BYTES - (4 * BBSIZE)) / XFS_DINODE_MIN_SIZE) / \ + XFS_INODES_PER_CHUNK + +/* Compute the max possible height for the inode btree. */ +static inline unsigned int +xfs_inobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for the free inode btree. */ +static inline unsigned int +xfs_finobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for either inode btree. */ +unsigned int +xfs_iallocbt_maxlevels_ondisk(void) +{ + return max(xfs_inobt_maxlevels_ondisk(), + xfs_finobt_maxlevels_ondisk()); } /* @@ -761,3 +814,22 @@ xfs_iallocbt_calc_size( { return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); } + +int __init +xfs_inobt_init_cur_cache(void) +{ + xfs_inobt_cur_cache = kmem_cache_create("xfs_inobt_cur", + xfs_btree_cur_sizeof(xfs_inobt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_inobt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_inobt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_inobt_cur_cache); + xfs_inobt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 8a322d402e61..26451cb76b98 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -75,4 +75,9 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_iallocbt_maxlevels_ondisk(void); + +int __init xfs_inobt_init_cur_cache(void); +void xfs_inobt_destroy_cur_cache(void); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 3932b4ebf903..cae9708c8587 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -51,9 +51,9 @@ xfs_inode_buf_verify( agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { - int di_ok; - xfs_dinode_t *dip; - xfs_agino_t unlinked_ino; + struct xfs_dinode *dip; + xfs_agino_t unlinked_ino; + int di_ok; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1d174909f9bd..9149f4f796fc 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,7 +26,7 @@ #include "xfs_types.h" #include "xfs_errortag.h" -kmem_zone_t *xfs_ifork_zone; +struct kmem_cache *xfs_ifork_cache; void xfs_init_local_fork( @@ -67,10 +67,10 @@ xfs_init_local_fork( */ STATIC int xfs_iformat_local( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork, - int size) + struct xfs_inode *ip, + struct xfs_dinode *dip, + int whichfork, + int size) { /* * If the size is unreasonable, then something @@ -162,8 +162,8 @@ xfs_iformat_extents( */ STATIC int xfs_iformat_btree( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, int whichfork) { struct xfs_mount *mp = ip->i_mount; @@ -284,7 +284,7 @@ xfs_ifork_alloc( { struct xfs_ifork *ifp; - ifp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL); + ifp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ifp->if_format = format; ifp->if_nextents = nextents; return ifp; @@ -325,7 +325,7 @@ xfs_iformat_attr_fork( } if (error) { - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; } return error; @@ -580,8 +580,8 @@ xfs_iextents_copy( */ void xfs_iflush_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, struct xfs_inode_log_item *iip, int whichfork) { @@ -676,7 +676,7 @@ xfs_ifork_init_cow( if (ip->i_cowfp) return; - ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_zone, + ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a6f7897b6887..3d64a3acb0ed 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -221,7 +221,7 @@ static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp, xfs_iext_get_extent((ifp), (ext), (got)); \ xfs_iext_next((ifp), (ext))) -extern struct kmem_zone *xfs_ifork_zone; +extern struct kmem_cache *xfs_ifork_cache; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index e5d767a7fc5d..327ba25e9e17 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -24,6 +24,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +struct kmem_cache *xfs_refcount_intent_cache; + /* Allowable refcount adjustment amounts. */ enum xfs_refc_adjust_op { XFS_REFCOUNT_ADJUST_INCREASE = 1, @@ -916,8 +918,7 @@ xfs_refcount_adjust_extents( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; @@ -974,8 +975,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, - tmp.rc_blockcount, oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + tmp.rc_blockcount, NULL); } (*agbno) += tmp.rc_blockcount; @@ -1019,8 +1020,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, - oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + ext.rc_blockcount, NULL); } skip: @@ -1048,8 +1049,7 @@ xfs_refcount_adjust( xfs_extlen_t aglen, xfs_agblock_t *new_agbno, xfs_extlen_t *new_aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; @@ -1092,8 +1092,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, - adj, oinfo); + error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); if (error) goto out_error; @@ -1188,12 +1187,12 @@ xfs_refcount_finish_one( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_INCREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_DECREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: @@ -1235,8 +1234,8 @@ __xfs_refcount_add( type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), blockcount); - ri = kmem_alloc(sizeof(struct xfs_refcount_intent), - KM_NOFS); + ri = kmem_cache_alloc(xfs_refcount_intent_cache, + GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; @@ -1742,7 +1741,7 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); error = xfs_trans_commit(tp); if (error) @@ -1782,3 +1781,20 @@ xfs_refcount_has_record( return xfs_btree_has_record(cur, &low, &high, exists); } + +int __init +xfs_refcount_intent_init_cache(void) +{ + xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", + sizeof(struct xfs_refcount_intent), + 0, 0, NULL); + + return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_refcount_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_refcount_intent_cache); + xfs_refcount_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 02cb3aa405be..9eb01edbd89d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -32,8 +32,8 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; enum xfs_refcount_intent_type ri_type; - xfs_fsblock_t ri_startblock; xfs_extlen_t ri_blockcount; + xfs_fsblock_t ri_startblock; }; void xfs_refcount_increase_extent(struct xfs_trans *tp, @@ -83,4 +83,9 @@ extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +extern struct kmem_cache *xfs_refcount_intent_cache; + +int __init xfs_refcount_intent_init_cache(void); +void xfs_refcount_intent_destroy_cache(void); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 1ef9b99962ab..d14c1720b0fb 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -21,6 +21,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_refcountbt_cur_cache; + static struct xfs_btree_cur * xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) @@ -322,11 +324,8 @@ xfs_refcountbt_init_common( ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = XFS_BTNUM_REFC; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_REFC, + mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -396,6 +395,18 @@ xfs_refcountbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops); } +/* Calculate number of records in a refcount btree block. */ +static inline unsigned int +xfs_refcountbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_refcount_rec); + return blocklen / (sizeof(struct xfs_refcount_key) + + sizeof(xfs_refcount_ptr_t)); +} + /* * Calculate the number of records in a refcount btree block. */ @@ -405,11 +416,22 @@ xfs_refcountbt_maxrecs( bool leaf) { blocklen -= XFS_REFCOUNT_BLOCK_LEN; + return xfs_refcountbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_refcount_rec); - return blocklen / (sizeof(struct xfs_refcount_key) + - sizeof(xfs_refcount_ptr_t)); +/* Compute the max possible height of the maximally sized refcount btree. */ +unsigned int +xfs_refcountbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_refcountbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_refcountbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of a refcount btree. */ @@ -417,8 +439,14 @@ void xfs_refcountbt_compute_maxlevels( struct xfs_mount *mp) { + if (!xfs_has_reflink(mp)) { + mp->m_refc_maxlevels = 0; + return; + } + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels( mp->m_refc_mnr, mp->m_sb.sb_agblocks); + ASSERT(mp->m_refc_maxlevels <= xfs_refcountbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. */ @@ -488,3 +516,22 @@ xfs_refcountbt_calc_reserves( return error; } + +int __init +xfs_refcountbt_init_cur_cache(void) +{ + xfs_refcountbt_cur_cache = kmem_cache_create("xfs_refcbt_cur", + xfs_btree_cur_sizeof(xfs_refcountbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_refcountbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_refcountbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_refcountbt_cur_cache); + xfs_refcountbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index bd9ed9e1e41f..d66b37259bed 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -65,4 +65,9 @@ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_refcountbt_maxlevels_ondisk(void); + +int __init xfs_refcountbt_init_cur_cache(void); +void xfs_refcountbt_destroy_cur_cache(void); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index f45929b1b94a..cd322174dbff 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -24,6 +24,8 @@ #include "xfs_inode.h" #include "xfs_ag.h" +struct kmem_cache *xfs_rmap_intent_cache; + /* * Lookup the first record less than or equal to [bno, len, owner, offset] * in the btree given by cur. @@ -2485,7 +2487,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); + ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; @@ -2779,3 +2781,20 @@ const struct xfs_owner_info XFS_RMAP_OINFO_REFC = { const struct xfs_owner_info XFS_RMAP_OINFO_COW = { .oi_owner = XFS_RMAP_OWN_COW, }; + +int __init +xfs_rmap_intent_init_cache(void) +{ + xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent", + sizeof(struct xfs_rmap_intent), + 0, 0, NULL); + + return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_rmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_rmap_intent_cache); + xfs_rmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index fd67904ed446..b718ebeda372 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -159,8 +159,8 @@ enum xfs_rmap_intent_type { struct xfs_rmap_intent { struct list_head ri_list; enum xfs_rmap_intent_type ri_type; - uint64_t ri_owner; int ri_whichfork; + uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; }; @@ -215,4 +215,9 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_INODES; extern const struct xfs_owner_info XFS_RMAP_OINFO_REFC; extern const struct xfs_owner_info XFS_RMAP_OINFO_COW; +extern struct kmem_cache *xfs_rmap_intent_cache; + +int __init xfs_rmap_intent_init_cache(void); +void xfs_rmap_intent_destroy_cache(void); + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index b7dbbfb3aeed..69e104d0277f 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" +static struct kmem_cache *xfs_rmapbt_cur_cache; + /* * Reverse map btree. * @@ -451,13 +453,10 @@ xfs_rmapbt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; /* Overlapping btree; 2 keys per pointer. */ - cur->bc_btnum = XFS_BTNUM_RMAP; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, + mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache); cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_ops = &xfs_rmapbt_ops; @@ -522,6 +521,18 @@ xfs_rmapbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); } +/* Calculate number of records in a reverse mapping btree block. */ +static inline unsigned int +xfs_rmapbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_rmap_rec); + return blocklen / + (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +} + /* * Calculate number of records in an rmap btree block. */ @@ -531,11 +542,33 @@ xfs_rmapbt_maxrecs( int leaf) { blocklen -= XFS_RMAP_BLOCK_LEN; + return xfs_rmapbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_rmap_rec); - return blocklen / - (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +/* Compute the max possible height for reverse mapping btrees. */ +unsigned int +xfs_rmapbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2; + + /* + * Compute the asymptotic maxlevels for an rmapbt on any reflink fs. + * + * On a reflink filesystem, each AG block can have up to 2^32 (per the + * refcount record format) owners, which means that theoretically we + * could face up to 2^64 rmap records. However, we're likely to run + * out of blocks in the AG long before that happens, which means that + * we must compute the max height based on what the btree will look + * like if it consumes almost all the blocks in the AG due to maximal + * sharing factor. + */ + return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of an rmap btree. */ @@ -543,26 +576,36 @@ void xfs_rmapbt_compute_maxlevels( struct xfs_mount *mp) { - /* - * On a non-reflink filesystem, the maximum number of rmap - * records is the number of blocks in the AG, hence the max - * rmapbt height is log_$maxrecs($agblocks). However, with - * reflink each AG block can have up to 2^32 (per the refcount - * record format) owners, which means that theoretically we - * could face up to 2^64 rmap records. - * - * That effectively means that the max rmapbt height must be - * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG - * blocks to feed the rmapbt long before the rmapbt reaches - * maximum height. The reflink code uses ag_resv_critical to - * disallow reflinking when less than 10% of the per-AG metadata - * block reservation since the fallback is a regular file copy. - */ - if (xfs_has_reflink(mp)) - mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; - else + if (!xfs_has_rmapbt(mp)) { + mp->m_rmap_maxlevels = 0; + return; + } + + if (xfs_has_reflink(mp)) { + /* + * Compute the asymptotic maxlevels for an rmap btree on a + * filesystem that supports reflink. + * + * On a reflink filesystem, each AG block can have up to 2^32 + * (per the refcount record format) owners, which means that + * theoretically we could face up to 2^64 rmap records. + * However, we're likely to run out of blocks in the AG long + * before that happens, which means that we must compute the + * max height based on what the btree will look like if it + * consumes almost all the blocks in the AG due to maximal + * sharing factor. + */ + mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr, + mp->m_sb.sb_agblocks); + } else { + /* + * If there's no block sharing, compute the maximum rmapbt + * height assuming one rmap record per AG block. + */ mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( mp->m_rmap_mnr, mp->m_sb.sb_agblocks); + } + ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. */ @@ -633,3 +676,22 @@ xfs_rmapbt_calc_reserves( return error; } + +int __init +xfs_rmapbt_init_cur_cache(void) +{ + xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur", + xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_rmapbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_rmapbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_rmapbt_cur_cache); + xfs_rmapbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index f2eee6572af4..3244715dd111 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -59,4 +59,9 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); +unsigned int xfs_rmapbt_maxlevels_ondisk(void); + +int __init xfs_rmapbt_init_cur_cache(void); +void xfs_rmapbt_destroy_cur_cache(void); + #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e58349be78bd..f4e84aa1d50a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -495,7 +495,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) static void __xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from, + struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); @@ -571,7 +571,7 @@ __xfs_sb_from_disk( void xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + struct xfs_dsb *from) { __xfs_sb_from_disk(to, from, true); } diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5e300daa2559..6f83d9b306ee 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -70,7 +70,7 @@ xfs_allocfree_log_count( { uint blocks; - blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); + blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1); if (xfs_has_rmapbt(mp)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); if (xfs_has_reflink(mp)) @@ -814,6 +814,19 @@ xfs_trans_resv_calc( struct xfs_mount *mp, struct xfs_trans_resv *resp) { + unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; + + /* + * In the early days of rmap+reflink, we always set the rmap maxlevels + * to 9 even if the AG was small enough that it would never grow to + * that height. Transaction reservation sizes influence the minimum + * log size calculation, which influences the size of the log that mkfs + * creates. Use the old value here to ensure that newly formatted + * small filesystems will mount on older kernels. + */ + if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) + mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; + /* * The following transactions are logged in physical format and * require a permanent reservation on space. @@ -916,4 +929,7 @@ xfs_trans_resv_calc( resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); + + /* Put everything back the way it was. This goes at the end. */ + mp->m_rmap_maxlevels = rmap_maxlevels; } diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 50332be34388..87b31c69a773 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -17,6 +17,13 @@ /* Adding one rmap could split every level up to the top of the tree. */ #define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) +/* + * Note that we historically set m_rmap_maxlevels to 9 when reflink is enabled, + * so we must preserve this behavior to avoid changing the transaction space + * reservations and minimum log size calculations for existing filesystems. + */ +#define XFS_OLD_REFLINK_RMAP_MAXLEVELS 9 + /* Blocks we might need to add "b" rmaps to a tree. */ #define XFS_NRMAPADD_SPACE_RES(mp, b)\ (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ @@ -74,7 +81,7 @@ #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) #define XFS_GROWFS_SPACE_RES(mp) \ - (2 * (mp)->m_ag_maxlevels) + (2 * (mp)->m_alloc_maxlevels) #define XFS_GROWFSRT_SPACE_RES(mp,b) \ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) #define XFS_LINK_SPACE_RES(mp,nl) \ diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index ae3c9f6e2c69..bed798792226 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -555,11 +555,11 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); if (xfs_has_rmapbt(mp)) { @@ -568,7 +568,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_rmap_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); } @@ -578,7 +578,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_refcount_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_refc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); } @@ -850,6 +850,7 @@ xchk_agi( struct xfs_mount *mp = sc->mp; struct xfs_agi *agi; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(sc->mp); xfs_agnumber_t agno = sc->sm->sm_agno; xfs_agblock_t agbno; xfs_agblock_t eoag; @@ -880,7 +881,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); if (xfs_has_finobt(mp)) { @@ -889,7 +890,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_free_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0f8deee66f15..d7bfed52f4cd 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -122,7 +122,7 @@ xrep_check_btree_root( xfs_agnumber_t agno = sc->sm->sm_agno; return xfs_verify_agbno(mp, agno, fab->root) && - fab->height <= XFS_BTREE_MAXLEVELS; + fab->height <= fab->maxlevels; } /* @@ -339,18 +339,22 @@ xrep_agf( [XREP_AGF_BNOBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_bnobt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_CNTBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_cntbt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_RMAPBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_rmapbt_buf_ops, + .maxlevels = sc->mp->m_rmap_maxlevels, }, [XREP_AGF_REFCOUNTBT] = { .rmap_owner = XFS_RMAP_OWN_REFC, .buf_ops = &xfs_refcountbt_buf_ops, + .maxlevels = sc->mp->m_refc_maxlevels, }, [XREP_AGF_END] = { .buf_ops = NULL, @@ -881,10 +885,12 @@ xrep_agi( [XREP_AGI_INOBT] = { .rmap_owner = XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_inobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_FINOBT] = { .rmap_owner = XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_finobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_END] = { .buf_ops = NULL diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index d6d24c866bc4..b89bf9de9b1c 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -222,21 +222,21 @@ out: * 1 2 3 * * Pretend for this example that each leaf block has 100 btree records. For - * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record - * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record - * block 4. The list is [1, 4]. + * the first btree record, we'll observe that bc_levels[0].ptr == 1, so we + * record that we saw block 1. Then we observe that bc_levels[1].ptr == 1, so + * we record block 4. The list is [1, 4]. * - * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the - * loop. The list remains [1, 4]. + * For the second btree record, we see that bc_levels[0].ptr == 2, so we exit + * the loop. The list remains [1, 4]. * * For the 101st btree record, we've moved onto leaf block 2. Now - * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that - * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. + * bc_levels[0].ptr == 1 again, so we record that we saw block 2. We see that + * bc_levels[1].ptr == 2, so we exit the loop. The list is now [1, 4, 2]. * - * For the 102nd record, bc_ptrs[0] == 2, so we continue. + * For the 102nd record, bc_levels[0].ptr == 2, so we continue. * - * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so - * we add 3 to the list. Now it is [1, 4, 2, 3]. + * For the 201st record, we've moved on to leaf block 3. + * bc_levels[0].ptr == 1, so we add 3 to the list. Now it is [1, 4, 2, 3]. * * For the 300th record we just exit, with the list being [1, 4, 2, 3]. */ @@ -256,7 +256,7 @@ xbitmap_set_btcur_path( int i; int error; - for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { + for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) { xfs_btree_get_block(cur, i, &bp); if (!bp) continue; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 017da9ceaee9..a4cbbc346f60 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -402,7 +402,7 @@ xchk_bmapbt_rec( * the root since the verifiers don't do that. */ if (xfs_has_crc(bs->cur->bc_mp) && - bs->cur->bc_ptrs[0] == 1) { + bs->cur->bc_levels[0].ptr == 1) { for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { block = xfs_btree_get_block(bs->cur, i, &bp); owner = be64_to_cpu(block->bb_u.l.bb_owner); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index eccb855dc904..39dd46f038fe 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -136,14 +136,14 @@ xchk_btree_rec( struct xfs_buf *bp; block = xfs_btree_get_block(cur, 0, &bp); - rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block); trace_xchk_btree_rec(bs->sc, cur, 0); /* If this isn't the first record, are they in order? */ - if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) + if (cur->bc_levels[0].ptr > 1 && + !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) xchk_btree_set_corrupt(bs->sc, cur, 0); - bs->firstrec = false; memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); if (cur->bc_nlevels == 1) @@ -152,7 +152,7 @@ xchk_btree_rec( /* Is this at least as large as the parent low key? */ cur->bc_ops->init_key_from_rec(&key, rec); keyblock = xfs_btree_get_block(cur, 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); @@ -161,7 +161,7 @@ xchk_btree_rec( /* Is this no larger than the parent high key? */ cur->bc_ops->init_high_key_from_rec(&hkey, rec); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); } @@ -183,23 +183,22 @@ xchk_btree_key( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); + key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); trace_xchk_btree_key(bs->sc, cur, level); /* If this isn't the first key, are they in order? */ - if (!bs->firstkey[level] && - !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) + if (cur->bc_levels[level].ptr > 1 && + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key)) xchk_btree_set_corrupt(bs->sc, cur, level); - bs->firstkey[level] = false; - memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); + memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len); if (level + 1 >= cur->bc_nlevels) return; /* Is this at least as large as the parent low key? */ keyblock = xfs_btree_get_block(cur, level + 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); @@ -207,8 +206,9 @@ xchk_btree_key( return; /* Is this no larger than the parent high key? */ - key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, + keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); } @@ -291,7 +291,7 @@ xchk_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. */ pblock = xfs_btree_get_block(ncur, level + 1, &pbp); - pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); + pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock); if (!xchk_btree_ptr_ok(bs, level + 1, pp)) goto out; if (pbp) @@ -596,7 +596,7 @@ xchk_btree_block_keys( /* Obtain the parent's copy of the keys for this block. */ parent_block = xfs_btree_get_block(cur, level + 1, &bp); - parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], + parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) @@ -607,7 +607,7 @@ xchk_btree_block_keys( /* Get high keys */ high_bk = xfs_btree_high_key_from_key(cur, &block_keys); - high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], + high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) @@ -627,35 +627,39 @@ xchk_btree( const struct xfs_owner_info *oinfo, void *private) { - struct xchk_btree bs = { - .cur = cur, - .scrub_rec = scrub_fn, - .oinfo = oinfo, - .firstrec = true, - .private = private, - .sc = sc, - }; union xfs_btree_ptr ptr; + struct xchk_btree *bs; union xfs_btree_ptr *pp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - int level; struct xfs_buf *bp; struct check_owner *co; struct check_owner *n; - int i; + size_t cur_sz; + int level; int error = 0; - /* Initialize scrub state */ - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) - bs.firstkey[i] = true; - INIT_LIST_HEAD(&bs.to_check); - - /* Don't try to check a tree with a height we can't handle. */ - if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { + /* + * Allocate the btree scrub context from the heap, because this + * structure can get rather large. Don't let a caller feed us a + * totally absurd size. + */ + cur_sz = xchk_btree_sizeof(cur->bc_nlevels); + if (cur_sz > PAGE_SIZE) { xchk_btree_set_corrupt(sc, cur, 0); - goto out; + return 0; } + bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + if (!bs) + return -ENOMEM; + bs->cur = cur; + bs->scrub_rec = scrub_fn; + bs->oinfo = oinfo; + bs->private = private; + bs->sc = sc; + + /* Initialize scrub state */ + INIT_LIST_HEAD(&bs->to_check); /* * Load the root of the btree. The helper function absorbs @@ -663,79 +667,82 @@ xchk_btree( */ level = cur->bc_nlevels - 1; cur->bc_ops->init_ptr_from_cur(cur, &ptr); - if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) goto out; - error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp); + error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); if (level == 0) { /* End of leaf, pop back towards the root. */ - if (cur->bc_ptrs[level] > + if (cur->bc_levels[level].ptr > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Records in order for scrub? */ - xchk_btree_rec(&bs); + xchk_btree_rec(bs); /* Call out to the record checker. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); - error = bs.scrub_rec(&bs, recp); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); + error = bs->scrub_rec(bs, recp); if (error) break; if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Keys in order for scrub? */ - xchk_btree_key(&bs, level); + xchk_btree_key(bs, level); /* Drill another level deeper. */ - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); - if (!xchk_btree_ptr_ok(&bs, level, pp)) { - cur->bc_ptrs[level]++; + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); + if (!xchk_btree_ptr_ok(bs, level, pp)) { + cur->bc_levels[level].ptr++; continue; } level--; - error = xchk_btree_get_block(&bs, level, pp, &block, &bp); + error = xchk_btree_get_block(bs, level, pp, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; } out: /* Process deferred owner checks on btree blocks. */ - list_for_each_entry_safe(co, n, &bs.to_check, list) { - if (!error && bs.cur) - error = xchk_btree_check_block_owner(&bs, - co->level, co->daddr); + list_for_each_entry_safe(co, n, &bs->to_check, list) { + if (!error && bs->cur) + error = xchk_btree_check_block_owner(bs, co->level, + co->daddr); list_del(&co->list); kmem_free(co); } + kmem_free(bs); return error; } diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index b7d2fc01fbf9..da61a53a0b61 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -39,11 +39,22 @@ struct xchk_btree { /* internal scrub state */ union xfs_btree_rec lastrec; - bool firstrec; - union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; - bool firstkey[XFS_BTREE_MAXLEVELS]; struct list_head to_check; + + /* this element must come last! */ + union xfs_btree_key lastkey[]; }; + +/* + * Calculate the size of a xchk_btree structure. There are nlevels-1 slots for + * keys because we track leaf records separately in lastrec. + */ +static inline size_t +xchk_btree_sizeof(unsigned int nlevels) +{ + return struct_size((struct xchk_btree *)NULL, lastkey, nlevels - 1); +} + int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur, xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo, void *private); diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 8a52514bc1ff..b962cfbbd92b 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -473,7 +473,7 @@ xchk_da_btree( xchk_da_btree_rec_fn scrub_fn, void *private) { - struct xchk_da_btree ds = {}; + struct xchk_da_btree *ds; struct xfs_mount *mp = sc->mp; struct xfs_da_state_blk *blks; struct xfs_da_node_entry *key; @@ -486,32 +486,35 @@ xchk_da_btree( return 0; /* Set up initial da state. */ - ds.dargs.dp = sc->ip; - ds.dargs.whichfork = whichfork; - ds.dargs.trans = sc->tp; - ds.dargs.op_flags = XFS_DA_OP_OKNOENT; - ds.state = xfs_da_state_alloc(&ds.dargs); - ds.sc = sc; - ds.private = private; + ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + if (!ds) + return -ENOMEM; + ds->dargs.dp = sc->ip; + ds->dargs.whichfork = whichfork; + ds->dargs.trans = sc->tp; + ds->dargs.op_flags = XFS_DA_OP_OKNOENT; + ds->state = xfs_da_state_alloc(&ds->dargs); + ds->sc = sc; + ds->private = private; if (whichfork == XFS_ATTR_FORK) { - ds.dargs.geo = mp->m_attr_geo; - ds.lowest = 0; - ds.highest = 0; + ds->dargs.geo = mp->m_attr_geo; + ds->lowest = 0; + ds->highest = 0; } else { - ds.dargs.geo = mp->m_dir_geo; - ds.lowest = ds.dargs.geo->leafblk; - ds.highest = ds.dargs.geo->freeblk; + ds->dargs.geo = mp->m_dir_geo; + ds->lowest = ds->dargs.geo->leafblk; + ds->highest = ds->dargs.geo->freeblk; } - blkno = ds.lowest; + blkno = ds->lowest; level = 0; /* Find the root of the da tree, if present. */ - blks = ds.state->path.blk; - error = xchk_da_btree_block(&ds, level, blkno); + blks = ds->state->path.blk; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out_state; /* - * We didn't find a block at ds.lowest, which means that there's + * We didn't find a block at ds->lowest, which means that there's * no LEAF1/LEAFN tree (at least not where it's supposed to be), * so jump out now. */ @@ -523,16 +526,16 @@ xchk_da_btree( /* Handle leaf block. */ if (blks[level].magic != XFS_DA_NODE_MAGIC) { /* End of leaf, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Dispatch record scrubbing. */ - error = scrub_fn(&ds, level); + error = scrub_fn(ds, level); if (error) break; if (xchk_should_terminate(sc, &error) || @@ -545,17 +548,17 @@ xchk_da_btree( /* End of node, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Hashes in order for scrub? */ - key = xchk_da_btree_node_entry(&ds, level); - error = xchk_da_btree_hash(&ds, level, &key->hashval); + key = xchk_da_btree_node_entry(ds, level); + error = xchk_da_btree_hash(ds, level, &key->hashval); if (error) goto out; @@ -564,11 +567,11 @@ xchk_da_btree( level++; if (level >= XFS_DA_NODE_MAXDEPTH) { /* Too deep! */ - xchk_da_set_corrupt(&ds, level - 1); + xchk_da_set_corrupt(ds, level - 1); break; } - ds.tree_level--; - error = xchk_da_btree_block(&ds, level, blkno); + ds->tree_level--; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out; if (blks[level].bp == NULL) @@ -587,6 +590,7 @@ out: } out_state: - xfs_da_state_free(ds.state); + xfs_da_state_free(ds->state); + kmem_free(ds); return error; } diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 3bb152d52a07..840f74ec431c 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -44,6 +44,9 @@ struct xrep_find_ag_btree { /* in: buffer ops */ const struct xfs_buf_ops *buf_ops; + /* in: maximum btree height */ + unsigned int maxlevels; + /* out: the highest btree block found and the tree height */ xfs_agblock_t root; unsigned int height; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 51e4c61916d2..8d528d35b725 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -461,15 +461,10 @@ xfs_scrub_metadata( struct file *file, struct xfs_scrub_metadata *sm) { - struct xfs_scrub sc = { - .file = file, - .sm = sm, - }; + struct xfs_scrub *sc; struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; int error = 0; - sc.mp = mp; - BUILD_BUG_ON(sizeof(meta_scrub_ops) != (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR)); @@ -489,59 +484,68 @@ xfs_scrub_metadata( xchk_experimental_warning(mp); - sc.ops = &meta_scrub_ops[sm->sm_type]; - sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); + sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); + if (!sc) { + error = -ENOMEM; + goto out; + } + + sc->mp = mp; + sc->file = file; + sc->sm = sm; + sc->ops = &meta_scrub_ops[sm->sm_type]; + sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); retry_op: /* * When repairs are allowed, prevent freezing or readonly remount while * scrub is running with a real transaction. */ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { - error = mnt_want_write_file(sc.file); + error = mnt_want_write_file(sc->file); if (error) - goto out; + goto out_sc; } /* Set up for the operation. */ - error = sc.ops->setup(&sc); + error = sc->ops->setup(sc); if (error) goto out_teardown; /* Scrub for errors. */ - error = sc.ops->scrub(&sc); - if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { + error = sc->ops->scrub(sc); + if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { /* * Scrubbers return -EDEADLOCK to mean 'try harder'. * Tear down everything we hold, then set up again with * preparation for worst-case scenarios. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) - goto out; - sc.flags |= XCHK_TRY_HARDER; + goto out_sc; + sc->flags |= XCHK_TRY_HARDER; goto retry_op; } else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)) goto out_teardown; - xchk_update_health(&sc); + xchk_update_health(sc); - if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && - !(sc.flags & XREP_ALREADY_FIXED)) { + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && + !(sc->flags & XREP_ALREADY_FIXED)) { bool needs_fix; /* Let debug users force us into the repair routines. */ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | - XFS_SCRUB_OFLAG_XCORRUPT | - XFS_SCRUB_OFLAG_PREEN)); + needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | + XFS_SCRUB_OFLAG_XCORRUPT | + XFS_SCRUB_OFLAG_PREEN)); /* * If userspace asked for a repair but it wasn't necessary, * report that back to userspace. */ if (!needs_fix) { - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; goto out_nofix; } @@ -549,26 +553,28 @@ retry_op: * If it's broken, userspace wants us to fix it, and we haven't * already tried to fix it, then attempt a repair. */ - error = xrep_attempt(&sc); + error = xrep_attempt(sc); if (error == -EAGAIN) { /* * Either the repair function succeeded or it couldn't * get all the resources it needs; either way, we go * back to the beginning and call the scrub function. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) { xrep_failure(mp); - goto out; + goto out_sc; } goto retry_op; } } out_nofix: - xchk_postmortem(&sc); + xchk_postmortem(sc); out_teardown: - error = xchk_teardown(&sc, error); + error = xchk_teardown(sc, error); +out_sc: + kmem_free(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index c0ef53fe6611..b5f94676c37c 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -21,13 +21,14 @@ xchk_btree_cur_fsbno( struct xfs_btree_cur *cur, int level) { - if (level < cur->bc_nlevels && cur->bc_bufs[level]) + if (level < cur->bc_nlevels && cur->bc_levels[level].bp) return XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(cur->bc_bufs[level])); - if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS) + xfs_buf_daddr(cur->bc_levels[level].bp)); + + if (level == cur->bc_nlevels - 1 && + (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); - if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) - return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0); + return NULLFSBLOCK; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index a7bbb84f91a7..93ece6df02e3 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -348,7 +348,7 @@ TRACE_EVENT(xchk_btree_op_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->error = error; __entry->ret_ip = ret_ip; ), @@ -389,7 +389,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error, __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; __entry->level = level; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->error = error; @@ -431,7 +431,7 @@ TRACE_EVENT(xchk_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -471,7 +471,7 @@ TRACE_EVENT(xchk_ifork_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -511,7 +511,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class, __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; ), TP_printk("dev %d:%d type %s btree %s agno 0x%x agbno 0x%x level %d nlevels %d ptr %d", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 34fc6148032a..c8c15c3c3147 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -82,6 +82,7 @@ xfs_end_ioend( struct iomap_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; @@ -97,18 +98,26 @@ xfs_end_ioend( /* * Just clean up the in-memory structures if the fs has been shut down. */ - if (xfs_is_shutdown(ip->i_mount)) { + if (xfs_is_shutdown(mp)) { error = -EIO; goto done; } /* - * Clean up any COW blocks on an I/O error. + * Clean up all COW blocks and underlying data fork delalloc blocks on + * I/O error. The delalloc punch is required because this ioend was + * mapped to blocks in the COW fork and the associated pages are no + * longer dirty. If we don't remove delalloc blocks here, they become + * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); + xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, offset), + XFS_B_TO_FSB(mp, size)); + } goto done; } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 2b5da6218977..27265771f247 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -390,7 +390,7 @@ out_destroy_fork: /* kill the in-core attr fork before we drop the inode lock */ if (dp->i_afp) { xfs_idestroy_fork(dp->i_afp); - kmem_cache_free(xfs_ifork_zone, dp->i_afp); + kmem_cache_free(xfs_ifork_cache, dp->i_afp); dp->i_afp = NULL; } if (lock_mode) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 03159970133f..e1f4d7d5a011 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_bui_zone; -kmem_zone_t *xfs_bud_zone; +struct kmem_cache *xfs_bui_cache; +struct kmem_cache *xfs_bud_cache; static const struct xfs_item_ops xfs_bui_item_ops; @@ -39,7 +39,7 @@ STATIC void xfs_bui_item_free( struct xfs_bui_log_item *buip) { - kmem_cache_free(xfs_bui_zone, buip); + kmem_cache_free(xfs_bui_cache, buip); } /* @@ -138,7 +138,7 @@ xfs_bui_init( { struct xfs_bui_log_item *buip; - buip = kmem_cache_zalloc(xfs_bui_zone, GFP_KERNEL | __GFP_NOFAIL); + buip = kmem_cache_zalloc(xfs_bui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; @@ -198,7 +198,7 @@ xfs_bud_item_release( struct xfs_bud_log_item *budp = BUD_ITEM(lip); xfs_bui_release(budp->bud_buip); - kmem_cache_free(xfs_bud_zone, budp); + kmem_cache_free(xfs_bud_cache, budp); } static const struct xfs_item_ops xfs_bud_item_ops = { @@ -215,7 +215,7 @@ xfs_trans_get_bud( { struct xfs_bud_log_item *budp; - budp = kmem_cache_zalloc(xfs_bud_zone, GFP_KERNEL | __GFP_NOFAIL); + budp = kmem_cache_zalloc(xfs_bud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); budp->bud_buip = buip; @@ -384,7 +384,7 @@ xfs_bmap_update_finish_item( bmap->bi_bmap.br_blockcount = count; return -EAGAIN; } - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); return error; } @@ -404,7 +404,7 @@ xfs_bmap_update_cancel_item( struct xfs_bmap_intent *bmap; bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); } const struct xfs_defer_op_type xfs_bmap_update_defer_type = { @@ -532,7 +532,7 @@ xfs_bui_item_recover( * Commit transaction, which frees the transaction and saves the inode * for later replay activities. */ - error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list); + error = xfs_defer_ops_capture_and_commit(tp, capture_list); if (error) goto err_unlock; diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index b9be62f8bd52..3fafd3881a0b 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -25,7 +25,7 @@ /* kernel only BUI/BUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -65,7 +65,7 @@ struct xfs_bud_log_item { struct xfs_bud_log_format bud_format; }; -extern struct kmem_zone *xfs_bui_zone; -extern struct kmem_zone *xfs_bud_zone; +extern struct kmem_cache *xfs_bui_cache; +extern struct kmem_cache *xfs_bud_cache; #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 5fa6cd947dd4..631c5a61d89b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -20,7 +20,7 @@ #include "xfs_error.h" #include "xfs_ag.h" -static kmem_zone_t *xfs_buf_zone; +static struct kmem_cache *xfs_buf_cache; /* * Locking orders @@ -220,7 +220,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL); + bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -247,7 +247,7 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); return error; } @@ -307,7 +307,7 @@ xfs_buf_free( kmem_free(bp->b_addr); xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); } static int @@ -2258,12 +2258,12 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, + xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_buf_zone) + if (!xfs_buf_cache) goto out; return 0; @@ -2275,7 +2275,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_cache_destroy(xfs_buf_zone); + kmem_cache_destroy(xfs_buf_cache); } void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b1ab100c09e1..a7a8e4528881 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -23,7 +23,7 @@ #include "xfs_log.h" -kmem_zone_t *xfs_buf_item_zone; +struct kmem_cache *xfs_buf_item_cache; static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) { @@ -804,7 +804,7 @@ xfs_buf_item_init( return 0; } - bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL); + bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; @@ -825,7 +825,7 @@ xfs_buf_item_init( map_size = DIV_ROUND_UP(chunks, NBWORD); if (map_size > XFS_BLF_DATAMAP_SIZE) { - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); xfs_err(mp, "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", map_size, @@ -1002,7 +1002,7 @@ xfs_buf_item_free( { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 50aa0f5ef959..e11e9ef2338f 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,6 +71,6 @@ static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp) void xfs_buf_iodone(struct xfs_buf *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); -extern kmem_zone_t *xfs_buf_item_zone; +extern struct kmem_cache *xfs_buf_item_cache; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index a476c7ef5d53..70ca5751b13e 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -603,7 +603,7 @@ xlog_recover_do_inode_buffer( inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + - offsetof(xfs_dinode_t, di_next_unlinked); + offsetof(struct xfs_dinode, di_next_unlinked); while (next_unlinked_offset >= (reg_buf_offset + reg_buf_bytes)) { diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c15d61d47a06..e48ae227bb11 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -38,8 +38,8 @@ * otherwise by the lowest id first, see xfs_dqlock2. */ -struct kmem_zone *xfs_qm_dqtrxzone; -static struct kmem_zone *xfs_qm_dqzone; +struct kmem_cache *xfs_dqtrx_cache; +static struct kmem_cache *xfs_dquot_cache; static struct lock_class_key xfs_dquot_group_class; static struct lock_class_key xfs_dquot_project_class; @@ -57,7 +57,7 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); - kmem_cache_free(xfs_qm_dqzone, dqp); + kmem_cache_free(xfs_dquot_cache, dqp); } /* @@ -458,7 +458,7 @@ xfs_dquot_alloc( { struct xfs_dquot *dqp; - dqp = kmem_cache_zalloc(xfs_qm_dqzone, GFP_KERNEL | __GFP_NOFAIL); + dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL); dqp->q_type = type; dqp->q_id = id; @@ -471,7 +471,7 @@ xfs_dquot_alloc( * Offset of dquot in the (fixed sized) dquot chunk. */ dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); /* * Because we want to use a counting completion, complete @@ -1363,22 +1363,22 @@ xfs_dqlock2( int __init xfs_qm_init(void) { - xfs_qm_dqzone = kmem_cache_create("xfs_dquot", + xfs_dquot_cache = kmem_cache_create("xfs_dquot", sizeof(struct xfs_dquot), 0, 0, NULL); - if (!xfs_qm_dqzone) + if (!xfs_dquot_cache) goto out; - xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", + xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx", sizeof(struct xfs_dquot_acct), 0, 0, NULL); - if (!xfs_qm_dqtrxzone) - goto out_free_dqzone; + if (!xfs_dqtrx_cache) + goto out_free_dquot_cache; return 0; -out_free_dqzone: - kmem_cache_destroy(xfs_qm_dqzone); +out_free_dquot_cache: + kmem_cache_destroy(xfs_dquot_cache); out: return -ENOMEM; } @@ -1386,8 +1386,8 @@ out: void xfs_qm_exit(void) { - kmem_cache_destroy(xfs_qm_dqtrxzone); - kmem_cache_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_dqtrx_cache); + kmem_cache_destroy(xfs_dquot_cache); } /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3f8a0713573a..47ef9c9c5c17 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_efi_zone; -kmem_zone_t *xfs_efd_zone; +struct kmem_cache *xfs_efi_cache; +struct kmem_cache *xfs_efd_cache; static const struct xfs_item_ops xfs_efi_item_ops; @@ -43,7 +43,7 @@ xfs_efi_item_free( if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); else - kmem_cache_free(xfs_efi_zone, efip); + kmem_cache_free(xfs_efi_cache, efip); } /* @@ -161,7 +161,7 @@ xfs_efi_init( ((nextents - 1) * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { - efip = kmem_cache_zalloc(xfs_efi_zone, + efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -241,7 +241,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); else - kmem_cache_free(xfs_efd_zone, efdp); + kmem_cache_free(xfs_efd_cache, efdp); } /* @@ -333,7 +333,7 @@ xfs_trans_get_efd( (nextents - 1) * sizeof(struct xfs_extent), 0); } else { - efdp = kmem_cache_zalloc(xfs_efd_zone, + efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -474,15 +474,21 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_extent_free_item *free; int error; free = container_of(item, struct xfs_extent_free_item, xefi_list); + oinfo.oi_owner = free->xefi_owner; + if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; error = xfs_trans_free_extent(tp, EFD_ITEM(done), free->xefi_startblock, free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); + &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -502,7 +508,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -525,6 +531,7 @@ xfs_agfl_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_extent_free_item *free; @@ -539,13 +546,13 @@ xfs_agfl_free_finish_item( ASSERT(free->xefi_blockcount == 1); agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + oinfo.oi_owner = free->xefi_owner; trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); + error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo); /* * Mark the transaction dirty, even on error. This ensures the @@ -564,7 +571,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -637,7 +644,7 @@ xfs_efi_item_recover( } - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index cd2860c875bf..186d0f2137f1 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -9,7 +9,7 @@ /* kernel only EFI/EFD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -69,7 +69,7 @@ struct xfs_efd_log_item { */ #define XFS_EFD_MAX_FAST_EXTENTS 16 -extern struct kmem_zone *xfs_efi_zone; -extern struct kmem_zone *xfs_efd_zone; +extern struct kmem_cache *xfs_efi_cache; +extern struct kmem_cache *xfs_efd_cache; #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f2210d927481..e1472004170e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -77,10 +77,10 @@ xfs_inode_alloc( * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL * and return NULL here on ENOMEM. */ - ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL); + ip = kmem_cache_alloc(xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL); if (inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); return NULL; } @@ -130,11 +130,11 @@ xfs_inode_free_callback( if (ip->i_afp) { xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); } if (ip->i_cowfp) { xfs_idestroy_fork(ip->i_cowfp); - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); } if (ip->i_itemp) { ASSERT(!test_bit(XFS_LI_IN_AIL, @@ -143,7 +143,7 @@ xfs_inode_free_callback( ip->i_itemp = NULL; } - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); } static void diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 017904a34c02..508e184e3b8f 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -20,7 +20,7 @@ #include "xfs_ialloc.h" #include "xfs_trace.h" -kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +struct kmem_cache *xfs_icreate_cache; /* inode create item */ static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) { @@ -63,7 +63,7 @@ STATIC void xfs_icreate_item_release( struct xfs_log_item *lip) { - kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); + kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); } static const struct xfs_item_ops xfs_icreate_item_ops = { @@ -97,7 +97,7 @@ xfs_icreate_log( { struct xfs_icreate_item *icp; - icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL); + icp = kmem_cache_zalloc(xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, &xfs_icreate_item_ops); diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h index a50d0b01e15a..64992823108a 100644 --- a/fs/xfs/xfs_icreate_item.h +++ b/fs/xfs/xfs_icreate_item.h @@ -12,7 +12,7 @@ struct xfs_icreate_item { struct xfs_icreate_log ic_format; }; -extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +extern struct kmem_cache *xfs_icreate_cache; /* inode create item */ void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, unsigned int count, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4f6f034fb81..64b9bf334806 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -36,7 +36,7 @@ #include "xfs_reflink.h" #include "xfs_ag.h" -kmem_zone_t *xfs_inode_zone; +struct kmem_cache *xfs_inode_cache; /* * Used in xfs_itruncate_extents(). This is the maximum number of extents @@ -564,8 +564,6 @@ xfs_lock_two_inodes( struct xfs_inode *ip1, uint ip1_mode) { - struct xfs_inode *temp; - uint mode_temp; int attempts = 0; struct xfs_log_item *lp; @@ -578,12 +576,8 @@ xfs_lock_two_inodes( ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { - temp = ip0; - ip0 = ip1; - ip1 = temp; - mode_temp = ip0_mode; - ip0_mode = ip1_mode; - ip1_mode = mode_temp; + swap(ip0, ip1); + swap(ip0_mode, ip1_mode); } again: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b21b177832d1..e635a3d64cba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -504,7 +504,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip) void xfs_irele(struct xfs_inode *ip); -extern struct kmem_zone *xfs_inode_zone; +extern struct kmem_cache *xfs_inode_cache; /* The default CoW extent size hint. */ #define XFS_DEFAULT_COWEXTSZ_HINT 32 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0659d19c211e..90d8e591baf8 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -21,7 +21,7 @@ #include <linux/iversion.h> -kmem_zone_t *xfs_ili_zone; /* inode log item zone */ +struct kmem_cache *xfs_ili_cache; /* inode log item */ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) { @@ -672,7 +672,7 @@ xfs_inode_item_init( struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); - iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone, + iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, GFP_KERNEL | __GFP_NOFAIL); iip->ili_inode = ip; @@ -694,7 +694,7 @@ xfs_inode_item_destroy( ip->i_itemp = NULL; kmem_free(iip->ili_item.li_lv_shadow); - kmem_cache_free(xfs_ili_zone, iip); + kmem_cache_free(xfs_ili_cache, iip); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 403b45ab9aa2..1a302000d604 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -47,6 +47,6 @@ extern void xfs_iflush_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); -extern struct kmem_zone *xfs_ili_zone; +extern struct kmem_cache *xfs_ili_cache; #endif /* __XFS_INODE_ITEM_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6cd2d4aa770..89fec9a18c34 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -21,7 +21,7 @@ #include "xfs_sb.h" #include "xfs_health.h" -kmem_zone_t *xfs_log_ticket_zone; +struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ STATIC struct xlog * @@ -3487,7 +3487,7 @@ xfs_log_ticket_put( { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) - kmem_cache_free(xfs_log_ticket_zone, ticket); + kmem_cache_free(xfs_log_ticket_cache, ticket); } xlog_ticket_t * @@ -3611,7 +3611,7 @@ xlog_ticket_alloc( struct xlog_ticket *tic; int unit_res; - tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL); + tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL); unit_res = xlog_calc_unit_res(log, unit_bytes); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 844fbeec3545..23103d68423c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -497,7 +497,7 @@ xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); -extern kmem_zone_t *xfs_log_ticket_zone; +extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket * xlog_ticket_alloc( struct xlog *log, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 10562ecbd9ea..53366cc0bc9e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2466,11 +2466,11 @@ xlog_finish_defer_ops( { struct xfs_defer_capture *dfc, *next; struct xfs_trans *tp; - struct xfs_inode *ip; int error = 0; list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { struct xfs_trans_res resv; + struct xfs_defer_resources dres; /* * Create a new transaction reservation from the captured @@ -2494,13 +2494,9 @@ xlog_finish_defer_ops( * from recovering a single intent item. */ list_del_init(&dfc->dfc_list); - xfs_defer_ops_continue(dfc, tp, &ip); - + xfs_defer_ops_continue(dfc, tp, &dres); error = xfs_trans_commit(tp); - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_irele(ip); - } + xfs_defer_resources_rele(&dres); if (error) return error; } @@ -2520,7 +2516,7 @@ xlog_abort_defer_ops( list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); } } /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 06dac09eddbd..359109b6f0d3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -567,6 +567,18 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Compute maximum possible height for per-AG btree types for this fs. */ +static inline void +xfs_agbtree_compute_maxlevels( + struct xfs_mount *mp) +{ + unsigned int levels; + + levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels); + levels = max(levels, mp->m_rmap_maxlevels); + mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -638,6 +650,8 @@ xfs_mountfs( xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); + xfs_agbtree_compute_maxlevels(mp); + /* * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks * is NOT aligned turn off m_dalign since allocator alignment is within diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e091f3b3fa15..00720a02e761 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -128,10 +128,11 @@ typedef struct xfs_mount { uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ - uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ - uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_alloc_maxlevels; /* max alloc btree levels */ + uint m_bm_maxlevels[2]; /* max bmap btree levels */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ + unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 34c3b16f834f..f85e3b07ab44 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -219,7 +219,7 @@ _xfs_mru_cache_list_insert( * When destroying or reaping, all the elements that were migrated to the reap * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free - * function and deleting the element from the element zone. + * function and deleting the element from the element cache. * * We get called holding the mru->lock, which we drop and then reacquire. * Sparse need special help with this to tell it we know what we are doing. diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5608066d6e53..32ac8d9c8940 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -850,7 +850,7 @@ xfs_qm_reset_dqcounts( */ #ifdef DEBUG j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) / - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif dqb = bp->b_addr; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 442a0f97a9d4..5bb12717ea28 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -11,7 +11,7 @@ struct xfs_inode; -extern struct kmem_zone *xfs_qm_dqtrxzone; +extern struct kmem_cache *xfs_dqtrx_cache; /* * Number of bmaps that we ask from bmapi when doing a quotacheck. diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 46904b793bd4..d3da67772d57 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_cui_zone; -kmem_zone_t *xfs_cud_zone; +struct kmem_cache *xfs_cui_cache; +struct kmem_cache *xfs_cud_cache; static const struct xfs_item_ops xfs_cui_item_ops; @@ -38,7 +38,7 @@ xfs_cui_item_free( if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else - kmem_cache_free(xfs_cui_zone, cuip); + kmem_cache_free(xfs_cui_cache, cuip); } /* @@ -143,7 +143,7 @@ xfs_cui_init( cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 0); else - cuip = kmem_cache_zalloc(xfs_cui_zone, + cuip = kmem_cache_zalloc(xfs_cui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); @@ -204,7 +204,7 @@ xfs_cud_item_release( struct xfs_cud_log_item *cudp = CUD_ITEM(lip); xfs_cui_release(cudp->cud_cuip); - kmem_cache_free(xfs_cud_zone, cudp); + kmem_cache_free(xfs_cud_cache, cudp); } static const struct xfs_item_ops xfs_cud_item_ops = { @@ -221,7 +221,7 @@ xfs_trans_get_cud( { struct xfs_cud_log_item *cudp; - cudp = kmem_cache_zalloc(xfs_cud_zone, GFP_KERNEL | __GFP_NOFAIL); + cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); cudp->cud_cuip = cuip; @@ -384,7 +384,7 @@ xfs_refcount_update_finish_item( refc->ri_blockcount = new_aglen; return -EAGAIN; } - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); return error; } @@ -404,7 +404,7 @@ xfs_refcount_update_cancel_item( struct xfs_refcount_intent *refc; refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); } const struct xfs_defer_op_type xfs_refcount_update_defer_type = { @@ -557,7 +557,7 @@ xfs_cui_item_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index f4f2e836540b..eb0ab13682d0 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -25,7 +25,7 @@ /* kernel only CUI/CUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -68,7 +68,7 @@ struct xfs_cud_log_item { struct xfs_cud_log_format cud_format; }; -extern struct kmem_zone *xfs_cui_zone; -extern struct kmem_zone *xfs_cud_zone; +extern struct kmem_cache *xfs_cui_cache; +extern struct kmem_cache *xfs_cud_cache; #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 76355f293488..cb0edb1d68ef 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -484,7 +484,7 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_bmap_add_free(*tpp, del.br_startblock, + xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); /* Roll the transaction */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 5f0695980467..c3966b4c58ef 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_rui_zone; -kmem_zone_t *xfs_rud_zone; +struct kmem_cache *xfs_rui_cache; +struct kmem_cache *xfs_rud_cache; static const struct xfs_item_ops xfs_rui_item_ops; @@ -38,7 +38,7 @@ xfs_rui_item_free( if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else - kmem_cache_free(xfs_rui_zone, ruip); + kmem_cache_free(xfs_rui_cache, ruip); } /* @@ -141,7 +141,7 @@ xfs_rui_init( if (nextents > XFS_RUI_MAX_FAST_EXTENTS) ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0); else - ruip = kmem_cache_zalloc(xfs_rui_zone, + ruip = kmem_cache_zalloc(xfs_rui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); @@ -227,7 +227,7 @@ xfs_rud_item_release( struct xfs_rud_log_item *rudp = RUD_ITEM(lip); xfs_rui_release(rudp->rud_ruip); - kmem_cache_free(xfs_rud_zone, rudp); + kmem_cache_free(xfs_rud_cache, rudp); } static const struct xfs_item_ops xfs_rud_item_ops = { @@ -244,7 +244,7 @@ xfs_trans_get_rud( { struct xfs_rud_log_item *rudp; - rudp = kmem_cache_zalloc(xfs_rud_zone, GFP_KERNEL | __GFP_NOFAIL); + rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); rudp->rud_ruip = ruip; @@ -427,7 +427,7 @@ xfs_rmap_update_finish_item( rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock, rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state, state); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); return error; } @@ -447,7 +447,7 @@ xfs_rmap_update_cancel_item( struct xfs_rmap_intent *rmap; rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); } const struct xfs_defer_op_type xfs_rmap_update_defer_type = { @@ -587,7 +587,7 @@ xfs_rui_item_recover( } xfs_rmap_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_rmap_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 31e6cdfff71f..802e5119eaca 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -28,7 +28,7 @@ /* kernel only RUI/RUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -68,7 +68,7 @@ struct xfs_rud_log_item { struct xfs_rud_log_format rud_format; }; -extern struct kmem_zone *xfs_rui_zone; -extern struct kmem_zone *xfs_rud_zone; +extern struct kmem_cache *xfs_rui_cache; +extern struct kmem_cache *xfs_rud_cache; #endif /* __XFS_RMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c4e0cd1c1c8c..e21459f9923a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -37,6 +37,7 @@ #include "xfs_reflink.h" #include "xfs_pwork.h" #include "xfs_ag.h" +#include "xfs_defer.h" #include <linux/magic.h> #include <linux/fs_context.h> @@ -1951,196 +1952,194 @@ static struct file_system_type xfs_fs_type = { MODULE_ALIAS_FS("xfs"); STATIC int __init -xfs_init_zones(void) +xfs_init_caches(void) { - xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + int error; + + xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", sizeof(struct xlog_ticket), 0, 0, NULL); - if (!xfs_log_ticket_zone) + if (!xfs_log_ticket_cache) goto out; - xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", - sizeof(struct xfs_extent_free_item), - 0, 0, NULL); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; + error = xfs_btree_init_cur_caches(); + if (error) + goto out_destroy_log_ticket_cache; - xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", - sizeof(struct xfs_btree_cur), - 0, 0, NULL); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; + error = xfs_defer_init_item_caches(); + if (error) + goto out_destroy_btree_cur_cache; - xfs_da_state_zone = kmem_cache_create("xfs_da_state", + xfs_da_state_cache = kmem_cache_create("xfs_da_state", sizeof(struct xfs_da_state), 0, 0, NULL); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; + if (!xfs_da_state_cache) + goto out_destroy_defer_item_cache; - xfs_ifork_zone = kmem_cache_create("xfs_ifork", + xfs_ifork_cache = kmem_cache_create("xfs_ifork", sizeof(struct xfs_ifork), 0, 0, NULL); - if (!xfs_ifork_zone) - goto out_destroy_da_state_zone; + if (!xfs_ifork_cache) + goto out_destroy_da_state_cache; - xfs_trans_zone = kmem_cache_create("xfs_trans", + xfs_trans_cache = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; + if (!xfs_trans_cache) + goto out_destroy_ifork_cache; /* - * The size of the zone allocated buf log item is the maximum + * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item), 0, 0, NULL); - if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + if (!xfs_buf_item_cache) + goto out_destroy_trans_cache; - xfs_efd_zone = kmem_cache_create("xfs_efd_item", + xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + (XFS_EFD_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; + if (!xfs_efd_cache) + goto out_destroy_buf_item_cache; - xfs_efi_zone = kmem_cache_create("xfs_efi_item", + xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + (XFS_EFI_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; + if (!xfs_efi_cache) + goto out_destroy_efd_cache; - xfs_inode_zone = kmem_cache_create("xfs_inode", + xfs_inode_cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; + if (!xfs_inode_cache) + goto out_destroy_efi_cache; - xfs_ili_zone = kmem_cache_create("xfs_ili", + xfs_ili_cache = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; + if (!xfs_ili_cache) + goto out_destroy_inode_cache; - xfs_icreate_zone = kmem_cache_create("xfs_icr", + xfs_icreate_cache = kmem_cache_create("xfs_icr", sizeof(struct xfs_icreate_item), 0, 0, NULL); - if (!xfs_icreate_zone) - goto out_destroy_ili_zone; + if (!xfs_icreate_cache) + goto out_destroy_ili_cache; - xfs_rud_zone = kmem_cache_create("xfs_rud_item", + xfs_rud_cache = kmem_cache_create("xfs_rud_item", sizeof(struct xfs_rud_log_item), 0, 0, NULL); - if (!xfs_rud_zone) - goto out_destroy_icreate_zone; + if (!xfs_rud_cache) + goto out_destroy_icreate_cache; - xfs_rui_zone = kmem_cache_create("xfs_rui_item", + xfs_rui_cache = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_rui_zone) - goto out_destroy_rud_zone; + if (!xfs_rui_cache) + goto out_destroy_rud_cache; - xfs_cud_zone = kmem_cache_create("xfs_cud_item", + xfs_cud_cache = kmem_cache_create("xfs_cud_item", sizeof(struct xfs_cud_log_item), 0, 0, NULL); - if (!xfs_cud_zone) - goto out_destroy_rui_zone; + if (!xfs_cud_cache) + goto out_destroy_rui_cache; - xfs_cui_zone = kmem_cache_create("xfs_cui_item", + xfs_cui_cache = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_cui_zone) - goto out_destroy_cud_zone; + if (!xfs_cui_cache) + goto out_destroy_cud_cache; - xfs_bud_zone = kmem_cache_create("xfs_bud_item", + xfs_bud_cache = kmem_cache_create("xfs_bud_item", sizeof(struct xfs_bud_log_item), 0, 0, NULL); - if (!xfs_bud_zone) - goto out_destroy_cui_zone; + if (!xfs_bud_cache) + goto out_destroy_cui_cache; - xfs_bui_zone = kmem_cache_create("xfs_bui_item", + xfs_bui_cache = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_bui_zone) - goto out_destroy_bud_zone; + if (!xfs_bui_cache) + goto out_destroy_bud_cache; return 0; - out_destroy_bud_zone: - kmem_cache_destroy(xfs_bud_zone); - out_destroy_cui_zone: - kmem_cache_destroy(xfs_cui_zone); - out_destroy_cud_zone: - kmem_cache_destroy(xfs_cud_zone); - out_destroy_rui_zone: - kmem_cache_destroy(xfs_rui_zone); - out_destroy_rud_zone: - kmem_cache_destroy(xfs_rud_zone); - out_destroy_icreate_zone: - kmem_cache_destroy(xfs_icreate_zone); - out_destroy_ili_zone: - kmem_cache_destroy(xfs_ili_zone); - out_destroy_inode_zone: - kmem_cache_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_cache_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_cache_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_cache_destroy(xfs_buf_item_zone); - out_destroy_trans_zone: - kmem_cache_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_cache_destroy(xfs_ifork_zone); - out_destroy_da_state_zone: - kmem_cache_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_cache_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_cache_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_cache_destroy(xfs_log_ticket_zone); + out_destroy_bud_cache: + kmem_cache_destroy(xfs_bud_cache); + out_destroy_cui_cache: + kmem_cache_destroy(xfs_cui_cache); + out_destroy_cud_cache: + kmem_cache_destroy(xfs_cud_cache); + out_destroy_rui_cache: + kmem_cache_destroy(xfs_rui_cache); + out_destroy_rud_cache: + kmem_cache_destroy(xfs_rud_cache); + out_destroy_icreate_cache: + kmem_cache_destroy(xfs_icreate_cache); + out_destroy_ili_cache: + kmem_cache_destroy(xfs_ili_cache); + out_destroy_inode_cache: + kmem_cache_destroy(xfs_inode_cache); + out_destroy_efi_cache: + kmem_cache_destroy(xfs_efi_cache); + out_destroy_efd_cache: + kmem_cache_destroy(xfs_efd_cache); + out_destroy_buf_item_cache: + kmem_cache_destroy(xfs_buf_item_cache); + out_destroy_trans_cache: + kmem_cache_destroy(xfs_trans_cache); + out_destroy_ifork_cache: + kmem_cache_destroy(xfs_ifork_cache); + out_destroy_da_state_cache: + kmem_cache_destroy(xfs_da_state_cache); + out_destroy_defer_item_cache: + xfs_defer_destroy_item_caches(); + out_destroy_btree_cur_cache: + xfs_btree_destroy_cur_caches(); + out_destroy_log_ticket_cache: + kmem_cache_destroy(xfs_log_ticket_cache); out: return -ENOMEM; } STATIC void -xfs_destroy_zones(void) +xfs_destroy_caches(void) { /* * Make sure all delayed rcu free are flushed before we * destroy caches. */ rcu_barrier(); - kmem_cache_destroy(xfs_bui_zone); - kmem_cache_destroy(xfs_bud_zone); - kmem_cache_destroy(xfs_cui_zone); - kmem_cache_destroy(xfs_cud_zone); - kmem_cache_destroy(xfs_rui_zone); - kmem_cache_destroy(xfs_rud_zone); - kmem_cache_destroy(xfs_icreate_zone); - kmem_cache_destroy(xfs_ili_zone); - kmem_cache_destroy(xfs_inode_zone); - kmem_cache_destroy(xfs_efi_zone); - kmem_cache_destroy(xfs_efd_zone); - kmem_cache_destroy(xfs_buf_item_zone); - kmem_cache_destroy(xfs_trans_zone); - kmem_cache_destroy(xfs_ifork_zone); - kmem_cache_destroy(xfs_da_state_zone); - kmem_cache_destroy(xfs_btree_cur_zone); - kmem_cache_destroy(xfs_bmap_free_item_zone); - kmem_cache_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_bui_cache); + kmem_cache_destroy(xfs_bud_cache); + kmem_cache_destroy(xfs_cui_cache); + kmem_cache_destroy(xfs_cud_cache); + kmem_cache_destroy(xfs_rui_cache); + kmem_cache_destroy(xfs_rud_cache); + kmem_cache_destroy(xfs_icreate_cache); + kmem_cache_destroy(xfs_ili_cache); + kmem_cache_destroy(xfs_inode_cache); + kmem_cache_destroy(xfs_efi_cache); + kmem_cache_destroy(xfs_efd_cache); + kmem_cache_destroy(xfs_buf_item_cache); + kmem_cache_destroy(xfs_trans_cache); + kmem_cache_destroy(xfs_ifork_cache); + kmem_cache_destroy(xfs_da_state_cache); + xfs_defer_destroy_item_caches(); + xfs_btree_destroy_cur_caches(); + kmem_cache_destroy(xfs_log_ticket_cache); } STATIC int __init @@ -2233,13 +2232,13 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_init_zones(); + error = xfs_init_caches(); if (error) goto out_destroy_hp; error = xfs_init_workqueues(); if (error) - goto out_destroy_zones; + goto out_destroy_caches; error = xfs_mru_cache_init(); if (error) @@ -2314,8 +2313,8 @@ init_xfs_fs(void) xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); + out_destroy_caches: + xfs_destroy_caches(); out_destroy_hp: xfs_cpu_hotplug_destroy(); out: @@ -2338,7 +2337,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); - xfs_destroy_zones(); + xfs_destroy_caches(); xfs_uuid_table_free(); xfs_cpu_hotplug_destroy(); } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 18dc5eca6c04..8608f804388f 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -105,7 +105,7 @@ bug_on_assert_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0); + return sysfs_emit(buf, "%d\n", xfs_globals.bug_on_assert); } XFS_SYSFS_ATTR_RW(bug_on_assert); @@ -135,7 +135,7 @@ log_recovery_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.log_recovery_delay); } XFS_SYSFS_ATTR_RW(log_recovery_delay); @@ -165,7 +165,7 @@ mount_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.mount_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.mount_delay); } XFS_SYSFS_ATTR_RW(mount_delay); @@ -188,7 +188,7 @@ always_cow_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.always_cow); + return sysfs_emit(buf, "%d\n", xfs_globals.always_cow); } XFS_SYSFS_ATTR_RW(always_cow); @@ -224,7 +224,7 @@ pwork_threads_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); + return sysfs_emit(buf, "%d\n", xfs_globals.pwork_threads); } XFS_SYSFS_ATTR_RW(pwork_threads); #endif /* DEBUG */ @@ -327,7 +327,7 @@ log_head_lsn_show( block = log->l_curr_block; spin_unlock(&log->l_icloglock); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_head_lsn); @@ -341,7 +341,7 @@ log_tail_lsn_show( struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_tail_lsn); @@ -356,7 +356,7 @@ reserve_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(reserve_grant_head); @@ -370,7 +370,7 @@ write_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(write_grant_head); @@ -425,7 +425,7 @@ max_retries_show( else retries = cfg->max_retries; - return snprintf(buf, PAGE_SIZE, "%d\n", retries); + return sysfs_emit(buf, "%d\n", retries); } static ssize_t @@ -466,7 +466,7 @@ retry_timeout_seconds_show( else timeout = jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC; - return snprintf(buf, PAGE_SIZE, "%d\n", timeout); + return sysfs_emit(buf, "%d\n", timeout); } static ssize_t @@ -504,7 +504,7 @@ fail_at_unmount_show( { struct xfs_mount *mp = err_to_mp(kobject); - return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount); + return sysfs_emit(buf, "%d\n", mp->m_fail_unmount); } static ssize_t diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 1033a95fbf8e..4a8076ef8cb4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2476,7 +2476,7 @@ DECLARE_EVENT_CLASS(xfs_btree_cur_class, __entry->btnum = cur->bc_btnum; __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->daddr = bp ? xfs_buf_daddr(bp) : -1; ), TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx", diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 67dec11e34c7..234a9d9c2f43 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -25,7 +25,7 @@ #include "xfs_dquot.h" #include "xfs_icache.h" -kmem_zone_t *xfs_trans_zone; +struct kmem_cache *xfs_trans_cache; #if defined(CONFIG_TRACEPOINTS) static void @@ -76,7 +76,7 @@ xfs_trans_free( if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); - kmem_cache_free(xfs_trans_zone, tp); + kmem_cache_free(xfs_trans_cache, tp); } /* @@ -95,7 +95,7 @@ xfs_trans_dup( trace_xfs_trans_dup(tp, _RET_IP_); - ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. @@ -263,7 +263,7 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); xfs_trans_set_context(tp); @@ -477,7 +477,7 @@ STATIC void xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { - xfs_dsb_t *sbp; + struct xfs_dsb *sbp; struct xfs_buf *bp; int whole = 0; @@ -541,14 +541,14 @@ xfs_trans_apply_sb_deltas( /* * Log the whole thing, the fields are noncontiguous. */ - xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); else /* * Since all the modifiable fields are contiguous, we * can get away with this. */ - xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), - offsetof(xfs_dsb_t, sb_frextents) + + xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount), + offsetof(struct xfs_dsb, sb_frextents) + sizeof(sbp->sb_frextents) - 1); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 50da47f23a07..a487b264a9eb 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -113,12 +113,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, #define XFS_ITEM_FLUSHING 3 /* - * Deferred operation item relogging limits. - */ -#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ -#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ - -/* * This is the structure maintained for every active transaction. */ typedef struct xfs_trans { @@ -243,7 +237,7 @@ void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); -extern kmem_zone_t *xfs_trans_zone; +extern struct kmem_cache *xfs_trans_cache; static inline struct xfs_log_item * xfs_trans_item_relog( diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 3872ce671411..9ba7e6b9bed3 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -846,7 +846,7 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_cache_zalloc(xfs_qm_dqtrxzone, + tp->t_dqinfo = kmem_cache_zalloc(xfs_dqtrx_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -856,6 +856,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo); tp->t_dqinfo = NULL; } |