summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c31
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c26
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_format.h5
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c76
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c106
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_inode.c57
-rw-r--r--fs/xfs/xfs_iomap.c15
-rw-r--r--fs/xfs/xfs_trans.c7
12 files changed, 205 insertions, 130 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 84db76e0e3e3..fecd187fcf2c 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -157,6 +157,7 @@ __xfs_ag_resv_free(
error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
resv->ar_reserved = 0;
resv->ar_asked = 0;
+ resv->ar_orig_reserved = 0;
if (error)
trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
@@ -189,13 +190,34 @@ __xfs_ag_resv_init(
struct xfs_mount *mp = pag->pag_mount;
struct xfs_ag_resv *resv;
int error;
- xfs_extlen_t reserved;
+ xfs_extlen_t hidden_space;
if (used > ask)
ask = used;
- reserved = ask - used;
- error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+ switch (type) {
+ case XFS_AG_RESV_RMAPBT:
+ /*
+ * Space taken by the rmapbt is not subtracted from fdblocks
+ * because the rmapbt lives in the free space. Here we must
+ * subtract the entire reservation from fdblocks so that we
+ * always have blocks available for rmapbt expansion.
+ */
+ hidden_space = ask;
+ break;
+ case XFS_AG_RESV_METADATA:
+ /*
+ * Space taken by all other metadata btrees are accounted
+ * on-disk as used space. We therefore only hide the space
+ * that is reserved but not used by the trees.
+ */
+ hidden_space = ask - used;
+ break;
+ default:
+ ASSERT(0);
+ return -EINVAL;
+ }
+ error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
if (error) {
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
@@ -216,7 +238,8 @@ __xfs_ag_resv_init(
resv = xfs_perag_resv(pag, type);
resv->ar_asked = ask;
- resv->ar_reserved = resv->ar_orig_reserved = reserved;
+ resv->ar_orig_reserved = hidden_space;
+ resv->ar_reserved = ask - used;
trace_xfs_ag_resv_init(pag, type, ask);
return 0;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 01628f0c9a0c..7205268b30bc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5780,6 +5780,32 @@ del_cursor:
return error;
}
+/* Make sure we won't be right-shifting an extent past the maximum bound. */
+int
+xfs_bmap_can_insert_extents(
+ struct xfs_inode *ip,
+ xfs_fileoff_t off,
+ xfs_fileoff_t shift)
+{
+ struct xfs_bmbt_irec got;
+ int is_empty;
+ int error = 0;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
+ if (!error && !is_empty && got.br_startoff >= off &&
+ ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
+ error = -EINVAL;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ return error;
+}
+
int
xfs_bmap_insert_extents(
struct xfs_trans *tp,
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 99dddbd0fcc6..9b49ddf99c41 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
bool *done, xfs_fsblock_t *firstblock,
struct xfs_defer_ops *dfops);
+int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
+ xfs_fileoff_t shift);
int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1c5a8aaf2bfc..059bc44c27e8 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt {
XFS_DFORK_DSIZE(dip, mp) : \
XFS_DFORK_ASIZE(dip, mp))
+#define XFS_DFORK_MAXEXT(dip, mp, w) \
+ (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
+
/*
* Return pointers to the data or attribute forks.
*/
@@ -1526,6 +1529,8 @@ typedef struct xfs_bmdr_block {
#define BMBT_STARTBLOCK_BITLEN 52
#define BMBT_BLOCKCOUNT_BITLEN 21
+#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1)
+
typedef struct xfs_bmbt_rec {
__be64 l0, l1;
} xfs_bmbt_rec_t;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d38d724534c4..33dc34655ac3 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -374,6 +374,47 @@ xfs_log_dinode_to_disk(
}
}
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+ struct xfs_dinode *dip,
+ struct xfs_mount *mp,
+ int whichfork)
+{
+ uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+ switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * no local regular files yet
+ */
+ if (whichfork == XFS_DATA_FORK) {
+ if (S_ISREG(be16_to_cpu(dip->di_mode)))
+ return __this_address;
+ if (be64_to_cpu(dip->di_size) >
+ XFS_DFORK_SIZE(dip, mp, whichfork))
+ return __this_address;
+ }
+ if (di_nextents)
+ return __this_address;
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+ return __this_address;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ if (whichfork == XFS_ATTR_FORK) {
+ if (di_nextents > MAXAEXTNUM)
+ return __this_address;
+ } else if (di_nextents > MAXEXTNUM) {
+ return __this_address;
+ }
+ break;
+ default:
+ return __this_address;
+ }
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -441,24 +482,9 @@ xfs_dinode_verify(
case S_IFREG:
case S_IFLNK:
case S_IFDIR:
- switch (dip->di_format) {
- case XFS_DINODE_FMT_LOCAL:
- /*
- * no local regular files yet
- */
- if (S_ISREG(mode))
- return __this_address;
- if (di_size > XFS_DFORK_DSIZE(dip, mp))
- return __this_address;
- if (dip->di_nextents)
- return __this_address;
- /* fall through */
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- break;
- default:
- return __this_address;
- }
+ fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+ if (fa)
+ return fa;
break;
case 0:
/* Uninitialized inode ok. */
@@ -468,17 +494,9 @@ xfs_dinode_verify(
}
if (XFS_DFORK_Q(dip)) {
- switch (dip->di_aformat) {
- case XFS_DINODE_FMT_LOCAL:
- if (dip->di_anextents)
- return __this_address;
- /* fall through */
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- break;
- default:
- return __this_address;
- }
+ fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+ if (fa)
+ return fa;
} else {
/*
* If there is no fork offset, this may be a freshly-made inode
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 65fc4ed2e9a1..b228c821bae6 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range(
if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
low_rec->ar_startext == high_rec->ar_startext)
return 0;
- if (high_rec->ar_startext >= mp->m_sb.sb_rextents)
- high_rec->ar_startext = mp->m_sb.sb_rextents - 1;
+ if (high_rec->ar_startext > mp->m_sb.sb_rextents)
+ high_rec->ar_startext = mp->m_sb.sb_rextents;
/* Iterate the bitmap, looking for discrepancies. */
rtstart = low_rec->ar_startext;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c35009a86699..83b1e8c6c18f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -685,12 +685,10 @@ out_unlock_iolock:
}
/*
- * dead simple method of punching delalyed allocation blocks from a range in
- * the inode. Walks a block at a time so will be slow, but is only executed in
- * rare error cases so the overhead is not critical. This will always punch out
- * both the start and end blocks, even if the ranges only partially overlap
- * them, so it is up to the caller to ensure that partial blocks are not
- * passed in.
+ * Dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. This will always punch out both the start and end blocks, even
+ * if the ranges only partially overlap them, so it is up to the caller to
+ * ensure that partial blocks are not passed in.
*/
int
xfs_bmap_punch_delalloc_range(
@@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range(
xfs_fileoff_t start_fsb,
xfs_fileoff_t length)
{
- xfs_fileoff_t remaining = length;
+ struct xfs_ifork *ifp = &ip->i_df;
+ xfs_fileoff_t end_fsb = start_fsb + length;
+ struct xfs_bmbt_irec got, del;
+ struct xfs_iext_cursor icur;
int error = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- do {
- int done;
- xfs_bmbt_irec_t imap;
- int nimaps = 1;
- xfs_fsblock_t firstblock;
- struct xfs_defer_ops dfops;
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
- /*
- * Map the range first and check that it is a delalloc extent
- * before trying to unmap the range. Otherwise we will be
- * trying to remove a real extent (which requires a
- * transaction) or a hole, which is probably a bad idea...
- */
- error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
- XFS_BMAPI_ENTIRE);
+ if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+ return 0;
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_alert(ip->i_mount,
- "Failed delalloc mapping lookup ino %lld fsb %lld.",
- ip->i_ino, start_fsb);
- }
- break;
- }
- if (!nimaps) {
- /* nothing there */
- goto next_block;
- }
- if (imap.br_startblock != DELAYSTARTBLOCK) {
- /* been converted, ignore */
- goto next_block;
- }
- WARN_ON(imap.br_blockcount == 0);
+ while (got.br_startoff + got.br_blockcount > start_fsb) {
+ del = got;
+ xfs_trim_extent(&del, start_fsb, length);
/*
- * Note: while we initialise the firstblock/dfops pair, they
- * should never be used because blocks should never be
- * allocated or freed for a delalloc extent and hence we need
- * don't cancel or finish them after the xfs_bunmapi() call.
+ * A delete can push the cursor forward. Step back to the
+ * previous extent on non-delalloc or extents outside the
+ * target range.
*/
- xfs_defer_init(&dfops, &firstblock);
- error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
- &dfops, &done);
- if (error)
- break;
+ if (!del.br_blockcount ||
+ !isnullstartblock(del.br_startblock)) {
+ if (!xfs_iext_prev_extent(ifp, &icur, &got))
+ break;
+ continue;
+ }
- ASSERT(!xfs_defer_has_unfinished_work(&dfops));
-next_block:
- start_fsb++;
- remaining--;
- } while(remaining > 0);
+ error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
+ &got, &del);
+ if (error || !xfs_iext_get_extent(ifp, &icur, &got))
+ break;
+ }
return error;
}
@@ -1208,7 +1187,22 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
- return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+ error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+ if (error)
+ return error;
+
+ /*
+ * If we zeroed right up to EOF and EOF straddles a page boundary we
+ * must make sure that the post-EOF area is also zeroed because the
+ * page could be mmap'd and iomap_zero_range doesn't do that for us.
+ * Writeback of the eof page will do this, albeit clumsily.
+ */
+ if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ (offset + len) & ~PAGE_MASK, LLONG_MAX);
+ }
+
+ return error;
}
/*
@@ -1404,6 +1398,10 @@ xfs_insert_file_space(
trace_xfs_insert_file_space(ip);
+ error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
+ if (error)
+ return error;
+
error = xfs_prepare_shift(ip, offset);
if (error)
return error;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index c34fa9c342f2..c7157bc48bd1 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info)
{
- struct xfs_rtalloc_rec alow;
- struct xfs_rtalloc_rec ahigh;
+ struct xfs_rtalloc_rec alow = { 0 };
+ struct xfs_rtalloc_rec ahigh = { 0 };
int error;
xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7afcad6b711..3f2bd6032cf8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -387,7 +387,7 @@ xfs_reserve_blocks(
do {
free = percpu_counter_sum(&mp->m_fdblocks) -
mp->m_alloc_set_aside;
- if (!free)
+ if (free <= 0)
break;
delta = request - mp->m_resblks;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7a96c4e0ab5c..5df4de666cc1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3236,7 +3236,6 @@ xfs_iflush_cluster(
struct xfs_inode *cip;
int nr_found;
int clcount = 0;
- int bufwasdelwri;
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -3360,37 +3359,22 @@ cluster_corrupt_out:
* inode buffer and shut down the filesystem.
*/
rcu_read_unlock();
- /*
- * Clean up the buffer. If it was delwri, just release it --
- * brelse can handle it with no problems. If not, shut down the
- * filesystem before releasing the buffer.
- */
- bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
- if (bufwasdelwri)
- xfs_buf_relse(bp);
-
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- if (!bufwasdelwri) {
- /*
- * Just like incore_relse: if we have b_iodone functions,
- * mark the buffer as an error and call them. Otherwise
- * mark it as stale and brelse.
- */
- if (bp->b_iodone) {
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
- xfs_buf_ioerror(bp, -EIO);
- xfs_buf_ioend(bp);
- } else {
- xfs_buf_stale(bp);
- xfs_buf_relse(bp);
- }
- }
-
/*
- * Unlocks the flush lock
+ * We'll always have an inode attached to the buffer for completion
+ * process by the time we are called from xfs_iflush(). Hence we have
+ * always need to do IO completion processing to abort the inodes
+ * attached to the buffer. handle them just like the shutdown case in
+ * xfs_buf_submit().
*/
+ ASSERT(bp->b_iodone);
+ bp->b_flags &= ~XBF_DONE;
+ xfs_buf_stale(bp);
+ xfs_buf_ioerror(bp, -EIO);
+ xfs_buf_ioend(bp);
+
+ /* abort the corrupt inode, as it was not attached to the buffer */
xfs_iflush_abort(cip, false);
kmem_free(cilist);
xfs_perag_put(pag);
@@ -3486,12 +3470,17 @@ xfs_iflush(
xfs_log_force(mp, 0);
/*
- * inode clustering:
- * see if other inodes can be gathered into this write
+ * inode clustering: try to gather other inodes into this write
+ *
+ * Note: Any error during clustering will result in the filesystem
+ * being shut down and completion callbacks run on the cluster buffer.
+ * As we have already flushed and attached this inode to the buffer,
+ * it has already been aborted and released by xfs_iflush_cluster() and
+ * so we have no further error handling to do here.
*/
error = xfs_iflush_cluster(ip, bp);
if (error)
- goto cluster_corrupt_out;
+ return error;
*bpp = bp;
return 0;
@@ -3500,12 +3489,8 @@ corrupt_out:
if (bp)
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-cluster_corrupt_out:
- error = -EFSCORRUPTED;
abort_out:
- /*
- * Unlocks the flush lock
- */
+ /* abort the corrupt inode, as it was not attached to the buffer */
xfs_iflush_abort(ip, false);
return error;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..55876dd02f0c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -963,12 +963,13 @@ xfs_ilock_for_iomap(
unsigned *lockmode)
{
unsigned mode = XFS_ILOCK_SHARED;
+ bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
/*
* COW writes may allocate delalloc space or convert unwritten COW
* extents, so we need to make sure to take the lock exclusively here.
*/
- if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
+ if (xfs_is_reflink_inode(ip) && is_write) {
/*
* FIXME: It could still overwrite on unshared extents and not
* need allocation.
@@ -989,6 +990,7 @@ xfs_ilock_for_iomap(
mode = XFS_ILOCK_EXCL;
}
+relock:
if (flags & IOMAP_NOWAIT) {
if (!xfs_ilock_nowait(ip, mode))
return -EAGAIN;
@@ -996,6 +998,17 @@ xfs_ilock_for_iomap(
xfs_ilock(ip, mode);
}
+ /*
+ * The reflink iflag could have changed since the earlier unlocked
+ * check, so if we got ILOCK_SHARED for a write and but we're now a
+ * reflink inode we have to switch to ILOCK_EXCL and relock.
+ */
+ if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
+ xfs_iunlock(ip, mode);
+ mode = XFS_ILOCK_EXCL;
+ goto relock;
+ }
+
*lockmode = mode;
return 0;
}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index e040af120b69..524f543c5b82 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -258,7 +258,12 @@ xfs_trans_alloc(
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
- WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+ /*
+ * Zero-reservation ("empty") transactions can't modify anything, so
+ * they're allowed to run while we're frozen.
+ */
+ WARN_ON(resp->tr_logres > 0 &&
+ mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
atomic_inc(&mp->m_active_trans);
tp = kmem_zone_zalloc(xfs_trans_zone,