summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/fops.c10
-rw-r--r--fs/open.c51
-rw-r--r--fs/xfs/xfs_bmap_util.c11
-rw-r--r--fs/xfs/xfs_file.c353
-rw-r--r--include/linux/falloc.h18
-rw-r--r--include/trace/events/ext4.h1
-rw-r--r--include/uapi/linux/falloc.h1
7 files changed, 258 insertions, 187 deletions
diff --git a/block/fops.c b/block/fops.c
index dacbd61fda29..35c47a304727 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -771,7 +771,7 @@ reexpand:
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
+ FALLOC_FL_ZERO_RANGE)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
@@ -830,14 +830,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
len >> SECTOR_SHIFT, GFP_KERNEL,
BLKDEV_ZERO_NOFALLBACK);
break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
- error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
- if (error)
- goto fail;
-
- error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
- len >> SECTOR_SHIFT, GFP_KERNEL);
- break;
default:
error = -EOPNOTSUPP;
}
diff --git a/fs/open.c b/fs/open.c
index 22adbef7ecc2..daf1b55ca818 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -252,40 +252,39 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (offset < 0 || len <= 0)
return -EINVAL;
- /* Return error if mode is not supported */
- if (mode & ~FALLOC_FL_SUPPORTED_MASK)
+ if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
- /* Punch hole and zero range are mutually exclusive */
- if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
- (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
- return -EOPNOTSUPP;
-
- /* Punch hole must have keep size set */
- if ((mode & FALLOC_FL_PUNCH_HOLE) &&
- !(mode & FALLOC_FL_KEEP_SIZE))
+ /*
+ * Modes are exclusive, even if that is not obvious from the encoding
+ * as bit masks and the mix with the flag in the same namespace.
+ *
+ * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
+ * encoded as no bit set.
+ */
+ switch (mode & FALLOC_FL_MODE_MASK) {
+ case FALLOC_FL_ALLOCATE_RANGE:
+ case FALLOC_FL_UNSHARE_RANGE:
+ case FALLOC_FL_ZERO_RANGE:
+ break;
+ case FALLOC_FL_PUNCH_HOLE:
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ return -EOPNOTSUPP;
+ break;
+ case FALLOC_FL_COLLAPSE_RANGE:
+ case FALLOC_FL_INSERT_RANGE:
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+ break;
+ default:
return -EOPNOTSUPP;
-
- /* Collapse range should only be used exclusively. */
- if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
- (mode & ~FALLOC_FL_COLLAPSE_RANGE))
- return -EINVAL;
-
- /* Insert range should only be used exclusively. */
- if ((mode & FALLOC_FL_INSERT_RANGE) &&
- (mode & ~FALLOC_FL_INSERT_RANGE))
- return -EINVAL;
-
- /* Unshare range should only be used with allocate mode. */
- if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
- (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
- return -EINVAL;
+ }
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/*
- * We can only allow pure fallocate on append only files
+ * On append-only files only space preallocation is supported.
*/
if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
return -EPERM;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index fe2e2c930975..e9fdebaa40ea 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -653,6 +653,9 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
int error;
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
+
trace_xfs_alloc_file_space(ip);
if (xfs_is_shutdown(mp))
@@ -848,6 +851,14 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
+ /*
+ * Now AIO and DIO has drained we flush and (if necessary) invalidate
+ * the cached range over the first operation we are about to run.
+ */
+ error = xfs_flush_unmap_range(ip, offset, len);
+ if (error)
+ return error;
+
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4cdc54dc9686..f6e4912769a0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -852,6 +852,192 @@ static inline bool xfs_file_sync_writes(struct file *filp)
return false;
}
+static int
+xfs_falloc_newsize(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len,
+ loff_t *new_size)
+{
+ struct inode *inode = file_inode(file);
+
+ if ((mode & FALLOC_FL_KEEP_SIZE) || offset + len <= i_size_read(inode))
+ return 0;
+ *new_size = offset + len;
+ return inode_newsize_ok(inode, *new_size);
+}
+
+static int
+xfs_falloc_setsize(
+ struct file *file,
+ loff_t new_size)
+{
+ struct iattr iattr = {
+ .ia_valid = ATTR_SIZE,
+ .ia_size = new_size,
+ };
+
+ if (!new_size)
+ return 0;
+ return xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file),
+ &iattr);
+}
+
+static int
+xfs_falloc_collapse_range(
+ struct file *file,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ loff_t new_size = i_size_read(inode) - len;
+ int error;
+
+ if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
+ return -EINVAL;
+
+ /*
+ * There is no need to overlap collapse range with EOF, in which case it
+ * is effectively a truncate operation
+ */
+ if (offset + len >= i_size_read(inode))
+ return -EINVAL;
+
+ error = xfs_collapse_file_space(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+ return xfs_falloc_setsize(file, new_size);
+}
+
+static int
+xfs_falloc_insert_range(
+ struct file *file,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ loff_t isize = i_size_read(inode);
+ int error;
+
+ if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
+ return -EINVAL;
+
+ /*
+ * New inode size must not exceed ->s_maxbytes, accounting for
+ * possible signed overflow.
+ */
+ if (inode->i_sb->s_maxbytes - isize < len)
+ return -EFBIG;
+
+ /* Offset should be less than i_size */
+ if (offset >= isize)
+ return -EINVAL;
+
+ error = xfs_falloc_setsize(file, isize + len);
+ if (error)
+ return error;
+
+ /*
+ * Perform hole insertion now that the file size has been updated so
+ * that if we crash during the operation we don't leave shifted extents
+ * past EOF and hence losing access to the data that is contained within
+ * them.
+ */
+ return xfs_insert_file_space(XFS_I(inode), offset, len);
+}
+
+/*
+ * Punch a hole and prealloc the range. We use a hole punch rather than
+ * unwritten extent conversion for two reasons:
+ *
+ * 1.) Hole punch handles partial block zeroing for us.
+ * 2.) If prealloc returns ENOSPC, the file range is still zero-valued by
+ * virtue of the hole punch.
+ */
+static int
+xfs_falloc_zero_range(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ unsigned int blksize = i_blocksize(inode);
+ loff_t new_size = 0;
+ int error;
+
+ trace_xfs_zero_file_space(XFS_I(inode));
+
+ error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
+ if (error)
+ return error;
+
+ error = xfs_free_file_space(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+
+ len = round_up(offset + len, blksize) - round_down(offset, blksize);
+ offset = round_down(offset, blksize);
+ error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+ return xfs_falloc_setsize(file, new_size);
+}
+
+static int
+xfs_falloc_unshare_range(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ loff_t new_size = 0;
+ int error;
+
+ error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
+ if (error)
+ return error;
+
+ error = xfs_reflink_unshare(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+
+ error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+ return xfs_falloc_setsize(file, new_size);
+}
+
+static int
+xfs_falloc_allocate_range(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ loff_t new_size = 0;
+ int error;
+
+ /*
+ * If always_cow mode we can't use preallocations and thus should not
+ * create them.
+ */
+ if (xfs_is_always_cow_inode(XFS_I(inode)))
+ return -EOPNOTSUPP;
+
+ error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
+ if (error)
+ return error;
+
+ error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+ if (error)
+ return error;
+ return xfs_falloc_setsize(file, new_size);
+}
+
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
@@ -868,8 +1054,6 @@ xfs_file_fallocate(
struct xfs_inode *ip = XFS_I(inode);
long error;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
- loff_t new_size = 0;
- bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -890,156 +1074,35 @@ xfs_file_fallocate(
*/
inode_dio_wait(inode);
- /*
- * Now AIO and DIO has drained we flush and (if necessary) invalidate
- * the cached range over the first operation we are about to run.
- *
- * We care about zero and collapse here because they both run a hole
- * punch over the range first. Because that can zero data, and the range
- * of invalidation for the shift operations is much larger, we still do
- * the required flush for collapse in xfs_prepare_shift().
- *
- * Insert has the same range requirements as collapse, and we extend the
- * file first which can zero data. Hence insert has the same
- * flush/invalidate requirements as collapse and so they are both
- * handled at the right time by xfs_prepare_shift().
- */
- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
- FALLOC_FL_COLLAPSE_RANGE)) {
- error = xfs_flush_unmap_range(ip, offset, len);
- if (error)
- goto out_unlock;
- }
-
error = file_modified(file);
if (error)
goto out_unlock;
- if (mode & FALLOC_FL_PUNCH_HOLE) {
+ switch (mode & FALLOC_FL_MODE_MASK) {
+ case FALLOC_FL_PUNCH_HOLE:
error = xfs_free_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
- } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- if (!xfs_is_falloc_aligned(ip, offset, len)) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- /*
- * There is no need to overlap collapse range with EOF,
- * in which case it is effectively a truncate operation
- */
- if (offset + len >= i_size_read(inode)) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- new_size = i_size_read(inode) - len;
-
- error = xfs_collapse_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
- } else if (mode & FALLOC_FL_INSERT_RANGE) {
- loff_t isize = i_size_read(inode);
-
- if (!xfs_is_falloc_aligned(ip, offset, len)) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- /*
- * New inode size must not exceed ->s_maxbytes, accounting for
- * possible signed overflow.
- */
- if (inode->i_sb->s_maxbytes - isize < len) {
- error = -EFBIG;
- goto out_unlock;
- }
- new_size = isize + len;
-
- /* Offset should be less than i_size */
- if (offset >= isize) {
- error = -EINVAL;
- goto out_unlock;
- }
- do_file_insert = true;
- } else {
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = inode_newsize_ok(inode, new_size);
- if (error)
- goto out_unlock;
- }
-
- if (mode & FALLOC_FL_ZERO_RANGE) {
- /*
- * Punch a hole and prealloc the range. We use a hole
- * punch rather than unwritten extent conversion for two
- * reasons:
- *
- * 1.) Hole punch handles partial block zeroing for us.
- * 2.) If prealloc returns ENOSPC, the file range is
- * still zero-valued by virtue of the hole punch.
- */
- unsigned int blksize = i_blocksize(inode);
-
- trace_xfs_zero_file_space(ip);
-
- error = xfs_free_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
-
- len = round_up(offset + len, blksize) -
- round_down(offset, blksize);
- offset = round_down(offset, blksize);
- } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
- error = xfs_reflink_unshare(ip, offset, len);
- if (error)
- goto out_unlock;
- } else {
- /*
- * If always_cow mode we can't use preallocations and
- * thus should not create them.
- */
- if (xfs_is_always_cow_inode(ip)) {
- error = -EOPNOTSUPP;
- goto out_unlock;
- }
- }
-
- if (!xfs_is_always_cow_inode(ip)) {
- error = xfs_alloc_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
- }
- }
-
- /* Change file size if needed */
- if (new_size) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = new_size;
- error = xfs_vn_setattr_size(file_mnt_idmap(file),
- file_dentry(file), &iattr);
- if (error)
- goto out_unlock;
- }
-
- /*
- * Perform hole insertion now that the file size has been
- * updated so that if we crash during the operation we don't
- * leave shifted extents past EOF and hence losing access to
- * the data that is contained within them.
- */
- if (do_file_insert) {
- error = xfs_insert_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
+ break;
+ case FALLOC_FL_COLLAPSE_RANGE:
+ error = xfs_falloc_collapse_range(file, offset, len);
+ break;
+ case FALLOC_FL_INSERT_RANGE:
+ error = xfs_falloc_insert_range(file, offset, len);
+ break;
+ case FALLOC_FL_ZERO_RANGE:
+ error = xfs_falloc_zero_range(file, mode, offset, len);
+ break;
+ case FALLOC_FL_UNSHARE_RANGE:
+ error = xfs_falloc_unshare_range(file, mode, offset, len);
+ break;
+ case FALLOC_FL_ALLOCATE_RANGE:
+ error = xfs_falloc_allocate_range(file, mode, offset, len);
+ break;
+ default:
+ error = -EOPNOTSUPP;
+ break;
}
- if (xfs_file_sync_writes(file))
+ if (!error && xfs_file_sync_writes(file))
error = xfs_log_force_inode(ip);
out_unlock:
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index f3f0b97b1675..3f49f3df6af5 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -25,12 +25,18 @@ struct space_resv {
#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
-#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
- FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_COLLAPSE_RANGE | \
- FALLOC_FL_ZERO_RANGE | \
- FALLOC_FL_INSERT_RANGE | \
- FALLOC_FL_UNSHARE_RANGE)
+/*
+ * Mask of all supported fallocate modes. Only one can be set at a time.
+ *
+ * In addition to the mode bit, the mode argument can also encode flags.
+ * FALLOC_FL_KEEP_SIZE is the only supported flag so far.
+ */
+#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \
+ FALLOC_FL_PUNCH_HOLE | \
+ FALLOC_FL_COLLAPSE_RANGE | \
+ FALLOC_FL_ZERO_RANGE | \
+ FALLOC_FL_INSERT_RANGE | \
+ FALLOC_FL_UNSHARE_RANGE)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index cc5e9b7b2b44..156908641e68 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
#define show_falloc_mode(mode) __print_flags(mode, "|", \
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
- { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 51398fa57f6c..5810371ed72b 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -2,6 +2,7 @@
#ifndef _UAPI_FALLOC_H_
#define _UAPI_FALLOC_H_
+#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */