diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 17:34:10 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 17:34:10 -0700 |
commit | a379f71a30dddbd2e7393624e455ce53c87965d1 (patch) | |
tree | c9c71b3eb19ff7e8618ff29e9d5ac99882b823e1 /fs | |
parent | de34f4da7f62ff59ac6e1ef320b0fcfa3296fce3 (diff) | |
parent | 9c5d760b8d229b94c5030863a5edaee5f1a9d7b7 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- a few block updates that fell in my lap
- lib/ updates
- checkpatch
- autofs
- ipc
- a ton of misc other things
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (100 commits)
mm: split gfp_mask and mapping flags into separate fields
fs: use mapping_set_error instead of opencoded set_bit
treewide: remove redundant #include <linux/kconfig.h>
hung_task: allow hung_task_panic when hung_task_warnings is 0
kthread: add kerneldoc for kthread_create()
kthread: better support freezable kthread workers
kthread: allow to modify delayed kthread work
kthread: allow to cancel kthread work
kthread: initial support for delayed kthread work
kthread: detect when a kthread work is used by more workers
kthread: add kthread_destroy_worker()
kthread: add kthread_create_worker*()
kthread: allow to call __kthread_create_on_node() with va_list args
kthread/smpboot: do not park in kthread_create_on_cpu()
kthread: kthread worker API cleanup
kthread: rename probe_kthread_data() to kthread_probe_data()
scripts/tags.sh: enable code completion in VIM
mm: kmemleak: avoid using __va() on addresses that don't have a lowmem mapping
kdump, vmcoreinfo: report memory sections virtual addresses
ipc/sem.c: add cond_resched in exit_sme
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/write.c | 5 |
-rw-r--r-- | fs/autofs4/autofs_i.h | 9 |
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 77 |
-rw-r--r-- | fs/autofs4/inode.c | 45 |
-rw-r--r-- | fs/autofs4/root.c | 4 |
-rw-r--r-- | fs/block_dev.c | 77 |
-rw-r--r-- | fs/buffer.c | 4 |
-rw-r--r-- | fs/exofs/inode.c | 2 |
-rw-r--r-- | fs/ext4/page-io.c | 2 |
-rw-r--r-- | fs/f2fs/data.c | 2 |
-rw-r--r-- | fs/jbd2/commit.c | 3 |
-rw-r--r-- | fs/lockd/procfs.h | 2 |
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 3 |
-rw-r--r-- | fs/open.c | 3 |
-rw-r--r-- | fs/pipe.c | 166 |
-rw-r--r-- | fs/select.c | 14 |
16 files changed, 259 insertions, 159 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index 14d506efd1aa..f865c3f05bea 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -398,8 +398,7 @@ no_more: switch (ret) { case -EDQUOT: case -ENOSPC: - set_bit(AS_ENOSPC, - &wb->vnode->vfs_inode.i_mapping->flags); + mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); break; case -EROFS: case -EIO: @@ -409,7 +408,7 @@ no_more: case -ENOMEDIUM: case -ENXIO: afs_kill_pages(wb->vnode, true, first, last); - set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags); + mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); break; case -EACCES: case -EPERM: diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index a439548de785..a1fba4285277 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -20,7 +20,8 @@ #define AUTOFS_IOC_COUNT 32 #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) -#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) +#define AUTOFS_DEV_IOCTL_IOC_COUNT \ + (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD) #include <linux/kernel.h> #include <linux/slab.h> @@ -33,8 +34,6 @@ #include <asm/current.h> #include <linux/uaccess.h> -/* #define DEBUG */ - #ifdef pr_fmt #undef pr_fmt #endif @@ -111,8 +110,6 @@ struct autofs_sb_info { int max_proto; unsigned long exp_timeout; unsigned int type; - int reghost_enabled; - int needs_reghost; struct super_block *sb; struct mutex wq_mutex; struct mutex pipe_mutex; @@ -271,4 +268,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry) } } -extern void autofs4_kill_sb(struct super_block *); +void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index c7fcc7438843..fc09eb77ddf3 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -75,7 +75,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) || (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) { 
pr_warn("ioctl control interface version mismatch: " - "kernel(%u.%u), user(%u.%u), cmd(%d)\n", + "kernel(%u.%u), user(%u.%u), cmd(0x%08x)\n", AUTOFS_DEV_IOCTL_VERSION_MAJOR, AUTOFS_DEV_IOCTL_VERSION_MINOR, param->ver_major, param->ver_minor, cmd); @@ -172,6 +172,17 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) return sbi; } +/* Return autofs dev ioctl version */ +static int autofs_dev_ioctl_version(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + /* This should have already been set. */ + param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; + param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; + return 0; +} + /* Return autofs module protocol version */ static int autofs_dev_ioctl_protover(struct file *fp, struct autofs_sb_info *sbi, @@ -586,41 +597,25 @@ out: static ioctl_fn lookup_dev_ioctl(unsigned int cmd) { - static struct { - int cmd; - ioctl_fn fn; - } _ioctls[] = { - {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL}, - {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD), - autofs_dev_ioctl_protover}, - {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD), - autofs_dev_ioctl_protosubver}, - {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD), - autofs_dev_ioctl_openmount}, - {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD), - autofs_dev_ioctl_closemount}, - {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD), - autofs_dev_ioctl_ready}, - {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD), - autofs_dev_ioctl_fail}, - {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD), - autofs_dev_ioctl_setpipefd}, - {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD), - autofs_dev_ioctl_catatonic}, - {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD), - autofs_dev_ioctl_timeout}, - {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD), - autofs_dev_ioctl_requester}, - {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD), - autofs_dev_ioctl_expire}, - {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD), - autofs_dev_ioctl_askumount}, - {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD), - autofs_dev_ioctl_ismountpoint} + static ioctl_fn _ioctls[] = { + 
autofs_dev_ioctl_version, + autofs_dev_ioctl_protover, + autofs_dev_ioctl_protosubver, + autofs_dev_ioctl_openmount, + autofs_dev_ioctl_closemount, + autofs_dev_ioctl_ready, + autofs_dev_ioctl_fail, + autofs_dev_ioctl_setpipefd, + autofs_dev_ioctl_catatonic, + autofs_dev_ioctl_timeout, + autofs_dev_ioctl_requester, + autofs_dev_ioctl_expire, + autofs_dev_ioctl_askumount, + autofs_dev_ioctl_ismountpoint, }; unsigned int idx = cmd_idx(cmd); - return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn; + return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx]; } /* ioctl dispatcher */ @@ -642,7 +637,7 @@ static int _autofs_dev_ioctl(unsigned int command, cmd = _IOC_NR(command); if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || - cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) { + cmd - cmd_first > AUTOFS_DEV_IOCTL_IOC_COUNT) { return -ENOTTY; } @@ -655,14 +650,11 @@ static int _autofs_dev_ioctl(unsigned int command, if (err) goto out; - /* The validate routine above always sets the version */ - if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD) - goto done; - fn = lookup_dev_ioctl(cmd); if (!fn) { pr_warn("unknown command 0x%08x\n", command); - return -ENOTTY; + err = -ENOTTY; + goto out; } fp = NULL; @@ -671,9 +663,11 @@ static int _autofs_dev_ioctl(unsigned int command, /* * For obvious reasons the openmount can't have a file * descriptor yet. We don't take a reference to the - * file during close to allow for immediate release. + * file during close to allow for immediate release, + * and the same for retrieving ioctl version. 
*/ - if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && + if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && + cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { fp = fget(param->ioctlfd); if (!fp) { @@ -706,7 +700,6 @@ cont: if (fp) fput(fp); -done: if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) err = -EFAULT; out: diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ca9cbd6362e0..438b5bf675b6 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -274,6 +274,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_dput; } + /* Test versions first */ + if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || + sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { + pr_err("kernel does not match daemon version " + "daemon (%d, %d) kernel (%d, %d)\n", + sbi->min_proto, sbi->max_proto, + AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); + goto fail_dput; + } + + /* Establish highest kernel protocol version */ + if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) + sbi->version = AUTOFS_MAX_PROTO_VERSION; + else + sbi->version = sbi->max_proto; + sbi->sub_version = AUTOFS_PROTO_SUBVERSION; + if (pgrp_set) { sbi->oz_pgrp = find_get_pid(pgrp); if (!sbi->oz_pgrp) { @@ -291,29 +308,12 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) root_inode->i_fop = &autofs4_root_operations; root_inode->i_op = &autofs4_dir_inode_operations; - /* Couldn't this be tested earlier? 
*/ - if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || - sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { - pr_err("kernel does not match daemon version " - "daemon (%d, %d) kernel (%d, %d)\n", - sbi->min_proto, sbi->max_proto, - AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); - goto fail_dput; - } - - /* Establish highest kernel protocol version */ - if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) - sbi->version = AUTOFS_MAX_PROTO_VERSION; - else - sbi->version = sbi->max_proto; - sbi->sub_version = AUTOFS_PROTO_SUBVERSION; - pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); pipe = fget(pipefd); if (!pipe) { pr_err("could not open pipe file descriptor\n"); - goto fail_dput; + goto fail_put_pid; } ret = autofs_prepare_pipe(pipe); if (ret < 0) @@ -334,14 +334,14 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) fail_fput: pr_err("pipe file descriptor does not contain proper ops\n"); fput(pipe); - /* fall through */ +fail_put_pid: + put_pid(sbi->oz_pgrp); fail_dput: dput(root); goto fail_free; fail_ino: - kfree(ino); + autofs4_free_ino(ino); fail_free: - put_pid(sbi->oz_pgrp); kfree(sbi); s->s_fs_info = NULL; return ret; @@ -368,7 +368,8 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode) inode->i_fop = &autofs4_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &autofs4_symlink_inode_operations; - } + } else + WARN_ON(1); return inode; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 623510e84c96..a11f73174877 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -577,8 +577,6 @@ static int autofs4_dir_symlink(struct inode *dir, inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555); if (!inode) { kfree(cp); - if (!dentry->d_fsdata) - kfree(ino); return -ENOMEM; } inode->i_private = cp; @@ -842,7 +840,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) if (may_umount(mnt)) status = 1; - pr_debug("returning %d\n", status); + pr_debug("may umount %d\n", status); 
status = put_user(status, p); diff --git a/fs/block_dev.c b/fs/block_dev.c index 376e4e426324..05b553368bb4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -30,6 +30,7 @@ #include <linux/cleancache.h> #include <linux/dax.h> #include <linux/badblocks.h> +#include <linux/falloc.h> #include <asm/uaccess.h> #include "internal.h" @@ -1775,6 +1776,81 @@ static const struct address_space_operations def_blk_aops = { .is_dirty_writeback = buffer_check_dirty_writeback, }; +#define BLKDEV_FALLOC_FL_SUPPORTED \ + (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) + +static long blkdev_fallocate(struct file *file, int mode, loff_t start, + loff_t len) +{ + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + struct request_queue *q = bdev_get_queue(bdev); + struct address_space *mapping; + loff_t end = start + len - 1; + loff_t isize; + int error; + + /* Fail if we don't recognize the flags. */ + if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) + return -EOPNOTSUPP; + + /* Don't go off the end of the device. */ + isize = i_size_read(bdev->bd_inode); + if (start >= isize) + return -EINVAL; + if (end >= isize) { + if (mode & FALLOC_FL_KEEP_SIZE) { + len = isize - start; + end = start + len - 1; + } else + return -EINVAL; + } + + /* + * Don't allow IO that isn't aligned to logical block size. + */ + if ((start | len) & (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + /* Invalidate the page cache, including dirty pages. */ + mapping = bdev->bd_inode->i_mapping; + truncate_inode_pages_range(mapping, start, end); + + switch (mode) { + case FALLOC_FL_ZERO_RANGE: + case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, + GFP_KERNEL, false); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: + /* Only punch if the device can do zeroing discard. 
*/ + if (!blk_queue_discard(q) || !q->limits.discard_zeroes_data) + return -EOPNOTSUPP; + error = blkdev_issue_discard(bdev, start >> 9, len >> 9, + GFP_KERNEL, 0); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + error = blkdev_issue_discard(bdev, start >> 9, len >> 9, + GFP_KERNEL, 0); + break; + default: + return -EOPNOTSUPP; + } + if (error) + return error; + + /* + * Invalidate again; if someone wandered in and dirtied a page, + * the caller will be given -EBUSY. The third argument is + * inclusive, so the rounding here is safe. + */ + return invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, + end >> PAGE_SHIFT); +} + const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, @@ -1789,6 +1865,7 @@ const struct file_operations def_blk_fops = { #endif .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, + .fallocate = blkdev_fallocate, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/fs/buffer.c b/fs/buffer.c index 7dad8713fac8..b205a629001d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -351,7 +351,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost async page write"); - set_bit(AS_EIO, &page->mapping->flags); + mapping_set_error(page->mapping, -EIO); set_buffer_write_io_error(bh); clear_buffer_uptodate(bh); SetPageError(page); @@ -3249,7 +3249,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) bh = head; do { if (buffer_write_io_error(bh) && page->mapping) - set_bit(AS_EIO, &page->mapping->flags); + mapping_set_error(page->mapping, -EIO); if (buffer_busy(bh)) goto failed; bh = bh->b_this_page; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d42ff527ab21..d8072bc074a4 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -778,7 +778,7 @@ 
try_again: fail: EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", inode->i_ino, page->index, ret); - set_bit(AS_EIO, &page->mapping->flags); + mapping_set_error(page->mapping, -EIO); unlock_page(page); return ret; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b4cbee936cf8..0094923e5ebf 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -88,7 +88,7 @@ static void ext4_finish_bio(struct bio *bio) if (bio->bi_error) { SetPageError(page); - set_bit(AS_EIO, &page->mapping->flags); + mapping_set_error(page->mapping, -EIO); } bh = head = page_buffers(page); /* diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0d0177c9149c..9ae194fd2fdb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -75,7 +75,7 @@ static void f2fs_write_end_io(struct bio *bio) fscrypt_pullback_bio_page(&page, true); if (unlikely(bio->bi_error)) { - set_bit(AS_EIO, &page->mapping->flags); + mapping_set_error(page->mapping, -EIO); f2fs_stop_checkpoint(sbi, true); } end_page_writeback(page); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 5bb565f9989c..31f8ca046639 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -269,8 +269,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, * filemap_fdatawait_range(), set it again so * that user process can get -EIO from fsync(). 
*/ - set_bit(AS_EIO, - &jinode->i_vfs_inode->i_mapping->flags); + mapping_set_error(jinode->i_vfs_inode->i_mapping, -EIO); if (!ret) ret = err; diff --git a/fs/lockd/procfs.h b/fs/lockd/procfs.h index 2257a1311027..184a15edd18d 100644 --- a/fs/lockd/procfs.h +++ b/fs/lockd/procfs.h @@ -6,8 +6,6 @@ #ifndef _LOCKD_PROCFS_H #define _LOCKD_PROCFS_H -#include <linux/kconfig.h> - #if IS_ENABLED(CONFIG_PROC_FS) int lockd_create_procfs(void); void lockd_remove_procfs(void); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 6ea06f8a7d29..3f828a187049 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3188,6 +3188,9 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, migrate->new_master, migrate->master); + if (ret < 0) + kmem_cache_free(dlm_mle_cache, mle); + spin_unlock(&dlm->master_lock); unlock: spin_unlock(&dlm->spinlock); diff --git a/fs/open.c b/fs/open.c index 8aeb08bb278b..a7719cfb7257 100644 --- a/fs/open.c +++ b/fs/open.c @@ -300,7 +300,8 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) * Let individual file system decide if it supports preallocation * for directories or not. 
*/ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && + !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ diff --git a/fs/pipe.c b/fs/pipe.c index 1f559f0608e1..8e0d9f26dfad 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -601,54 +601,63 @@ pipe_fasync(int fd, struct file *filp, int on) return retval; } -static void account_pipe_buffers(struct pipe_inode_info *pipe, +static unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new) { - atomic_long_add(new - old, &pipe->user->pipe_bufs); + return atomic_long_add_return(new - old, &user->pipe_bufs); } -static bool too_many_pipe_buffers_soft(struct user_struct *user) +static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && - atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; } -static bool too_many_pipe_buffers_hard(struct user_struct *user) +static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && - atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; } struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; + unsigned long pipe_bufs = PIPE_DEF_BUFFERS; + struct user_struct *user = get_current_user(); + unsigned long user_bufs; pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); - if (pipe) { - unsigned long pipe_bufs = PIPE_DEF_BUFFERS; - struct user_struct *user = get_current_user(); - - if (!too_many_pipe_buffers_hard(user)) { - if (too_many_pipe_buffers_soft(user)) - pipe_bufs = 1; - pipe->bufs = kcalloc(pipe_bufs, - sizeof(struct pipe_buffer), - GFP_KERNEL_ACCOUNT); - } + if (pipe == NULL) + goto out_free_uid; - if (pipe->bufs) { - init_waitqueue_head(&pipe->wait); - pipe->r_counter = pipe->w_counter = 1; - 
pipe->buffers = pipe_bufs; - pipe->user = user; - account_pipe_buffers(pipe, 0, pipe_bufs); - mutex_init(&pipe->mutex); - return pipe; - } - free_uid(user); - kfree(pipe); + if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE)) + pipe_bufs = pipe_max_size >> PAGE_SHIFT; + + user_bufs = account_pipe_buffers(user, 0, pipe_bufs); + + if (too_many_pipe_buffers_soft(user_bufs)) { + user_bufs = account_pipe_buffers(user, pipe_bufs, 1); + pipe_bufs = 1; + } + + if (too_many_pipe_buffers_hard(user_bufs)) + goto out_revert_acct; + + pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), + GFP_KERNEL_ACCOUNT); + + if (pipe->bufs) { + init_waitqueue_head(&pipe->wait); + pipe->r_counter = pipe->w_counter = 1; + pipe->buffers = pipe_bufs; + pipe->user = user; + mutex_init(&pipe->mutex); + return pipe; } +out_revert_acct: + (void) account_pipe_buffers(user, pipe_bufs, 0); + kfree(pipe); +out_free_uid: + free_uid(user); return NULL; } @@ -656,7 +665,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) { int i; - account_pipe_buffers(pipe, pipe->buffers, 0); + (void) account_pipe_buffers(pipe->user, pipe->buffers, 0); free_uid(pipe->user); for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; @@ -1008,12 +1017,54 @@ const struct file_operations pipefifo_fops = { }; /* + * Currently we rely on the pipe array holding a power-of-2 number + * of pages. + */ +static inline unsigned int round_pipe_size(unsigned int size) +{ + unsigned long nr_pages; + + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; +} + +/* * Allocate a new array of pipe buffers and copy the info over. Returns the * pipe size if successful, or return -ERROR on error. 
*/ -static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) { struct pipe_buffer *bufs; + unsigned int size, nr_pages; + unsigned long user_bufs; + long ret = 0; + + size = round_pipe_size(arg); + nr_pages = size >> PAGE_SHIFT; + + if (!nr_pages) + return -EINVAL; + + /* + * If trying to increase the pipe capacity, check that an + * unprivileged user is not trying to exceed various limits + * (soft limit check here, hard limit check just below). + * Decreasing the pipe capacity is always permitted, even + * if the user is currently over a limit. + */ + if (nr_pages > pipe->buffers && + size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + user_bufs = account_pipe_buffers(pipe->user, pipe->buffers, nr_pages); + + if (nr_pages > pipe->buffers && + (too_many_pipe_buffers_hard(user_bufs) || + too_many_pipe_buffers_soft(user_bufs)) && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto out_revert_acct; + } /* * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't @@ -1021,13 +1072,17 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) * again like we would do for growing. If the pipe currently * contains more buffers than arg, then return busy. 
*/ - if (nr_pages < pipe->nrbufs) - return -EBUSY; + if (nr_pages < pipe->nrbufs) { + ret = -EBUSY; + goto out_revert_acct; + } bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); - if (unlikely(!bufs)) - return -ENOMEM; + if (unlikely(!bufs)) { + ret = -ENOMEM; + goto out_revert_acct; + } /* * The pipe array wraps around, so just start the new one at zero @@ -1050,24 +1105,15 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); } - account_pipe_buffers(pipe, pipe->buffers, nr_pages); pipe->curbuf = 0; kfree(pipe->bufs); pipe->bufs = bufs; pipe->buffers = nr_pages; return nr_pages * PAGE_SIZE; -} - -/* - * Currently we rely on the pipe array holding a power-of-2 number - * of pages. - */ -static inline unsigned int round_pipe_size(unsigned int size) -{ - unsigned long nr_pages; - nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; +out_revert_acct: + (void) account_pipe_buffers(pipe->user, nr_pages, pipe->buffers); + return ret; } /* @@ -1109,28 +1155,9 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) __pipe_lock(pipe); switch (cmd) { - case F_SETPIPE_SZ: { - unsigned int size, nr_pages; - - size = round_pipe_size(arg); - nr_pages = size >> PAGE_SHIFT; - - ret = -EINVAL; - if (!nr_pages) - goto out; - - if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { - ret = -EPERM; - goto out; - } else if ((too_many_pipe_buffers_hard(pipe->user) || - too_many_pipe_buffers_soft(pipe->user)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto out; - } - ret = pipe_set_size(pipe, nr_pages); + case F_SETPIPE_SZ: + ret = pipe_set_size(pipe, arg); break; - } case F_GETPIPE_SZ: ret = pipe->buffers * PAGE_SIZE; break; @@ -1139,7 +1166,6 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) break; } -out: __pipe_unlock(pipe); return 
ret; } diff --git a/fs/select.c b/fs/select.c index 8ed9da50896a..3d4f85defeab 100644 --- a/fs/select.c +++ b/fs/select.c @@ -29,6 +29,7 @@ #include <linux/sched/rt.h> #include <linux/freezer.h> #include <net/busy_poll.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> @@ -554,7 +555,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set_bits fds; void *bits; int ret, max_fds; - unsigned int size; + size_t size, alloc_size; struct fdtable *fdt; /* Allocate small arguments on the stack to save memory and be faster */ long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; @@ -581,7 +582,14 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (size > sizeof(stack_fds) / 6) { /* Not enough space in on-stack array; must use kmalloc */ ret = -ENOMEM; - bits = kmalloc(6 * size, GFP_KERNEL); + if (size > (SIZE_MAX / 6)) + goto out_nofds; + + alloc_size = 6 * size; + bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN); + if (!bits && alloc_size > PAGE_SIZE) + bits = vmalloc(alloc_size); + if (!bits) goto out_nofds; } @@ -618,7 +626,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, out: if (bits != stack_fds) - kfree(bits); + kvfree(bits); out_nofds: return ret; } |