From ec9ee4acd97c0039a61c0ae4f12705767ae62153 Mon Sep 17 00:00:00 2001 From: Daniel Cashman Date: Fri, 26 Feb 2016 15:19:34 -0800 Subject: drivers: char: random: add get_random_long() Commit d07e22597d1d ("mm: mmap: add new /proc tunable for mmap_base ASLR") added the ability to choose from a range of values to use for entropy count in generating the random offset to the mmap_base address. The maximum value on this range was set to 32 bits for 64-bit x86 systems, but this value could be increased further, requiring more than the 32 bits of randomness provided by get_random_int(), as is already possible for arm64. Add a new function: get_random_long() which more naturally fits with the mmap usage of get_random_int() but operates exactly the same as get_random_int(). Also, fix the shifting constant in mmap_rnd() to be an unsigned long so that values greater than 31 bits generate an appropriate mask without overflow. This is especially important on x86, as its shift instruction uses a 5-bit mask for the shift operand, which meant that any value for mmap_rnd_bits over 31 acts as a no-op and effectively disables mmap_base randomization. Finally, replace calls to get_random_int() with get_random_long() where appropriate. This patch (of 2): Add get_random_long(). Signed-off-by: Daniel Cashman Acked-by: Kees Cook Cc: "Theodore Ts'o" Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: David S. Miller Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Al Viro Cc: Nick Kralevich Cc: Jeff Vander Stoep Cc: Mark Salyzyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index a75840c1aa71..9c29122037f9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops; #endif unsigned int get_random_int(void); +unsigned long get_random_long(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 prandom_u32(void); -- cgit v1.2.3-58-ga151 From 20a90f58997245749c2bdfaea9e51f785ec90d0b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 26 Feb 2016 15:19:52 -0800 Subject: dax: give DAX clearing code correct bdev dax_clear_blocks() needs a valid struct block_device and previously it was using inode->i_sb->s_bdev in all cases. This is correct for normal inodes on mounted ext2, ext4 and XFS filesystems, but is incorrect for DAX raw block devices and for XFS real-time devices. Instead, rename dax_clear_blocks() to dax_clear_sectors(), and change its arguments to take a bdev and a sector instead of an inode and a block. This better reflects what the function does, and it allows the filesystem and raw block device code to pass in an appropriate struct block_device. Signed-off-by: Ross Zwisler Suggested-by: Dan Williams Reviewed-by: Jan Kara Cc: Theodore Ts'o Cc: Al Viro Cc: Dave Chinner Cc: Jens Axboe Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 9 ++++----- fs/ext2/inode.c | 6 ++++-- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_aops.h | 1 + fs/xfs/xfs_bmap_util.c | 3 ++- include/linux/dax.h | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/dax.c b/fs/dax.c index fc2e3141138b..9a173dd8c4a3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n) } /* - * dax_clear_blocks() is called from within transaction context from XFS, + * dax_clear_sectors() is called from within transaction context from XFS, * and hence this means the stack from this point must follow GFP_NOFS * semantics for all operations. */ -int dax_clear_blocks(struct inode *inode, sector_t block, long _size) +int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) { - struct block_device *bdev = inode->i_sb->s_bdev; struct blk_dax_ctl dax = { - .sector = block << (inode->i_blkbits - 9), + .sector = _sector, .size = _size, }; @@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size) wmb_pmem(); return 0; } -EXPORT_SYMBOL_GPL(dax_clear_blocks); +EXPORT_SYMBOL_GPL(dax_clear_sectors); /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 27e2cdd4999b..4467cbd75f24 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode, * so that it's not found by another thread before it's * initialised */ - err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), - 1 << inode->i_blkbits); + err = dax_clear_sectors(inode->i_sb->s_bdev, + le32_to_cpu(chain[depth-1].key) << + (inode->i_blkbits - 9), + 1 << inode->i_blkbits); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 379c089fb051..fc20518e2398 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -55,7 +55,7 @@ xfs_count_page_state( } while ((bh = bh->b_this_page) != head); } -STATIC struct block_device * +struct block_device * xfs_find_bdev_for_inode( struct inode *inode) { diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f6ffc9ae5ceb..a4343c63fb38 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); extern void xfs_count_page_state(struct page *, int *, int *); +extern struct block_device *xfs_find_bdev_for_inode(struct inode *); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 45ec9e40150c..6c876012b2e5 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -75,7 +75,8 @@ xfs_zero_extent( ssize_t size = XFS_FSB_TO_B(mp, count_fsb); if (IS_DAX(VFS_I(ip))) - return dax_clear_blocks(VFS_I(ip), block, size); + return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), + sector, size); /* * let the block layer decide on the fastest method of diff --git a/include/linux/dax.h b/include/linux/dax.h index 818e45078929..7b6bcedb980f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,7 +7,7 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); -int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, -- cgit v1.2.3-58-ga151 From 7f6d5b529b7dfe2fca30cbf4bc81e16575090025 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 26 Feb 2016 15:19:55 -0800 Subject: dax: move writeback calls into the filesystems Previously calls to dax_writeback_mapping_range() for all DAX filesystems (ext2, ext4 & xfs) were centralized in filemap_write_and_wait_range(). dax_writeback_mapping_range() needs a struct block_device, and it used to get that from inode->i_sb->s_bdev. This is correct for normal inodes mounted on ext2, ext4 and XFS filesystems, but is incorrect for DAX raw block devices and for XFS real-time files. Instead, call dax_writeback_mapping_range() directly from the filesystem ->writepages function so that it can supply us with a valid block device. This also fixes DAX code to properly flush caches in response to sync(2). Signed-off-by: Ross Zwisler Signed-off-by: Jan Kara Cc: Al Viro Cc: Dan Williams Cc: Dave Chinner Cc: Jens Axboe Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 13 ++++++++++++- fs/dax.c | 12 +++++++----- fs/ext2/inode.c | 8 ++++++++ fs/ext4/inode.c | 4 ++++ fs/xfs/xfs_aops.c | 4 ++++ include/linux/dax.h | 6 ++++-- mm/filemap.c | 12 ++++-------- 7 files changed, 43 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 31c6d1090f11..826b164a4b5b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1697,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } +static int blkdev_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + if (dax_mapping(mapping)) { + struct block_device *bdev = I_BDEV(mapping->host); + + return dax_writeback_mapping_range(mapping, bdev, wbc); + } + return generic_writepages(mapping, wbc); +} + static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .readpages = blkdev_readpages, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, - .writepages = generic_writepages, + .writepages = blkdev_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, .is_dirty_writeback = buffer_check_dirty_writeback, diff --git a/fs/dax.c b/fs/dax.c index 9a173dd8c4a3..711172450da6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -484,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev, * end]. This is required by data integrity operations to ensure file data is * on persistent storage prior to completion of the operation. */ -int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, - loff_t end) +int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc) { struct inode *inode = mapping->host; - struct block_device *bdev = inode->i_sb->s_bdev; pgoff_t start_index, end_index, pmd_index; pgoff_t indices[PAGEVEC_SIZE]; struct pagevec pvec; @@ -499,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) return -EIO; - start_index = start >> PAGE_CACHE_SHIFT; - end_index = end >> PAGE_CACHE_SHIFT; + if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) + return 0; + + start_index = wbc->range_start >> PAGE_CACHE_SHIFT; + end_index = wbc->range_end >> PAGE_CACHE_SHIFT; pmd_index = DAX_PMD_INDEX(start_index); rcu_read_lock(); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 4467cbd75f24..6bd58e6ff038 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -876,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { +#ifdef CONFIG_FS_DAX + if (dax_mapping(mapping)) { + return dax_writeback_mapping_range(mapping, + mapping->host->i_sb->s_bdev, + wbc); + } +#endif + return mpage_writepages(mapping, wbc, ext2_get_block); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5708e689e63d..aee960b1af34 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2478,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping, trace_ext4_writepages(inode, wbc); + if (dax_mapping(mapping)) + return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, + wbc); + /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index fc20518e2398..a9ebabfe7587 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1208,6 +1208,10 @@ xfs_vm_writepages( struct writeback_control *wbc) { xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + if (dax_mapping(mapping)) + return dax_writeback_mapping_range(mapping, + xfs_find_bdev_for_inode(mapping->host), wbc); + return generic_writepages(mapping, wbc); } diff --git a/include/linux/dax.h b/include/linux/dax.h index 7b6bcedb980f..636dd59ab505 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -52,6 +52,8 @@ static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); } -int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, - loff_t end); + +struct writeback_control; +int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc); #endif diff --git a/mm/filemap.c b/mm/filemap.c index 23edccecadb0..3461d97ecb30 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -446,7 +446,8 @@ int filemap_write_and_wait(struct address_space *mapping) { int err = 0; - if (mapping->nrpages) { + if ((!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional)) { err = filemap_fdatawrite(mapping); /* * Even if the above returned error, the pages may be @@ -482,13 +483,8 @@ int filemap_write_and_wait_range(struct address_space *mapping, { int err = 0; - if (dax_mapping(mapping) && mapping->nrexceptional) { - err = dax_writeback_mapping_range(mapping, lstart, lend); - if (err) - return err; - } - - if (mapping->nrpages) { + if ((!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ -- cgit v1.2.3-58-ga151