author     Linus Torvalds <torvalds@linux-foundation.org>   2018-08-15 22:40:03 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-08-15 22:40:03 -0700
commit     2b2f2aedba985108cbc92a761ac0d9fc4c774616
tree       183ebc740d0e3217f4522e9669a2742833b29e7d /fs
parent     72f02ba66bd83b54054da20eae550123de84da6f
parent     f5580d0f8bf60993a5fbc73ee04678070ffbba57
Merge tag 'gfs2-4.19.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull gfs2 updates from Andreas Gruenbacher:
- iomap support for buffered writes and for direct I/O
- two patches that reduce the size of struct gfs2_inode
- lots of fixes and cleanups
* tag 'gfs2-4.19.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (25 commits)
gfs2: eliminate update_rgrp_lvb_unlinked
gfs2: Fix gfs2_testbit to use clone bitmaps
gfs2: Get rid of gfs2_ea_strlen
gfs2: cleanup: call gfs2_rgrp_ondisk2lvb from gfs2_rgrp_out
gfs2: Special-case rindex for gfs2_grow
GFS2: rgrp free blocks used incorrectly
gfs2: remove redundant variable 'moved'
gfs2: use iomap_readpage for blocksize == PAGE_SIZE
gfs2: Use iomap for stuffed direct I/O reads
gfs2: fallocate_chunk: Always initialize struct iomap
GFS2: Fix recovery issues for spectators
fs: gfs2: Adding new return type vm_fault_t
gfs2: using posix_acl_xattr_size instead of posix_acl_to_xattr
gfs2: Don't reject a supposedly full bitmap if we have blocks reserved
gfs2: Eliminate redundant ip->i_rgd
gfs2: Stop messing with ip->i_rgd in the rlist code
gfs2: Remove gfs2_write_{begin,end}
gfs2: iomap direct I/O support
gfs2: gfs2_extent_length cleanup
gfs2: iomap buffered write support
...
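The common thread in this series is the switch from GFS2's private write_begin/write_end implementation and the old blockdev direct-I/O path to the generic iomap infrastructure. As orientation before the full diff, here is a condensed view of the new wiring, assembled from the hunks below; it is a sketch, not a verbatim excerpt, and function bodies plus surrounding declarations are elided:

```c
/* fs/gfs2/bmap.c: a single iomap_ops now drives buffered writes,
 * direct I/O, readpage (for block size == PAGE_SIZE), and block
 * mapping reports (IOMAP_REPORT). */
const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,  /* locks, reserves, allocates for writes */
	.iomap_end   = gfs2_iomap_end,    /* ends the transaction, trims over-allocations */
};

/* fs/gfs2/aops.c: ->direct_IO becomes a stub; O_DIRECT goes through
 * iomap_dio_rw() in the file read/write paths instead. */
	.direct_IO = noop_direct_IO,

/* fs/gfs2/file.c: direct I/O silently falls back to buffered I/O when
 * the mapping path returns -ENOTBLK (a stuffed inode, or a hole on the
 * write side). */
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret;

	if (iocb->ki_flags & IOCB_DIRECT) {
		ret = gfs2_file_direct_read(iocb, to);
		if (likely(ret != -ENOTBLK))
			return ret;
		iocb->ki_flags &= ~IOCB_DIRECT;
	}
	return generic_file_read_iter(iocb, to);
}
```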
Diffstat (limited to 'fs')
-rw-r--r--  fs/gfs2/acl.c           6
-rw-r--r--  fs/gfs2/aops.c        337
-rw-r--r--  fs/gfs2/aops.h         19
-rw-r--r--  fs/gfs2/bmap.c        414
-rw-r--r--  fs/gfs2/dir.c           4
-rw-r--r--  fs/gfs2/file.c        161
-rw-r--r--  fs/gfs2/incore.h       23
-rw-r--r--  fs/gfs2/lock_dlm.c     20
-rw-r--r--  fs/gfs2/log.c          28
-rw-r--r--  fs/gfs2/lops.c          2
-rw-r--r--  fs/gfs2/meta_io.c       4
-rw-r--r--  fs/gfs2/recovery.c      7
-rw-r--r--  fs/gfs2/rgrp.c        169
-rw-r--r--  fs/gfs2/super.c         1
-rw-r--r--  fs/gfs2/sys.c          11
-rw-r--r--  fs/gfs2/trace_gfs2.h    3
-rw-r--r--  fs/gfs2/trans.h         6
-rw-r--r--  fs/gfs2/util.c         38
-rw-r--r--  fs/gfs2/util.h         10
-rw-r--r--  fs/gfs2/xattr.c        59
20 files changed, 752 insertions(+), 570 deletions(-)
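One recurring concept in the fs/gfs2/rgrp.c hunks below is the clone bitmap: gfs2_testbit grows a use_clone argument, and the new rgd_free helper works from rd_free_clone, all so that blocks freed in the current transaction are not handed out again before the log is flushed. The new comment added to fs/gfs2/incore.h spells out the rule; as a reading aid, here is a toy user-space model of it (hypothetical names and boolean block states — real GFS2 uses two-bit block states and per-bitmap buffers):

```c
/* Toy model of the clone-bitmap rule described in the incore.h hunk.
 * Hypothetical user-space sketch, not GFS2 code. */
#include <stdio.h>
#include <string.h>

#define NBLOCKS 8
static unsigned char real_map[NBLOCKS];   /* current allocation state */
static unsigned char clone_map[NBLOCKS];  /* state as of the last log flush */

/* Freeing clears only the real bitmap; the clone remembers the old
 * state, so the block still looks busy to the allocator. */
static void free_block(int b) { real_map[b] = 0; }

/* Allocation consults the clone, skipping recently freed blocks. */
static int alloc_block(void)
{
	for (int b = 0; b < NBLOCKS; b++) {
		if (!clone_map[b] && !real_map[b]) {
			clone_map[b] = real_map[b] = 1;
			return b;
		}
	}
	return -1;
}

/* At the end of a log flush, the clone catches up with reality. */
static void log_flush(void) { memcpy(clone_map, real_map, sizeof(clone_map)); }

int main(void)
{
	int b = alloc_block();                        /* block 0 */
	free_block(b);
	printf("before flush: %d\n", alloc_block());  /* 1: block 0 is skipped */
	log_flush();
	printf("after flush:  %d\n", alloc_block());  /* 0: block 0 is reusable */
	return 0;
}
```

Until the flush copies the real bitmap over the clone, a freed block still appears allocated, which is exactly the guarantee the comment describes: blocks freed in a transaction cannot be reallocated within that same transaction.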
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 776717f1eeea..af5f87a493d9 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -82,14 +82,12 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int error; - int len; + size_t len; char *data; const char *name = gfs2_acl_name(type); if (acl) { - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - if (len == 0) - return 0; + len = posix_acl_xattr_size(acl->a_count); data = kmalloc(len, GFP_NOFS); if (data == NULL) return -ENOMEM; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 35f5ee23566d..31e8270d0b26 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -22,6 +22,7 @@ #include <linux/backing-dev.h> #include <linux/uio.h> #include <trace/events/writeback.h> +#include <linux/sched/signal.h> #include "gfs2.h" #include "incore.h" @@ -36,10 +37,11 @@ #include "super.h" #include "util.h" #include "glops.h" +#include "aops.h" -static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int len) +void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len) { struct buffer_head *head = page_buffers(page); unsigned int bsize = head->b_size; @@ -82,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, return 0; } -static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, bh_result, 0); -} - /** * gfs2_writepage_common - Common bits of writepage * @page: The page to be written @@ -462,7 +458,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +int stuffed_readpage(struct gfs2_inode *ip, struct page *page) { struct buffer_head *dibh; u64 dsize = i_size_read(&ip->i_inode); @@ -512,9 +508,13 @@ static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + int error; - if (gfs2_is_stuffed(ip)) { + if (i_blocksize(page->mapping->host) == PAGE_SIZE && + !page_has_buffers(page)) { + error = iomap_readpage(page, &gfs2_iomap_ops); + } else if (gfs2_is_stuffed(ip)) { error = stuffed_readpage(ip, page); unlock_page(page); } else { @@ -644,139 +644,10 @@ out_uninit: } /** - * gfs2_write_begin - Begin to write to a file - * @file: The file to write to - * @mapping: The mapping in which to write - * @pos: The file offset at which to start writing - * @len: Length of the write - * @flags: Various flags - * @pagep: Pointer to return the page - * @fsdata: Pointer to return fs data (unused by GFS2) - * - * Returns: errno - */ - -static int gfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(mapping->host); - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - unsigned requested = 0; - int alloc_required; - int error = 0; - pgoff_t index = pos >> PAGE_SHIFT; - unsigned from = pos & (PAGE_SIZE - 1); - struct page *page; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - if (&ip->i_inode == sdp->sd_rindex) { - error = 
gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, - GL_NOCACHE, &m_ip->i_gh); - if (unlikely(error)) { - gfs2_glock_dq(&ip->i_gh); - goto out_uninit; - } - } - - alloc_required = gfs2_write_alloc_required(ip, pos, len); - - if (alloc_required || gfs2_is_jdata(ip)) - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - - if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; - requested = data_blocks + ind_blocks; - ap.target = requested; - error = gfs2_quota_lock_check(ip, &ap); - if (error) - goto out_unlock; - - error = gfs2_inplace_reserve(ip, &ap); - if (error) - goto out_qunlock; - } - - rblocks = RES_DINODE + ind_blocks; - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - if (ind_blocks || data_blocks) - rblocks += RES_STATFS + RES_QUOTA; - if (&ip->i_inode == sdp->sd_rindex) - rblocks += 2 * RES_STATFS; - if (alloc_required) - rblocks += gfs2_rg_blocks(ip, requested); - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = -ENOMEM; - flags |= AOP_FLAG_NOFS; - page = grab_cache_page_write_begin(mapping, index, flags); - *pagep = page; - if (unlikely(!page)) - goto out_endtrans; - - if (gfs2_is_stuffed(ip)) { - error = 0; - if (pos + len > gfs2_max_stuffed_size(ip)) { - error = gfs2_unstuff_dinode(ip, page); - if (error == 0) - goto prepare_write; - } else if (!PageUptodate(page)) { - error = stuffed_readpage(ip, page); - } - goto out; - } - -prepare_write: - error = __block_write_begin(page, from, len, gfs2_block_map); -out: - if (error == 0) - return 0; - - unlock_page(page); - put_page(page); - - gfs2_trans_end(sdp); - if (alloc_required) { - gfs2_inplace_release(ip); - if (pos + len > ip->i_inode.i_size) - gfs2_trim_blocks(&ip->i_inode); - } - goto out_qunlock; - -out_endtrans: - gfs2_trans_end(sdp); -out_trans_fail: - if (alloc_required) - gfs2_inplace_release(ip); -out_qunlock: - if (alloc_required) - gfs2_quota_unlock(ip); -out_unlock: - if (&ip->i_inode == sdp->sd_rindex) { - gfs2_glock_dq(&m_ip->i_gh); - gfs2_holder_uninit(&m_ip->i_gh); - } - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - -/** * adjust_fs_space - Adjusts the free space available due to gfs2_grow * @inode: the rindex inode */ -static void adjust_fs_space(struct inode *inode) +void adjust_fs_space(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -822,11 +693,11 @@ out: * This copies the data from the page into the inode block after * the inode data structure itself. * - * Returns: errno + * Returns: copied bytes or errno */ -static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page) +int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page) { struct gfs2_inode *ip = GFS2_I(inode); u64 to = pos + copied; @@ -853,84 +724,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, } /** - * gfs2_write_end - * @file: The file to write to - * @mapping: The address space to write to - * @pos: The file position - * @len: The length of the data - * @copied: How much was actually copied by the VFS - * @page: The page that has been written - * @fsdata: The fsdata (unused in GFS2) - * - * The main write_end function for GFS2. We just put our locking around the VFS - * provided functions. 
- * - * Returns: errno - */ - -static int gfs2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - struct buffer_head *dibh; - int ret; - struct gfs2_trans *tr = current->journal_info; - BUG_ON(!tr); - - BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); - - ret = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(ret)) - goto out; - - if (gfs2_is_stuffed(ip)) { - ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page); - page = NULL; - goto out2; - } - - if (gfs2_is_jdata(ip)) - gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len); - else - gfs2_ordered_add_inode(ip); - - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - page = NULL; - if (tr->tr_num_buf_new) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - else - gfs2_trans_add_meta(ip->i_gl, dibh); - -out2: - if (inode == sdp->sd_rindex) { - adjust_fs_space(inode); - sdp->sd_rindex_uptodate = 0; - } - - brelse(dibh); -out: - if (page) { - unlock_page(page); - put_page(page); - } - gfs2_trans_end(sdp); - gfs2_inplace_release(ip); - if (ip->i_qadata && ip->i_qadata->qa_qd_num) - gfs2_quota_unlock(ip); - if (inode == sdp->sd_rindex) { - gfs2_glock_dq(&m_ip->i_gh); - gfs2_holder_uninit(&m_ip->i_gh); - } - gfs2_glock_dq(&ip->i_gh); - gfs2_holder_uninit(&ip->i_gh); - return ret; -} - -/** * jdata_set_page_dirty - Page dirtying function * @page: The page to dirty * @@ -1023,96 +816,6 @@ out: } /** - * gfs2_ok_for_dio - check that dio is valid on this file - * @ip: The inode - * @offset: The offset at which we are reading or writing - * - * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) - * 1 (to accept the i/o request) - */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -{ - /* - * Should we return an error here? I can't see that O_DIRECT for - * a stuffed file makes any sense. For now we'll silently fall - * back to buffered I/O - */ - if (gfs2_is_stuffed(ip)) - return 0; - - if (offset >= i_size_read(&ip->i_inode)) - return 0; - return 1; -} - - - -static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct address_space *mapping = inode->i_mapping; - struct gfs2_inode *ip = GFS2_I(inode); - loff_t offset = iocb->ki_pos; - struct gfs2_holder gh; - int rv; - - /* - * Deferred lock, even if its a write, since we do no allocation - * on this path. All we need change is atime, and this lock mode - * ensures that other nodes have flushed their buffered read caches - * (i.e. their page cache entries for this inode). We do not, - * unfortunately have the option of only flushing a range like - * the VFS does. - */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - rv = gfs2_glock_nq(&gh); - if (rv) - goto out_uninit; - rv = gfs2_ok_for_dio(ip, offset); - if (rv != 1) - goto out; /* dio not valid, fall back to buffered i/o */ - - /* - * Now since we are holding a deferred (CW) lock at this point, you - * might be wondering why this is ever needed. There is a case however - * where we've granted a deferred local lock against a cached exclusive - * glock. 
That is ok provided all granted local locks are deferred, but - * it also means that it is possible to encounter pages which are - * cached and possibly also mapped. So here we check for that and sort - * them out ahead of the dio. The glock state machine will take care of - * everything else. - * - * If in fact the cached glock state (gl->gl_state) is deferred (CW) in - * the first place, mapping->nr_pages will always be zero. - */ - if (mapping->nrpages) { - loff_t lstart = offset & ~(PAGE_SIZE - 1); - loff_t len = iov_iter_count(iter); - loff_t end = PAGE_ALIGN(offset + len) - 1; - - rv = 0; - if (len == 0) - goto out; - if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); - rv = filemap_write_and_wait_range(mapping, lstart, end); - if (rv) - goto out; - if (iov_iter_rw(iter) == WRITE) - truncate_inode_pages_range(mapping, lstart, end); - } - - rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - gfs2_get_block_direct, NULL, NULL, 0); -out: - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); - return rv; -} - -/** * gfs2_releasepage - free the metadata associated with a page * @page: the page that's being released * @gfp_mask: passed from Linux VFS, ignored by us @@ -1187,12 +890,10 @@ static const struct address_space_operations gfs2_writeback_aops = { .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -1203,13 +904,11 @@ static const struct address_space_operations gfs2_ordered_aops = { .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .set_page_dirty = __set_page_dirty_buffers, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -1220,8 +919,6 @@ static const struct address_space_operations gfs2_jdata_aops = { .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .set_page_dirty = jdata_set_page_dirty, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h new file mode 100644 index 000000000000..fa8e5d0144dd --- /dev/null +++ b/fs/gfs2/aops.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. 
+ */ + +#ifndef __AOPS_DOT_H__ +#define __AOPS_DOT_H__ + +#include "incore.h" + +extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); +extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page); +extern void adjust_fs_space(struct inode *inode); +extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len); + +#endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index ed6699705c13..03128ed1f34e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -28,6 +28,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "aops.h" #include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k @@ -41,6 +42,8 @@ struct metapath { int mp_aheight; /* actual height (lookup height) */ }; +static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); + /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -389,7 +392,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) return mp->mp_aheight - x - 1; } -static inline void release_metapath(struct metapath *mp) +static void release_metapath(struct metapath *mp) { int i; @@ -397,27 +400,23 @@ static inline void release_metapath(struct metapath *mp) if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); + mp->mp_bh[i] = NULL; } } /** * gfs2_extent_length - Returns length of an extent of blocks - * @start: Start of the buffer - * @len: Length of the buffer in bytes - * @ptr: Current position in the buffer - * @limit: Max extent length to return (0 = unlimited) + * @bh: The metadata block + * @ptr: Current position in @bh + * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * - * If the first block is zero (unallocated) it will return the number of - * unallocated blocks in the extent, otherwise it will return the number - * of contiguous blocks in the extent. - * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) { - const __be64 *end = (start + len); + const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; u64 d = be64_to_cpu(*ptr); @@ -426,14 +425,11 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b ptr++; if (ptr >= end) break; - if (limit && --limit == 0) - break; - if (d) - d++; + d++; } while(be64_to_cpu(*ptr) == d); if (ptr >= end) *eob = 1; - return (ptr - first); + return ptr - first; } typedef const __be64 *(*gfs2_metadata_walker)( @@ -609,11 +605,13 @@ enum alloc_state { * ii) Indirect blocks to fill in lower part of the metadata tree * iii) Data blocks * - * The function is in two parts. The first part works out the total - * number of blocks which we need. The second part does the actual - * allocation asking for an extent at a time (if enough contiguous free - * blocks are available, there will only be one request per bmap call) - * and uses the state machine to initialise the blocks in order. + * This function is called after gfs2_iomap_get, which works out the + * total number of blocks which we need via gfs2_alloc_size. 
+ * + * We then do the actual allocation asking for an extent at a time (if + * enough contiguous free blocks are available, there will only be one + * allocation request per call) and use the state machine to initialise + * the blocks in order. * * Right now, this function will allocate at most one indirect block * worth of data -- with a default block size of 4K, that's slightly @@ -633,39 +631,26 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, struct buffer_head *dibh = mp->mp_bh[0]; u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; - unsigned dblks = 0; - unsigned ptrs_per_blk; + size_t dblks = iomap->length >> inode->i_blkbits; const unsigned end_of_metadata = mp->mp_fheight - 1; int ret; enum alloc_state state; __be64 *ptr; __be64 zero_bn = 0; - size_t maxlen = iomap->length >> inode->i_blkbits; BUG_ON(mp->mp_aheight < 1); BUG_ON(dibh == NULL); + BUG_ON(dblks < 1); gfs2_trans_add_meta(ip->i_gl, dibh); down_write(&ip->i_rw_mutex); if (mp->mp_fheight == mp->mp_aheight) { - struct buffer_head *bh; - int eob; - - /* Bottom indirect block exists, find unalloced extent size */ - ptr = metapointer(end_of_metadata, mp); - bh = mp->mp_bh[end_of_metadata]; - dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, - maxlen, &eob); - BUG_ON(dblks < 1); + /* Bottom indirect block exists */ state = ALLOC_DATA; } else { /* Need to allocate indirect blocks */ - ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs : - sdp->sd_diptrs; - dblks = min(maxlen, (size_t)(ptrs_per_blk - - mp->mp_list[end_of_metadata])); if (mp->mp_fheight == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = mp->mp_fheight - mp->mp_aheight; @@ -750,6 +735,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, } } while (iomap->addr == IOMAP_NULL_ADDR); + iomap->type = IOMAP_MAPPED; iomap->length = (u64)dblks << inode->i_blkbits; ip->i_height = mp->mp_fheight; gfs2_add_inode_blocks(&ip->i_inode, alloced); @@ -759,18 +745,51 @@ out: return ret; } -static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) +#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + +/** + * gfs2_alloc_size - Compute the maximum allocation size + * @inode: The inode + * @mp: The metapath + * @size: Requested size in blocks + * + * Compute the maximum size of the next allocation at @mp. + * + * Returns: size in blocks + */ +static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + const __be64 *first, *ptr, *end; - iomap->addr = (ip->i_no_addr << inode->i_blkbits) + - sizeof(struct gfs2_dinode); - iomap->offset = 0; - iomap->length = i_size_read(inode); - iomap->type = IOMAP_INLINE; -} + /* + * For writes to stuffed files, this function is called twice via + * gfs2_iomap_get, before and after unstuffing. The size we return the + * first time needs to be large enough to get the reservation and + * allocation sizes right. The size we return the second time must + * be exact or else gfs2_iomap_alloc won't do the right thing. + */ -#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { + unsigned int maxsize = mp->mp_fheight > 1 ? 
+ sdp->sd_inptrs : sdp->sd_diptrs; + maxsize -= mp->mp_list[mp->mp_fheight - 1]; + if (size > maxsize) + size = maxsize; + return size; + } + + first = metapointer(ip->i_height - 1, mp); + end = metaend(ip->i_height - 1, mp); + if (end - first > size) + end = first + size; + for (ptr = first; ptr < end; ptr++) { + if (*ptr) + break; + } + return ptr - first; +} /** * gfs2_iomap_get - Map blocks from an inode to disk blocks @@ -789,37 +808,63 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t size = i_size_read(inode); __be64 *ptr; sector_t lblock; sector_t lblock_stop; int ret; int eob; u64 len; - struct buffer_head *bh; + struct buffer_head *dibh = NULL, *bh; u8 height; if (!length) return -EINVAL; + down_read(&ip->i_rw_mutex); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) + goto unlock; + iomap->private = dibh; + if (gfs2_is_stuffed(ip)) { - if (flags & IOMAP_REPORT) { - if (pos >= i_size_read(inode)) - return -ENOENT; - gfs2_stuffed_iomap(inode, iomap); - return 0; + if (flags & IOMAP_WRITE) { + loff_t max_size = gfs2_max_stuffed_size(ip); + + if (pos + length > max_size) + goto unstuff; + iomap->length = max_size; + } else { + if (pos >= size) { + if (flags & IOMAP_REPORT) { + ret = -ENOENT; + goto unlock; + } else { + /* report a hole */ + iomap->offset = pos; + iomap->length = length; + goto do_alloc; + } + } + iomap->length = size; } - BUG_ON(!(flags & IOMAP_WRITE)); + iomap->addr = (ip->i_no_addr << inode->i_blkbits) + + sizeof(struct gfs2_dinode); + iomap->type = IOMAP_INLINE; + iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); + goto out; } + +unstuff: lblock = pos >> inode->i_blkbits; iomap->offset = lblock << inode->i_blkbits; lblock_stop = (pos + length - 1) >> inode->i_blkbits; len = lblock_stop - lblock + 1; + iomap->length = len << inode->i_blkbits; - down_read(&ip->i_rw_mutex); - - ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]); - if (ret) - goto unlock; + get_bh(dibh); + mp->mp_bh[0] = dibh; height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) @@ -840,12 +885,12 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, len, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_MAPPED; - iomap->flags = IOMAP_F_MERGED; + iomap->flags |= IOMAP_F_MERGED; if (eob) iomap->flags |= IOMAP_F_GFS2_BOUNDARY; @@ -853,25 +898,185 @@ out: iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); + if (ret && dibh) + brelse(dibh); return ret; do_alloc: iomap->addr = IOMAP_NULL_ADDR; - iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_HOLE; - iomap->flags = 0; if (flags & IOMAP_REPORT) { - loff_t size = i_size_read(inode); if (pos >= size) ret = -ENOENT; else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else iomap->length = size - pos; + } else if (flags & IOMAP_WRITE) { + u64 alloc_size; + + if (flags & IOMAP_DIRECT) + goto out; /* (see gfs2_file_direct_write) */ + + len = gfs2_alloc_size(inode, mp, len); + alloc_size = len << inode->i_blkbits; + if (alloc_size < iomap->length) + iomap->length = alloc_size; + } else { + if (pos < size && height == ip->i_height) + ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } 
goto out; } +static int gfs2_write_lock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (error) + goto out_uninit; + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, &m_ip->i_gh); + if (error) + goto out_unlock; + } + return 0; + +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + +static void gfs2_write_unlock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + gfs2_glock_dq_uninit(&m_ip->i_gh); + } + gfs2_glock_dq_uninit(&ip->i_gh); +} + +static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, + unsigned copied, struct page *page, + struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); +} + +static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, + loff_t length, unsigned flags, + struct iomap *iomap) +{ + struct metapath mp = { .mp_aheight = 1, }; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + bool unstuff, alloc_required; + int ret; + + ret = gfs2_write_lock(inode); + if (ret) + return ret; + + unstuff = gfs2_is_stuffed(ip) && + pos + length > gfs2_max_stuffed_size(ip); + + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + if (ret) + goto out_release; + + alloc_required = unstuff || iomap->type == IOMAP_HOLE; + + if (alloc_required || gfs2_is_jdata(ip)) + gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, + &ind_blocks); + + if (alloc_required) { + struct gfs2_alloc_parms ap = { + .target = data_blocks + ind_blocks + }; + + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_release; + + ret = gfs2_inplace_reserve(ip, &ap); + if (ret) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + if (inode == sdp->sd_rindex) + rblocks += 2 * RES_STATFS; + if (alloc_required) + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); + + ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); + if (ret) + goto out_trans_fail; + + if (unstuff) { + ret = gfs2_unstuff_dinode(ip, NULL); + if (ret) + goto out_trans_end; + release_metapath(&mp); + brelse(iomap->private); + iomap->private = NULL; + ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, + flags, iomap, &mp); + if (ret) + goto out_trans_end; + } + + if (iomap->type == IOMAP_HOLE) { + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + if (ret) { + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + punch_hole(ip, iomap->offset, iomap->length); + goto out_qunlock; + } + } + release_metapath(&mp); + if (gfs2_is_jdata(ip)) + iomap->page_done = gfs2_iomap_journaled_page_done; + return 0; + +out_trans_end: + gfs2_trans_end(sdp); +out_trans_fail: + if (alloc_required) + gfs2_inplace_release(ip); +out_qunlock: + if (alloc_required) + gfs2_quota_unlock(ip); +out_release: + if (iomap->private) + brelse(iomap->private); + release_metapath(&mp); + gfs2_write_unlock(inode); + 
return ret; +} + static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap) { @@ -879,22 +1084,79 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, struct metapath mp = { .mp_aheight = 1, }; int ret; + iomap->flags |= IOMAP_F_BUFFER_HEAD; + trace_gfs2_iomap_start(ip, pos, length, flags); - if (flags & IOMAP_WRITE) { - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); - release_metapath(&mp); + if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); release_metapath(&mp); + /* + * Silently fall back to buffered I/O for stuffed files or if + * we've got a hole (see gfs2_file_direct_write). + */ + if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && + iomap->type != IOMAP_MAPPED) + ret = -ENOTBLK; } trace_gfs2_iomap_end(ip, iomap, ret); return ret; } +static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_trans *tr = current->journal_info; + struct buffer_head *dibh = iomap->private; + + if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) + goto out; + + if (iomap->type != IOMAP_INLINE) { + gfs2_ordered_add_inode(ip); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + } + + if (inode == sdp->sd_rindex) { + adjust_fs_space(inode); + sdp->sd_rindex_uptodate = 0; + } + + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. */ + loff_t blockmask = i_blocksize(inode) - 1; + loff_t end = (pos + length) & ~blockmask; + + pos = (pos + written + blockmask) & ~blockmask; + if (pos < end) { + truncate_pagecache_range(inode, pos, end - 1); + punch_hole(ip, pos, end - pos); + } + } + + if (ip->i_qadata && ip->i_qadata->qa_qd_num) + gfs2_quota_unlock(ip); + gfs2_write_unlock(inode); + +out: + if (dibh) + brelse(dibh); + return 0; +} + const struct iomap_ops gfs2_iomap_ops = { .iomap_begin = gfs2_iomap_begin, + .iomap_end = gfs2_iomap_end, }; /** @@ -941,12 +1203,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, } else { ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); release_metapath(&mp); - - /* Return unmapped buffer beyond the end of file. 
*/ - if (ret == -ENOENT) { - ret = 0; - goto out; - } } if (ret) goto out; @@ -2060,7 +2316,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; - if (lblock_stop > end_of_file) + if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) return 1; size = (lblock_stop - lblock) << shift; @@ -2154,11 +2410,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) if (error) goto out; } else { - unsigned int start_off, end_off, blocksize; + unsigned int start_off, end_len, blocksize; blocksize = i_blocksize(inode); start_off = offset & (blocksize - 1); - end_off = (offset + length) & (blocksize - 1); + end_len = (offset + length) & (blocksize - 1); if (start_off) { unsigned int len = length; if (length > blocksize - start_off) @@ -2167,11 +2423,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) if (error) goto out; if (start_off + length < blocksize) - end_off = 0; + end_len = 0; } - if (end_off) { + if (end_len) { error = gfs2_block_zero_range(inode, - offset + length - end_off, end_off); + offset + length - end_len, end_len); if (error) goto out; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index d97ad89955d1..e37002560c11 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1011,7 +1011,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) u64 bn, leaf_no; __be64 *lp; u32 index; - int x, moved = 0; + int x; int error; index = name->hash >> (32 - dip->i_depth); @@ -1113,8 +1113,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) if (!prev) prev = dent; - - moved = 1; } else { prev = dent; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7137db7b0119..08369c6cd127 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -26,10 +26,12 @@ #include <linux/dlm.h> #include <linux/dlm_plock.h> #include <linux/delay.h> +#include <linux/backing-dev.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" +#include "aops.h" #include "dir.h" #include "glock.h" #include "glops.h" @@ -387,7 +389,7 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); @@ -688,12 +690,83 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? 
ret : ret1; } +static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + size_t count = iov_iter_count(to); + struct gfs2_holder gh; + ssize_t ret; + + if (!count) + return 0; /* skip atime */ + + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); + + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + size_t len = iov_iter_count(from); + loff_t offset = iocb->ki_pos; + struct gfs2_holder gh; + ssize_t ret; + + /* + * Deferred lock, even if it's a write, since we do no allocation on + * this path. All we need to change is the atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately, have the option of only flushing a range like the + * VFS does. + */ + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + /* Silently fall back to buffered I/O when writing beyond EOF */ + if (offset + len > i_size_read(&ip->i_inode)) + goto out; + + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); + +out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = gfs2_file_direct_read(iocb, to); + if (likely(ret != -ENOTBLK)) + return ret; + iocb->ki_flags &= ~IOCB_DIRECT; + } + return generic_file_read_iter(iocb, to); +} + /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context - * @iov: The data to write - * @nr_segs: Number of @iov segments - * @pos: The file position + * @from: The data to write * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong * @@ -705,8 +778,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct gfs2_inode *ip = GFS2_I(file_inode(file)); - int ret; + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); + ssize_t written = 0, ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -723,7 +797,71 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_glock_dq_uninit(&gh); } - return generic_file_write_iter(iocb, from); + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = inode_to_bdi(inode); + + ret = file_remove_privs(file); + if (ret) + goto out2; + + ret = file_update_time(file); + if (ret) + goto out2; + + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; + loff_t pos, endbyte; + ssize_t buffered; + + written = gfs2_file_direct_write(iocb, from); + if (written < 0 || !iov_iter_count(from)) + goto out2; + + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (unlikely(ret < 0)) + goto out2; + buffered = ret; + + /* + 
* We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT + * semantics. + */ + pos = iocb->ki_pos; + endbyte = pos + buffered - 1; + ret = filemap_write_and_wait_range(mapping, pos, endbyte); + if (!ret) { + iocb->ki_pos += buffered; + written += buffered; + invalidate_mapping_pages(mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + } else { + /* + * We don't know how much we wrote, so just return + * the number of bytes which were direct-written + */ + } + } else { + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (likely(ret > 0)) + iocb->ki_pos += ret; + } + +out2: + current->backing_dev_info = NULL; +out: + inode_unlock(inode); + if (likely(ret > 0)) { + /* Handle various SYNC-type writes */ + ret = generic_write_sync(iocb, ret); + } + return written ? written : ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -733,7 +871,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, struct gfs2_inode *ip = GFS2_I(inode); loff_t end = offset + len; struct buffer_head *dibh; - struct iomap iomap = { }; int error; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -749,12 +886,14 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, } while (offset < end) { + struct iomap iomap = { }; + error = gfs2_iomap_get_alloc(inode, offset, end - offset, &iomap); if (error) goto out; offset = iomap.offset + iomap.length; - if (iomap.type != IOMAP_HOLE) + if (!(iomap.flags & IOMAP_F_NEW)) continue; error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits, iomap.length >> inode->i_blkbits, @@ -1125,7 +1264,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, @@ -1155,7 +1294,7 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d2ad817e089f..b96d39c28e17 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -65,6 +65,27 @@ struct gfs2_log_operations { #define GBF_FULL 1 +/** + * Clone bitmaps (bi_clone): + * + * - When a block is freed, we remember the previous state of the block in the + * clone bitmap, and only mark the block as free in the real bitmap. + * + * - When looking for a block to allocate, we check for a free block in the + * clone bitmap, and if no clone bitmap exists, in the real bitmap. + * + * - For allocating a block, we mark it as allocated in the real bitmap, and if + * a clone bitmap exists, also in the clone bitmap. + * + * - At the end of a log_flush, we copy the real bitmap into the clone bitmap + * to make the clone bitmap reflect the current allocation state. + * (Alternatively, we could remove the clone bitmap.) + * + * The clone bitmaps are in-core only, and are never written to disk. + * + * These steps ensure that blocks which have been freed in a transaction cannot + * be reallocated in that same transaction. 
+ */ struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; @@ -295,7 +316,6 @@ struct gfs2_blkreserv { struct rb_node rs_node; /* link to other block reservations */ struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ - u64 rs_inum; /* Inode number for reservation */ }; /* @@ -398,7 +418,6 @@ struct gfs2_inode { struct gfs2_holder i_gh; /* for prepare/commit_write only */ struct gfs2_qadata *i_qadata; /* quota allocation data */ struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */ - struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; struct list_head i_ordered; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 006c6164f759..ac7caa267ed6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -821,6 +821,13 @@ restart: goto fail; } + /** + * If we're a spectator, we don't want to take the lock in EX because + * we cannot do the first-mount responsibility it implies: recovery. + */ + if (sdp->sd_args.ar_spectator) + goto locks_done; + error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); if (!error) { mounted_mode = DLM_LOCK_EX; @@ -896,9 +903,16 @@ locks_done: if (lvb_gen < mount_gen) { /* wait for mounted nodes to update control_lock lvb to our generation, which might include new recovery bits set */ - fs_info(sdp, "control_mount wait1 block %u start %u mount %u " - "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, - lvb_gen, ls->ls_recover_flags); + if (sdp->sd_args.ar_spectator) { + fs_info(sdp, "Recovery is required. Waiting for a " + "non-spectator to mount.\n"); + msleep_interruptible(1000); + } else { + fs_info(sdp, "control_mount wait1 block %u start %u " + "mount %u lvb %u flags %lx\n", block_gen, + start_gen, mount_gen, lvb_gen, + ls->ls_recover_flags); + } spin_unlock(&ls->ls_recover_spin); goto restart; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0248835625f1..ee20ea42e7b5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -92,7 +92,8 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, - struct gfs2_trans *tr) + struct gfs2_trans *tr, + bool *withdraw) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -107,8 +108,10 @@ __acquires(&sdp->sd_ail_lock) gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { gfs2_io_error_bh(sdp, bh); + *withdraw = true; + } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); continue; } @@ -148,6 +151,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; + bool withdraw = false; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); @@ -156,11 +160,13 @@ restart: list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, tr)) + if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw)) goto restart; } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); + if (withdraw) + gfs2_lm_withdraw(sdp, NULL); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -188,7 +194,8 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) * */ -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + bool *withdraw) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -199,11 +206,12 @@ 
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_assert(sdp, bd->bd_tr == tr); if (buffer_busy(bh)) continue; - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { gfs2_io_error_bh(sdp, bh); + *withdraw = true; + } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } - } /** @@ -218,10 +226,11 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) struct gfs2_trans *tr, *s; int oldest_tr = 1; int ret; + bool withdraw = false; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { - gfs2_ail1_empty_one(sdp, tr); + gfs2_ail1_empty_one(sdp, tr, &withdraw); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else @@ -230,6 +239,9 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); + if (withdraw) + gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); + return ret; } @@ -689,7 +701,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, hash = ~crc32(~0, lh, LH_V1_SIZE); lh->lh_hash = cpu_to_be32(hash); - tv = current_kernel_time64(); + ktime_get_coarse_real_ts64(&tv); lh->lh_nsec = cpu_to_be32(tv.tv_nsec); lh->lh_sec = cpu_to_be64(tv.tv_sec); addr = gfs2_log_bmap(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d6567990baf..f2567f958d00 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (test_set_buffer_pinned(bh)) gfs2_assert_withdraw(sdp, 0); if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); bd = bh->b_private; /* If this buffer is in the AIL and it has already been written * to in-place disk block, remove it from the AIL. diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 52de1036d9f9..be9c0bf697fe 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -293,7 +293,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); brelse(bh); *bhp = NULL; return -EIO; @@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); return -EIO; } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index d8b622c375ab..0f501f938d1c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -413,12 +413,13 @@ void gfs2_recover_func(struct work_struct *work) ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep; int ro = 0; unsigned int pass; - int error; + int error = 0; int jlocked = 0; t_start = ktime_get(); - if (sdp->sd_args.ar_spectator || - (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { + if (sdp->sd_args.ar_spectator) + goto fail; + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid); jlocked = 1; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 33abcf29bc05..1ad3256b9cbc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -123,17 +123,26 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, /** * gfs2_testbit - test a bit in the bitmaps * @rbm: The bit to test + * @use_clone: If true, test the clone bitmap, not the official 
bitmap. + * + * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps, + * not the "real" bitmaps, to avoid allocating recently freed blocks. * * Returns: The two bit block state of the requested bit */ -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) { struct gfs2_bitmap *bi = rbm_bi(rbm); - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; + const u8 *buffer; const u8 *byte; unsigned int bit; + if (use_clone && bi->bi_clone) + buffer = bi->bi_clone; + else + buffer = bi->bi_bh->b_data; + buffer += bi->bi_offset; byte = buffer + (rbm->offset / GFS2_NBBY); bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; @@ -322,7 +331,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le u8 res; for (n = 0; n < n_unaligned; n++) { - res = gfs2_testbit(rbm); + res = gfs2_testbit(rbm, true); if (res != GFS2_BLKST_FREE) return true; (*len)--; @@ -607,8 +616,10 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { + struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", - (unsigned long long)rs->rs_inum, + (unsigned long long)ip->i_no_addr, (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -1051,6 +1062,18 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) /* rd_data0, rd_data and rd_bitbytes already set from rindex */ } +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd); @@ -1073,6 +1096,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) str->rg_crc = cpu_to_be32(crc); memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf); } static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) @@ -1087,25 +1111,6 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) return 1; } -static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) -{ - const struct gfs2_rgrp *str = buf; - - rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); - rgl->rl_flags = str->rg_flags; - rgl->rl_free = str->rg_free; - rgl->rl_dinodes = str->rg_dinodes; - rgl->rl_igeneration = str->rg_igeneration; - rgl->__pad = 0UL; -} - -static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) -{ - struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; - u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; - rgl->rl_unlinked = cpu_to_be32(unlinked); -} - static u32 count_unlinked(struct gfs2_rgrpd *rgd) { struct gfs2_bitmap *bi; @@ -1424,7 +1429,6 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -1488,6 +1492,34 @@ static void rs_insert(struct gfs2_inode *ip) } /** + * rgd_free - return the number of free blocks we can allocate. + * @rgd: the resource group + * + * This function returns the number of free blocks for an rgrp. 
+ * That's the clone-free blocks (blocks that are free, not including those + * still being used for unlinked files that haven't been deleted.) + * + * It also subtracts any blocks reserved by someone else, but does not + * include free blocks that are still part of our current reservation, + * because obviously we can (and will) allocate them. + */ +static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) +{ + u32 tot_reserved, tot_free; + + if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + return 0; + tot_reserved = rgd->rd_reserved - rs->rs_free; + + if (rgd->rd_free_clone < tot_reserved) + tot_reserved = 0; + + tot_free = rgd->rd_free_clone - tot_reserved; + + return tot_free; +} + +/** * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks @@ -1502,7 +1534,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + u32 free_blocks = rgd_free(rgd, rs); int ret; struct inode *inode = &ip->i_inode; @@ -1528,7 +1560,6 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (ret == 0) { rs->rs_rbm = rbm; rs->rs_free = extlen; - rs->rs_inum = ip->i_no_addr; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1686,7 +1717,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if (test_bit(GBF_FULL, &bi->bi_flags) && + if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && + test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; @@ -1983,7 +2015,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; - u32 skip = 0; + u32 free_blocks, skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -1991,8 +2023,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; - } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rbm.rgd = begin = ip->i_rgd; + } else if (rs->rs_rbm.rgd && + rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { + begin = rs->rs_rbm.rgd; } else { check_and_update_goal(ip); rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); @@ -2053,11 +2086,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || + free_blocks = rgd_free(rs->rs_rbm.rgd, rs); + if (free_blocks >= ap->target || (loops == 2 && ap->min_target && - rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { - ip->i_rgd = rs->rs_rbm.rgd; - ap->allowed = ip->i_rgd->rd_free_clone; + free_blocks >= ap->min_target)) { + ap->allowed = free_blocks; return 0; } check_rgrp: @@ -2116,26 +2149,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip) } /** - * gfs2_get_block_type - Check a block in a RG is of given type - * @rgd: the resource group holding the block - * @block: the block number - * - * Returns: The block type (GFS2_BLKST_*) - */ - -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) -{ - struct gfs2_rbm rbm = { .rgd = rgd, }; - int ret; - - ret = gfs2_rbm_from_block(&rbm, block); - WARN_ON_ONCE(ret != 0); - - return 
gfs2_testbit(&rbm); -} - - -/** * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode @@ -2159,7 +2172,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) break; gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); @@ -2335,7 +2348,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2393,7 +2406,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2434,7 +2446,6 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -2471,8 +2482,7 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - update_rgrp_lvb_unlinked(rgd, 1); + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) @@ -2492,8 +2502,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - update_rgrp_lvb_unlinked(rgd, -1); + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); @@ -2516,6 +2525,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder rgd_gh; + struct gfs2_rbm rbm; int error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); @@ -2526,7 +2536,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) if (error) goto fail; - if (gfs2_get_block_type(rgd, no_addr) != type) + rbm.rgd = rgd; + error = gfs2_rbm_from_block(&rbm, no_addr); + WARN_ON_ONCE(error != 0); + + if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; gfs2_glock_dq_uninit(&rgd_gh); @@ -2558,19 +2572,34 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) return; - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) - rgd = ip->i_rgd; - else + /* + * The resource group last accessed is kept in the last position. 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index af0d5b01cf0b..c212893534ed 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1729,7 +1729,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	if (ip) {
 		ip->i_flags = 0;
 		ip->i_gl = NULL;
-		ip->i_rgd = NULL;
 		memset(&ip->i_res, 0, sizeof(ip->i_res));
 		RB_CLEAR_NODE(&ip->i_res.rs_node);
 		ip->i_rahead = 0;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c191fa58a1df..1787d295834e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -429,11 +429,18 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 	spin_lock(&sdp->sd_jindex_spin);
 	rv = -EBUSY;
-	if (sdp->sd_jdesc->jd_jid == jid)
+	/**
+	 * If we're a spectator, we use journal0, but it's not really ours.
+	 * So we need to wait for its recovery too. If we skip it we'd never
+	 * queue work to the recovery workqueue, and so its completion would
+	 * never clear the DFL_BLOCK_LOCKS flag, so all our locks would
+	 * permanently stop working.
+	 */
+	if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator)
 		goto out;
 	rv = -ENOENT;
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (jd->jd_jid != jid)
+		if (jd->jd_jid != jid && !sdp->sd_args.ar_spectator)
 			continue;
 		rv = gfs2_recover_journal(jd, false);
 		break;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cb10b95efe0f..e0025258107a 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -606,7 +606,8 @@ TRACE_EVENT(gfs2_rs,
 		__entry->rd_addr	= rs->rs_rbm.rgd->rd_addr;
 		__entry->rd_free_clone	= rs->rs_rbm.rgd->rd_free_clone;
 		__entry->rd_reserved	= rs->rs_rbm.rgd->rd_reserved;
-		__entry->inum		= rs->rs_inum;
+		__entry->inum		= container_of(rs, struct gfs2_inode,
+						       i_res)->i_no_addr;
 		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
 		__entry->free		= rs->rs_free;
 		__entry->func		= func;
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 1e6e7da25a17..ad70087d0597 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,9 +30,11 @@ struct gfs2_glock;
  * block, or all of the blocks in the rg, whichever is smaller */
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
 {
-	if (requested < ip->i_rgd->rd_length)
+	struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd;
+
+	if (requested < rgd->rd_length)
 		return requested + 1;
-	return ip->i_rgd->rd_length;
+	return rgd->rd_length;
 }
 
 extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
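
The trace_gfs2.h and trans.h hunks both rely on the same idiom: since the reservation is embedded in struct gfs2_inode, the inode can be recovered from a reservation pointer with container_of() instead of keeping duplicate state such as rs_inum or i_rgd. A user-space sketch of that idiom with toy struct names (inode_like/blkreserv are invented; the kernel macro additionally type-checks the member):

	#include <stddef.h>
	#include <stdio.h>

	/* User-space rendering of the kernel's container_of() macro. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	/* Toy stand-ins for struct gfs2_blkreserv embedded in
	 * struct gfs2_inode. */
	struct blkreserv { unsigned int rs_free; };
	struct inode_like {
		unsigned long long i_no_addr;
		struct blkreserv i_res;		/* embedded, like ip->i_res */
	};

	int main(void)
	{
		struct inode_like ip = {
			.i_no_addr = 1234,
			.i_res = { .rs_free = 8 },
		};
		struct blkreserv *rs = &ip.i_res;

		/* Recover the inode from the embedded reservation, as the
		 * gfs2_rs tracepoint now does. */
		printf("%llu\n",
		       container_of(rs, struct inode_like, i_res)->i_no_addr);
		return 0;
	}

Dropping rs_inum this way is also what lets the patch series shrink struct gfs2_blkreserv and, with it, struct gfs2_inode.
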
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 763d659db91b..59c811de0dc7 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -46,14 +46,16 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
 	    test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
 		return 0;
 
-	va_start(args, fmt);
+	if (fmt) {
+		va_start(args, fmt);
 
-	vaf.fmt = fmt;
-	vaf.va = &args;
+		vaf.fmt = fmt;
+		vaf.va = &args;
 
-	fs_err(sdp, "%pV", &vaf);
+		fs_err(sdp, "%pV", &vaf);
 
-	va_end(args);
+		va_end(args);
+	}
 
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 		fs_err(sdp, "about to withdraw this file system\n");
@@ -246,21 +248,21 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 }
 
 /**
- * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
- * Returns: -1 if this call withdrew the machine,
- *          0 if it was already withdrawn
+ * gfs2_io_error_bh_i - Flag a buffer I/O error
+ * @withdraw: withdraw the filesystem
  */
-int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
-		       const char *function, char *file, unsigned int line)
+void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			const char *function, char *file, unsigned int line,
+			bool withdraw)
 {
-	int rv;
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: I/O error\n"
-			      "  block = %llu\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)bh->b_blocknr,
-			      function, file, line);
-	return rv;
+	fs_err(sdp,
+	       "fatal: I/O error\n"
+	       "  block = %llu\n"
+	       "  function = %s, file = %s, line = %u\n",
+	       (unsigned long long)bh->b_blocknr,
+	       function, file, line);
+	if (withdraw)
+		gfs2_lm_withdraw(sdp, NULL);
 }
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3926f95a6eb7..96ac4aba4738 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -136,11 +136,15 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
 gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
 
-int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
-		       const char *function, char *file, unsigned int line);
+void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			const char *function, char *file, unsigned int line,
+			bool withdraw);
+
+#define gfs2_io_error_bh_wd(sdp, bh) \
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true);
 
 #define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false);
 
 extern struct kmem_cache *gfs2_glock_cachep;
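
The util.c/util.h hunks follow the usual kernel pattern for call-site capture: thin macros expand __func__, __FILE__ and __LINE__ at each caller and forward them, together with a behavior flag, into one shared *_i function. A freestanding sketch of the same pattern (report_error* names invented, plain stderr in place of fs_err):

	#include <stdbool.h>
	#include <stdio.h>

	/* One shared implementation takes the call-site info plus a flag,
	 * like gfs2_io_error_bh_i() above. */
	static void report_error_i(const char *what, const char *function,
				   const char *file, unsigned int line,
				   bool fatal)
	{
		fprintf(stderr, "%s: %s error\n"
			"  function = %s, file = %s, line = %u\n",
			fatal ? "fatal" : "recoverable", what,
			function, file, line);
	}

	/* Wrappers expand the call-site macros at each caller, mirroring
	 * gfs2_io_error_bh() and gfs2_io_error_bh_wd(). */
	#define report_error(what) \
		report_error_i((what), __func__, __FILE__, __LINE__, false)
	#define report_error_fatal(what) \
		report_error_i((what), __func__, __FILE__, __LINE__, true)

	int main(void)
	{
		report_error("io");		/* logs only */
		report_error_fatal("io");	/* logs, then would withdraw */
		return 0;
	}

Splitting the flag out this way lets journal writes keep logging I/O errors without forcing an immediate withdraw, which is the point of the new gfs2_io_error_bh()/gfs2_io_error_bh_wd() pair.
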
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index f2bce1e0f6fb..38515988aaf7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -343,60 +343,45 @@ struct ea_list {
 	unsigned int ei_size;
 };
 
-static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
-{
-	switch (ea->ea_type) {
-	case GFS2_EATYPE_USR:
-		return 5 + ea->ea_name_len + 1;
-	case GFS2_EATYPE_SYS:
-		return 7 + ea->ea_name_len + 1;
-	case GFS2_EATYPE_SECURITY:
-		return 9 + ea->ea_name_len + 1;
-	default:
-		return 0;
-	}
-}
-
 static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
 		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
 		     void *private)
 {
 	struct ea_list *ei = private;
 	struct gfs2_ea_request *er = ei->ei_er;
-	unsigned int ea_size = gfs2_ea_strlen(ea);
+	unsigned int ea_size;
+	char *prefix;
+	unsigned int l;
 
 	if (ea->ea_type == GFS2_EATYPE_UNUSED)
 		return 0;
 
-	if (er->er_data_len) {
-		char *prefix = NULL;
-		unsigned int l = 0;
-		char c = 0;
+	switch (ea->ea_type) {
+	case GFS2_EATYPE_USR:
+		prefix = "user.";
+		l = 5;
+		break;
+	case GFS2_EATYPE_SYS:
+		prefix = "system.";
+		l = 7;
+		break;
+	case GFS2_EATYPE_SECURITY:
+		prefix = "security.";
+		l = 9;
+		break;
+	default:
+		BUG();
+	}
 
+	ea_size = l + ea->ea_name_len + 1;
+	if (er->er_data_len) {
 		if (ei->ei_size + ea_size > er->er_data_len)
 			return -ERANGE;
 
-		switch (ea->ea_type) {
-		case GFS2_EATYPE_USR:
-			prefix = "user.";
-			l = 5;
-			break;
-		case GFS2_EATYPE_SYS:
-			prefix = "system.";
-			l = 7;
-			break;
-		case GFS2_EATYPE_SECURITY:
-			prefix = "security.";
-			l = 9;
-			break;
-		}
-
-		BUG_ON(l == 0);
-
 		memcpy(er->er_data + ei->ei_size, prefix, l);
 		memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
 		       ea->ea_name_len);
-		memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
+		er->er_data[ei->ei_size + ea_size - 1] = 0;
 	}
 
 	ei->ei_size += ea_size;
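
ea_list_i() above follows the listxattr(2) sizing contract: each attribute contributes prefix length + name length + 1 (for the terminating NUL), and a caller that passes a zero-length buffer only gets the total size accumulated, as the er->er_data_len check suggests. A user-space sketch of that two-pass convention (the entry names and buffer size are hypothetical):

	#include <stdio.h>
	#include <string.h>

	/* Hypothetical xattr names; GFS2 derives the prefix from ea_type
	 * and stores only the suffix on disk. */
	static const char *names[] = { "user.comment", "security.selinux" };
	#define NNAMES (sizeof(names) / sizeof(names[0]))

	int main(void)
	{
		char buf[64];
		size_t size = 0, off = 0, i;

		/* Pass 1: zero-length buffer, just total up the lengths
		 * (name + NUL), like ea_list_i() when er_data_len == 0. */
		for (i = 0; i < NNAMES; i++)
			size += strlen(names[i]) + 1;
		printf("needed: %zu bytes\n", size);

		/* Pass 2: copy the NUL-separated list; a real caller would
		 * return -ERANGE instead of skipping on overflow. */
		if (size <= sizeof(buf)) {
			for (i = 0; i < NNAMES; i++) {
				memcpy(buf + off, names[i],
				       strlen(names[i]) + 1);
				off += strlen(names[i]) + 1;
			}
		}
		return 0;
	}
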