diff options
author | Michal Marek <mmarek@suse.cz> | 2010-12-14 22:01:55 +0100 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2010-12-14 22:01:55 +0100 |
commit | 8990c1bc4be46473ad19bf2fa612ca57286f3df4 (patch) | |
tree | 3cea60576903a1d26c67e6ec62891b524d390e95 /fs/xfs | |
parent | 2979076fbf17a0947d6eba367b0cac19c907c160 (diff) | |
parent | c8ddb2713c624f432fa5fe3c7ecffcdda46ea0d4 (diff) |
Merge commit 'v2.6.37-rc1' into kbuild/kbuild
Diffstat (limited to 'fs/xfs')
102 files changed, 3157 insertions, 5663 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 480f28127f09..6100ec0fa1d4 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -22,6 +22,7 @@ config XFS_FS config XFS_QUOTA bool "XFS Quota support" depends on XFS_FS + select QUOTACTL help If you say Y here, you will be able to set limits for disk usage on a per user and/or a per group basis under XFS. XFS considers quota diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c8fb13f83b3f..0dce969d6cad 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ xfs_trans_inode.o \ - xfs_trans_item.o \ xfs_utils.o \ xfs_vnodeops.o \ - xfs_rw.o \ - xfs_dmops.o + xfs_rw.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 9f769b5b38fc..b2771862fd3d 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask) struct posix_acl *acl; int error = -EAGAIN; - xfs_itrace_entry(ip); + trace_xfs_check_acl(ip); /* * If there is no attribute fork no ACL exists on this inode and diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 34640d6dbdcb..c9af48fffcd7 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -21,19 +21,12 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_trans.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_btree.h" #include "xfs_error.h" #include "xfs_rw.h" #include "xfs_iomap.h" @@ -92,18 +85,15 @@ void xfs_count_page_state( struct page *page, int *delalloc, - int *unmapped, int *unwritten) { struct buffer_head *bh, *head; - *delalloc = *unmapped = *unwritten = 0; + *delalloc = *unwritten = 0; bh = head = page_buffers(page); do { - if (buffer_uptodate(bh) && !buffer_mapped(bh)) - (*unmapped) = 1; - else if (buffer_unwritten(bh)) + if (buffer_unwritten(bh)) (*unwritten) = 1; else if (buffer_delay(bh)) (*delalloc) = 1; @@ -212,23 +202,17 @@ xfs_setfilesize( } /* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. + * Schedule IO completion handling on the final put of an ioend. */ STATIC void xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) + struct xfs_ioend *ioend) { if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq; - - wq = (ioend->io_type == IO_UNWRITTEN) ? - xfsconvertd_workqueue : xfsdatad_workqueue; - queue_work(wq, &ioend->io_work); - if (wait) - flush_workqueue(wq); + if (ioend->io_type == IO_UNWRITTEN) + queue_work(xfsconvertd_workqueue, &ioend->io_work); + else + queue_work(xfsdatad_workqueue, &ioend->io_work); } } @@ -272,11 +256,25 @@ xfs_end_io( */ if (error == EAGAIN) { atomic_inc(&ioend->io_remaining); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); /* ensure we don't spin on blocked ioends */ delay(1); - } else + } else { + if (ioend->io_iocb) + aio_complete(ioend->io_iocb, ioend->io_result, 0); xfs_destroy_ioend(ioend); + } +} + +/* + * Call IO completion handling in caller context on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend_sync( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) + xfs_end_io(&ioend->io_work); } /* @@ -309,6 +307,8 @@ xfs_alloc_ioend( atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); ioend->io_offset = 0; ioend->io_size = 0; + ioend->io_iocb = NULL; + ioend->io_result = 0; INIT_WORK(&ioend->io_work, xfs_end_io); return ioend; @@ -358,7 +358,7 @@ xfs_end_bio( bio->bi_end_io = NULL; bio_put(bio); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); } STATIC void @@ -500,7 +500,7 @@ xfs_submit_ioend( } if (bio) xfs_submit_ioend_bio(wbc, ioend, bio); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); } while ((ioend = next) != NULL); } @@ -614,31 +614,30 @@ xfs_map_at_offset( STATIC unsigned int xfs_probe_page( struct page *page, - unsigned int pg_offset, - int mapped) + unsigned int pg_offset) { + struct buffer_head *bh, *head; int ret = 0; if (PageWriteback(page)) return 0; + if (!PageDirty(page)) + return 0; + if (!page->mapping) + return 0; + if (!page_has_buffers(page)) + return 0; - if (page->mapping && PageDirty(page)) { - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; - - bh = head = page_buffers(page); - do { - if (!buffer_uptodate(bh)) - break; - if (mapped != buffer_mapped(bh)) - break; - ret += bh->b_size; - if (ret >= pg_offset) - break; - } while ((bh = bh->b_this_page) != head); - } else - ret = mapped ? 0 : PAGE_CACHE_SIZE; - } + bh = head = page_buffers(page); + do { + if (!buffer_uptodate(bh)) + break; + if (!buffer_mapped(bh)) + break; + ret += bh->b_size; + if (ret >= pg_offset) + break; + } while ((bh = bh->b_this_page) != head); return ret; } @@ -648,8 +647,7 @@ xfs_probe_cluster( struct inode *inode, struct page *startpage, struct buffer_head *bh, - struct buffer_head *head, - int mapped) + struct buffer_head *head) { struct pagevec pvec; pgoff_t tindex, tlast, tloff; @@ -658,7 +656,7 @@ xfs_probe_cluster( /* First sum forwards in this page */ do { - if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) + if (!buffer_uptodate(bh) || !buffer_mapped(bh)) return total; total += bh->b_size; } while ((bh = bh->b_this_page) != head); @@ -692,7 +690,7 @@ xfs_probe_cluster( pg_offset = PAGE_CACHE_SIZE; if (page->index == tindex && trylock_page(page)) { - pg_len = xfs_probe_page(page, pg_offset, mapped); + pg_len = xfs_probe_page(page, pg_offset); unlock_page(page); } @@ -761,7 +759,6 @@ xfs_convert_page( struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, - int startio, int all_bh) { struct buffer_head *bh, *head; @@ -832,19 +829,14 @@ xfs_convert_page( ASSERT(imap->br_startblock != DELAYSTARTBLOCK); xfs_map_at_offset(inode, bh, imap, offset); - if (startio) { - xfs_add_to_ioend(inode, bh, offset, - type, ioendp, done); - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } + xfs_add_to_ioend(inode, bh, offset, type, + ioendp, done); + page_dirty--; count++; } else { type = IO_NEW; - if (buffer_mapped(bh) && all_bh && startio) { + if (buffer_mapped(bh) && all_bh) { lock_buffer(bh); xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -859,14 +851,12 @@ xfs_convert_page( if (uptodate && bh == head) SetPageUptodate(page); - if (startio) { - if (count) { - wbc->nr_to_write--; - if (wbc->nr_to_write <= 0) - done = 1; - } - xfs_start_page_writeback(page, !page_dirty, count); + if (count) { + if (--wbc->nr_to_write <= 0 && + wbc->sync_mode == WB_SYNC_NONE) + done = 1; } + xfs_start_page_writeback(page, !page_dirty, count); return done; fail_unlock_page: @@ -886,7 +876,6 @@ xfs_cluster_write( struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, - int startio, int all_bh, pgoff_t tlast) { @@ -902,7 +891,7 @@ xfs_cluster_write( for (i = 0; i < pagevec_count(&pvec); i++) { done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc, startio, all_bh); + imap, ioendp, wbc, all_bh); if (done) break; } @@ -981,7 +970,7 @@ xfs_aops_discard_page( */ error = xfs_bmapi(NULL, ip, offset_fsb, 1, XFS_BMAPI_ENTIRE, NULL, 0, &imap, - &nimaps, NULL, NULL); + &nimaps, NULL); if (error) { /* something screwed, just bail */ @@ -1009,7 +998,7 @@ xfs_aops_discard_page( */ xfs_bmap_init(&flist, &firstblock); error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, - &flist, NULL, &done); + &flist, &done); ASSERT(!flist.xbf_count && !flist.xbf_first); if (error) { @@ -1032,50 +1021,66 @@ out_invalidate: } /* - * Calling this without startio set means we are being asked to make a dirty - * page ready for freeing it's buffers. When called with startio set then - * we are coming from writepage. + * Write out a dirty page. + * + * For delalloc space on the page we need to allocate space and flush it. + * For unwritten space on the page we need to start the conversion to + * regular allocated space. + * For any other dirty buffer heads on the page we should flush them. * - * When called with startio set it is important that we write the WHOLE - * page if possible. - * The bh->b_state's cannot know if any of the blocks or which block for - * that matter are dirty due to mmap writes, and therefore bh uptodate is - * only valid if the page itself isn't completely uptodate. Some layers - * may clear the page dirty flag prior to calling write page, under the - * assumption the entire page will be written out; by not writing out the - * whole page the page can be reused before all valid dirty data is - * written out. Note: in the case of a page that has been dirty'd by - * mapwrite and but partially setup by block_prepare_write the - * bh->b_states's will not agree and only ones setup by BPW/BCW will have - * valid state, thus the whole page must be written out thing. + * If we detect that a transaction would be required to flush the page, we + * have to check the process flags first, if we are already in a transaction + * or disk I/O during allocations is off, we need to fail the writepage and + * redirty the page. */ - STATIC int -xfs_page_state_convert( - struct inode *inode, - struct page *page, - struct writeback_control *wbc, - int startio, - int unmapped) /* also implies page uptodate */ +xfs_vm_writepage( + struct page *page, + struct writeback_control *wbc) { + struct inode *inode = page->mapping->host; + int delalloc, unwritten; struct buffer_head *bh, *head; struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; loff_t offset; - unsigned long p_offset = 0; unsigned int type; __uint64_t end_offset; pgoff_t end_index, last_index; ssize_t size, len; int flags, err, imap_valid = 0, uptodate = 1; - int page_dirty, count = 0; - int trylock = 0; - int all_bh = unmapped; + int count = 0; + int all_bh = 0; - if (startio) { - if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) - trylock |= BMAPI_TRYLOCK; - } + trace_xfs_writepage(inode, page, 0); + + ASSERT(page_has_buffers(page)); + + /* + * Refuse to write the page out if we are called from reclaim context. + * + * This avoids stack overflows when called from deeply used stacks in + * random callers for direct reclaim or memcg reclaim. We explicitly + * allow reclaim from kswapd as the stack usage there is relatively low. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) + goto redirty; + + /* + * We need a transaction if there are delalloc or unwritten buffers + * on the page. + * + * If we need a transaction and the process flags say we are already + * in a transaction, or no IO is allowed then mark the page dirty + * again and leave the page as is. + */ + xfs_count_page_state(page, &delalloc, &unwritten); + if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) + goto redirty; /* Is this page beyond the end of the file? */ offset = i_size_read(inode); @@ -1084,50 +1089,33 @@ xfs_page_state_convert( if (page->index >= end_index) { if ((page->index >= end_index + 1) || !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { - if (startio) - unlock_page(page); + unlock_page(page); return 0; } } - /* - * page_dirty is initially a count of buffers on the page before - * EOF and is decremented as we move each into a cleanable state. - * - * Derivation: - * - * End offset is the highest offset that this page should represent. - * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) - * will evaluate non-zero and be less than PAGE_CACHE_SIZE and - * hence give us the correct page_dirty count. On any other page, - * it will be zero and in that case we need page_dirty to be the - * count of buffers on the page. - */ end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + offset); len = 1 << inode->i_blkbits; - p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), - PAGE_CACHE_SIZE); - p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; - page_dirty = p_offset / len; bh = head = page_buffers(page); offset = page_offset(page); flags = BMAPI_READ; type = IO_NEW; - /* TODO: cleanup count and page_dirty */ - do { if (offset >= end_offset) break; if (!buffer_uptodate(bh)) uptodate = 0; - if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { - /* - * the iomap is actually still valid, but the ioend - * isn't. shouldn't happen too often. - */ + + /* + * A hole may still be marked uptodate because discard_buffer + * leaves the flag set. + */ + if (!buffer_mapped(bh) && buffer_uptodate(bh)) { + ASSERT(!buffer_dirty(bh)); imap_valid = 0; continue; } @@ -1135,19 +1123,7 @@ xfs_page_state_convert( if (imap_valid) imap_valid = xfs_imap_valid(inode, &imap, offset); - /* - * First case, map an unwritten extent and prepare for - * extent state conversion transaction on completion. - * - * Second case, allocate space for a delalloc buffer. - * We can return EAGAIN here in the release page case. - * - * Third case, an unmapped buffer was found, and we are - * in a path where we need to write the whole page out. - */ - if (buffer_unwritten(bh) || buffer_delay(bh) || - ((buffer_uptodate(bh) || PageUptodate(page)) && - !buffer_mapped(bh) && (unmapped || startio))) { + if (buffer_unwritten(bh) || buffer_delay(bh)) { int new_ioend = 0; /* @@ -1161,15 +1137,15 @@ xfs_page_state_convert( flags = BMAPI_WRITE | BMAPI_IGNSTATE; } else if (buffer_delay(bh)) { type = IO_DELAY; - flags = BMAPI_ALLOCATE | trylock; - } else { - type = IO_NEW; - flags = BMAPI_WRITE | BMAPI_MMAP; + flags = BMAPI_ALLOCATE; + + if (wbc->sync_mode == WB_SYNC_NONE) + flags |= BMAPI_TRYLOCK; } if (!imap_valid) { /* - * if we didn't have a valid mapping then we + * If we didn't have a valid mapping then we * need to ensure that we put the new mapping * in a new ioend structure. This needs to be * done to ensure that the ioends correctly @@ -1177,14 +1153,7 @@ xfs_page_state_convert( * for unwritten extent conversion. */ new_ioend = 1; - if (type == IO_NEW) { - size = xfs_probe_cluster(inode, - page, bh, head, 0); - } else { - size = len; - } - - err = xfs_map_blocks(inode, offset, size, + err = xfs_map_blocks(inode, offset, len, &imap, flags); if (err) goto error; @@ -1193,19 +1162,11 @@ xfs_page_state_convert( } if (imap_valid) { xfs_map_at_offset(inode, bh, &imap, offset); - if (startio) { - xfs_add_to_ioend(inode, bh, offset, - type, &ioend, - new_ioend); - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } - page_dirty--; + xfs_add_to_ioend(inode, bh, offset, type, + &ioend, new_ioend); count++; } - } else if (buffer_uptodate(bh) && startio) { + } else if (buffer_uptodate(bh)) { /* * we got here because the buffer is already mapped. * That means it must already have extents allocated @@ -1213,8 +1174,7 @@ xfs_page_state_convert( */ if (!imap_valid || flags != BMAPI_READ) { flags = BMAPI_READ; - size = xfs_probe_cluster(inode, page, bh, - head, 1); + size = xfs_probe_cluster(inode, page, bh, head); err = xfs_map_blocks(inode, offset, size, &imap, flags); if (err) @@ -1233,18 +1193,16 @@ xfs_page_state_convert( */ type = IO_NEW; if (trylock_buffer(bh)) { - ASSERT(buffer_mapped(bh)); if (imap_valid) all_bh = 1; xfs_add_to_ioend(inode, bh, offset, type, &ioend, !imap_valid); - page_dirty--; count++; } else { imap_valid = 0; } - } else if ((buffer_uptodate(bh) || PageUptodate(page)) && - (unmapped || startio)) { + } else if (PageUptodate(page)) { + ASSERT(buffer_mapped(bh)); imap_valid = 0; } @@ -1256,8 +1214,7 @@ xfs_page_state_convert( if (uptodate && bh == head) SetPageUptodate(page); - if (startio) - xfs_start_page_writeback(page, 1, count); + xfs_start_page_writeback(page, 1, count); if (ioend && imap_valid) { xfs_off_t end_index; @@ -1275,131 +1232,30 @@ xfs_page_state_convert( end_index = last_index; xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, startio, all_bh, end_index); + wbc, all_bh, end_index); } if (iohead) xfs_submit_ioend(wbc, iohead); - return page_dirty; + return 0; error: if (iohead) xfs_cancel_ioend(iohead); - /* - * If it's delalloc and we have nowhere to put it, - * throw it away, unless the lower layers told - * us to try again. - */ - if (err != -EAGAIN) { - if (!unmapped) - xfs_aops_discard_page(page); - ClearPageUptodate(page); - } - return err; -} + if (err == -EAGAIN) + goto redirty; -/* - * writepage: Called from one of two places: - * - * 1. we are flushing a delalloc buffer head. - * - * 2. we are writing out a dirty page. Typically the page dirty - * state is cleared before we get here. In this case is it - * conceivable we have no buffer heads. - * - * For delalloc space on the page we need to allocate space and - * flush it. For unmapped buffer heads on the page we should - * allocate space if the page is uptodate. For any other dirty - * buffer heads on the page we should flush them. - * - * If we detect that a transaction would be required to flush - * the page, we have to check the process flags first, if we - * are already in a transaction or disk I/O during allocations - * is off, we need to fail the writepage and redirty the page. - */ - -STATIC int -xfs_vm_writepage( - struct page *page, - struct writeback_control *wbc) -{ - int error; - int need_trans; - int delalloc, unmapped, unwritten; - struct inode *inode = page->mapping->host; - - trace_xfs_writepage(inode, page, 0); - - /* - * Refuse to write the page out if we are called from reclaim context. - * - * This is primarily to avoid stack overflows when called from deep - * used stacks in random callers for direct reclaim, but disabling - * reclaim for kswap is a nice side-effect as kswapd causes rather - * suboptimal I/O patters, too. - * - * This should really be done by the core VM, but until that happens - * filesystems like XFS, btrfs and ext4 have to take care of this - * by themselves. - */ - if (current->flags & PF_MEMALLOC) - goto out_fail; - - /* - * We need a transaction if: - * 1. There are delalloc buffers on the page - * 2. The page is uptodate and we have unmapped buffers - * 3. The page is uptodate and we have no buffers - * 4. There are unwritten buffers on the page - */ - - if (!page_has_buffers(page)) { - unmapped = 1; - need_trans = 1; - } else { - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!PageUptodate(page)) - unmapped = 0; - need_trans = delalloc + unmapped + unwritten; - } - - /* - * If we need a transaction and the process flags say - * we are already in a transaction, or no IO is allowed - * then mark the page dirty again and leave the page - * as is. - */ - if (current_test_flags(PF_FSTRANS) && need_trans) - goto out_fail; - - /* - * Delay hooking up buffer heads until we have - * made our go/no-go decision. - */ - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); - - /* - * Convert delayed allocate, unwritten or unmapped space - * to real space and flush out to disk. - */ - error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); - if (error == -EAGAIN) - goto out_fail; - if (unlikely(error < 0)) - goto out_unlock; - - return 0; + xfs_aops_discard_page(page); + ClearPageUptodate(page); + unlock_page(page); + return err; -out_fail: +redirty: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; -out_unlock: - unlock_page(page); - return error; } STATIC int @@ -1413,65 +1269,27 @@ xfs_vm_writepages( /* * Called to move a page into cleanable state - and from there - * to be released. Possibly the page is already clean. We always + * to be released. The page should already be clean. We always * have buffer heads in this call. * - * Returns 0 if the page is ok to release, 1 otherwise. - * - * Possible scenarios are: - * - * 1. We are being called to release a page which has been written - * to via regular I/O. buffer heads will be dirty and possibly - * delalloc. If no delalloc buffer heads in this case then we - * can just return zero. - * - * 2. We are called to release a page which has been written via - * mmap, all we need to do is ensure there is no delalloc - * state in the buffer heads, if not we can let the caller - * free them and we should come back later via writepage. + * Returns 1 if the page is ok to release, 0 otherwise. */ STATIC int xfs_vm_releasepage( struct page *page, gfp_t gfp_mask) { - struct inode *inode = page->mapping->host; - int dirty, delalloc, unmapped, unwritten; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; + int delalloc, unwritten; - trace_xfs_releasepage(inode, page, 0); + trace_xfs_releasepage(page->mapping->host, page, 0); - if (!page_has_buffers(page)) - return 0; - - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!delalloc && !unwritten) - goto free_buffers; + xfs_count_page_state(page, &delalloc, &unwritten); - if (!(gfp_mask & __GFP_FS)) + if (WARN_ON(delalloc)) return 0; - - /* If we are already inside a transaction or the thread cannot - * do I/O, we cannot release this page. - */ - if (current_test_flags(PF_FSTRANS)) + if (WARN_ON(unwritten)) return 0; - /* - * Convert delalloc space to real space, do not flush the - * data out to disk, that will be done by the caller. - * Never need to allocate space here - we will always - * come back to writepage in that case. - */ - dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); - if (dirty == 0 && !unwritten) - goto free_buffers; - return 0; - -free_buffers: return try_to_free_buffers(page); } @@ -1481,9 +1299,9 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - int direct, - bmapi_flags_t flags) + int direct) { + int flags = create ? BMAPI_WRITE : BMAPI_READ; struct xfs_bmbt_irec imap; xfs_off_t offset; ssize_t size; @@ -1498,8 +1316,11 @@ __xfs_get_blocks( if (!create && direct && offset >= i_size_read(inode)) return 0; - error = xfs_iomap(XFS_I(inode), offset, size, - create ? flags : BMAPI_READ, &imap, &nimap, &new); + if (direct && create) + flags |= BMAPI_DIRECT; + + error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, + &new); if (error) return -error; if (nimap == 0) @@ -1579,8 +1400,7 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, - bh_result, create, 0, BMAPI_WRITE); + return __xfs_get_blocks(inode, iblock, bh_result, create, 0); } STATIC int @@ -1590,61 +1410,59 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, - bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); + return __xfs_get_blocks(inode, iblock, bh_result, create, 1); } +/* + * Complete a direct I/O write request. + * + * If the private argument is non-NULL __xfs_get_blocks signals us that we + * need to issue a transaction to convert the range from unwritten to written + * extents. In case this is regular synchronous I/O we just call xfs_end_io + * to do this and we are done. But in case this was a successfull AIO + * request this handler is called from interrupt context, from which we + * can't start transactions. In that case offload the I/O completion to + * the workqueues we also use for buffered I/O completion. + */ STATIC void -xfs_end_io_direct( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) +xfs_end_io_direct_write( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private, + int ret, + bool is_async) { - xfs_ioend_t *ioend = iocb->private; + struct xfs_ioend *ioend = iocb->private; /* - * Non-NULL private data means we need to issue a transaction to - * convert a range from unwritten to written extents. This needs - * to happen from process context but aio+dio I/O completion - * happens from irq context so we need to defer it to a workqueue. - * This is not necessary for synchronous direct I/O, but we do - * it anyway to keep the code uniform and simpler. - * - * Well, if only it were that simple. Because synchronous direct I/O - * requires extent conversion to occur *before* we return to userspace, - * we have to wait for extent conversion to complete. Look at the - * iocb that has been passed to us to determine if this is AIO or - * not. If it is synchronous, tell xfs_finish_ioend() to kick the - * workqueue and wait for it to complete. - * - * The core direct I/O code might be changed to always call the - * completion handler in the future, in which case all this can - * go away. + * blockdev_direct_IO can return an error even after the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. */ + iocb->private = NULL; + ioend->io_offset = offset; ioend->io_size = size; - if (ioend->io_type == IO_READ) { - xfs_finish_ioend(ioend, 0); - } else if (private && size > 0) { - xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); - } else { + if (private && size > 0) + ioend->io_type = IO_UNWRITTEN; + + if (is_async) { /* - * A direct I/O write ioend starts it's life in unwritten - * state in case they map an unwritten extent. This write - * didn't map an unwritten extent so switch it's completion - * handler. + * If we are converting an unwritten extent we need to delay + * the AIO completion until after the unwrittent extent + * conversion has completed, otherwise do it ASAP. */ - ioend->io_type = IO_NEW; - xfs_finish_ioend(ioend, 0); + if (ioend->io_type == IO_UNWRITTEN) { + ioend->io_iocb = iocb; + ioend->io_result = ret; + } else { + aio_complete(iocb, ret, 0); + } + xfs_finish_ioend(ioend); + } else { + xfs_finish_ioend_sync(ioend); } - - /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. - */ - iocb->private = NULL; } STATIC ssize_t @@ -1655,26 +1473,45 @@ xfs_vm_direct_IO( loff_t offset, unsigned long nr_segs) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct block_device *bdev; - ssize_t ret; - - bdev = xfs_find_bdev_for_inode(inode); + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct block_device *bdev = xfs_find_bdev_for_inode(inode); + ssize_t ret; - iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? - IO_UNWRITTEN : IO_READ); + if (rw & WRITE) { + iocb->private = xfs_alloc_ioend(inode, IO_NEW); - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, xfs_get_blocks_direct, - xfs_end_io_direct); + xfs_end_io_direct_write, NULL, 0); + if (ret != -EIOCBQUEUED && iocb->private) + xfs_destroy_ioend(iocb->private); + } else { + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + NULL, NULL, 0); + } - if (unlikely(ret != -EIOCBQUEUED && iocb->private)) - xfs_destroy_ioend(iocb->private); return ret; } +STATIC void +xfs_vm_write_failed( + struct address_space *mapping, + loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + struct iattr ia = { + .ia_valid = ATTR_SIZE | ATTR_FORCE, + .ia_size = inode->i_size, + }; + xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); + } +} + STATIC int xfs_vm_write_begin( struct file *file, @@ -1685,9 +1522,31 @@ xfs_vm_write_begin( struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - xfs_get_blocks); + int ret; + + ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, xfs_get_blocks); + if (unlikely(ret)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC int +xfs_vm_write_end( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned copied, + struct page *page, + void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (unlikely(ret < len)) + xfs_vm_write_failed(mapping, pos + len); + return ret; } STATIC sector_t @@ -1698,7 +1557,7 @@ xfs_vm_bmap( struct inode *inode = (struct inode *)mapping->host; struct xfs_inode *ip = XFS_I(inode); - xfs_itrace_entry(XFS_I(inode)); + trace_xfs_vm_bmap(XFS_I(inode)); xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -1732,7 +1591,7 @@ const struct address_space_operations xfs_address_space_operations = { .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, - .write_end = generic_write_end, + .write_end = xfs_vm_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 4cfc6ea87df8..c5057fb6237a 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -37,6 +37,8 @@ typedef struct xfs_ioend { size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ + struct kiocb *io_iocb; + int io_result; } xfs_ioend_t; extern const struct address_space_operations xfs_address_space_operations; @@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); extern void xfs_ioend_init(void); extern void xfs_ioend_wait(struct xfs_inode *); -extern void xfs_count_page_state(struct page *, int *, int *, int *); +extern void xfs_count_page_state(struct page *, int *, int *); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2ee3f7a60163..63fd2c07cb57 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -39,7 +39,6 @@ #include "xfs_inum.h" #include "xfs_log.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trace.h" @@ -189,8 +188,8 @@ _xfs_buf_initialize( atomic_set(&bp->b_hold, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_list); - INIT_LIST_HEAD(&bp->b_hash_list); - init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ + RB_CLEAR_NODE(&bp->b_rbnode); + sema_init(&bp->b_sema, 0); /* held, no waiters */ XB_SET_OWNER(bp); bp->b_target = target; bp->b_file_offset = range_base; @@ -263,8 +262,6 @@ xfs_buf_free( { trace_xfs_buf_free(bp, _RET_IP_); - ASSERT(list_empty(&bp->b_hash_list)); - if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; @@ -423,8 +420,10 @@ _xfs_buf_find( { xfs_off_t range_base; size_t range_length; - xfs_bufhash_t *hash; - xfs_buf_t *bp, *n; + struct xfs_perag *pag; + struct rb_node **rbp; + struct rb_node *parent; + xfs_buf_t *bp; range_base = (ioff << BBSHIFT); range_length = (isize << BBSHIFT); @@ -433,20 +432,38 @@ _xfs_buf_find( ASSERT(!(range_length < (1 << btp->bt_sshift))); ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); - hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; - - spin_lock(&hash->bh_lock); - - list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { - ASSERT(btp == bp->b_target); - if (bp->b_file_offset == range_base && - bp->b_buffer_length == range_length) { + /* get tree root */ + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, ioff)); + + /* walk tree */ + spin_lock(&pag->pag_buf_lock); + rbp = &pag->pag_buf_tree.rb_node; + parent = NULL; + bp = NULL; + while (*rbp) { + parent = *rbp; + bp = rb_entry(parent, struct xfs_buf, b_rbnode); + + if (range_base < bp->b_file_offset) + rbp = &(*rbp)->rb_left; + else if (range_base > bp->b_file_offset) + rbp = &(*rbp)->rb_right; + else { /* - * If we look at something, bring it to the - * front of the list for next time. + * found a block offset match. If the range doesn't + * match, the only way this is allowed is if the buffer + * in the cache is stale and the transaction that made + * it stale has not yet committed. i.e. we are + * reallocating a busy extent. Skip this buffer and + * continue searching to the right for an exact match. */ + if (bp->b_buffer_length != range_length) { + ASSERT(bp->b_flags & XBF_STALE); + rbp = &(*rbp)->rb_right; + continue; + } atomic_inc(&bp->b_hold); - list_move(&bp->b_hash_list, &hash->bh_list); goto found; } } @@ -455,17 +472,21 @@ _xfs_buf_find( if (new_bp) { _xfs_buf_initialize(new_bp, btp, range_base, range_length, flags); - new_bp->b_hash = hash; - list_add(&new_bp->b_hash_list, &hash->bh_list); + rb_link_node(&new_bp->b_rbnode, parent, rbp); + rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); } else { XFS_STATS_INC(xb_miss_locked); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); } - - spin_unlock(&hash->bh_lock); return new_bp; found: - spin_unlock(&hash->bh_lock); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); /* Attempt to get the semaphore without sleeping, * if this does not work then we need to drop the @@ -579,9 +600,9 @@ _xfs_buf_read( XBF_READ_AHEAD | _XBF_RUN_QUEUES); status = xfs_buf_iorequest(bp); - if (!status && !(flags & XBF_ASYNC)) - status = xfs_buf_iowait(bp); - return status; + if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) + return status; + return xfs_buf_iowait(bp); } xfs_buf_t * @@ -631,8 +652,7 @@ void xfs_buf_readahead( xfs_buftarg_t *target, xfs_off_t ioff, - size_t isize, - xfs_buf_flags_t flags) + size_t isize) { struct backing_dev_info *bdi; @@ -640,8 +660,42 @@ xfs_buf_readahead( if (bdi_read_congested(bdi)) return; - flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); - xfs_buf_read(target, ioff, isize, flags); + xfs_buf_read(target, ioff, isize, + XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); +} + +/* + * Read an uncached buffer from disk. Allocates and returns a locked + * buffer containing the disk contents or nothing. + */ +struct xfs_buf * +xfs_buf_read_uncached( + struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t length, + int flags) +{ + xfs_buf_t *bp; + int error; + + bp = xfs_buf_get_uncached(target, length, flags); + if (!bp) + return NULL; + + /* set up the buffer for a read IO */ + xfs_buf_lock(bp); + XFS_BUF_SET_ADDR(bp, daddr); + XFS_BUF_READ(bp); + XFS_BUF_BUSY(bp); + + xfsbdstrat(mp, bp); + error = xfs_buf_iowait(bp); + if (error || bp->b_error) { + xfs_buf_relse(bp); + return NULL; + } + return bp; } xfs_buf_t * @@ -713,9 +767,10 @@ xfs_buf_associate_memory( } xfs_buf_t * -xfs_buf_get_noaddr( +xfs_buf_get_uncached( + struct xfs_buftarg *target, size_t len, - xfs_buftarg_t *target) + int flags) { unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; int error, i; @@ -731,7 +786,7 @@ xfs_buf_get_noaddr( goto fail_free_buf; for (i = 0; i < page_count; i++) { - bp->b_pages[i] = alloc_page(GFP_KERNEL); + bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); if (!bp->b_pages[i]) goto fail_free_mem; } @@ -746,7 +801,7 @@ xfs_buf_get_noaddr( xfs_buf_unlock(bp); - trace_xfs_buf_get_noaddr(bp, _RET_IP_); + trace_xfs_buf_get_uncached(bp, _RET_IP_); return bp; fail_free_mem: @@ -780,29 +835,30 @@ void xfs_buf_rele( xfs_buf_t *bp) { - xfs_bufhash_t *hash = bp->b_hash; + struct xfs_perag *pag = bp->b_pag; trace_xfs_buf_rele(bp, _RET_IP_); - if (unlikely(!hash)) { + if (!pag) { ASSERT(!bp->b_relse); + ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); if (atomic_dec_and_test(&bp->b_hold)) xfs_buf_free(bp); return; } + ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); ASSERT(atomic_read(&bp->b_hold) > 0); - if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { + if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { if (bp->b_relse) { atomic_inc(&bp->b_hold); - spin_unlock(&hash->bh_lock); - (*(bp->b_relse)) (bp); - } else if (bp->b_flags & XBF_FS_MANAGED) { - spin_unlock(&hash->bh_lock); + spin_unlock(&pag->pag_buf_lock); + bp->b_relse(bp); } else { ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); - list_del_init(&bp->b_hash_list); - spin_unlock(&hash->bh_lock); + rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); xfs_buf_free(bp); } } @@ -865,7 +921,7 @@ xfs_buf_lock( trace_xfs_buf_lock(bp, _RET_IP_); if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_mount, 0); + xfs_log_force(bp->b_target->bt_mount, 0); if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); down(&bp->b_sema); @@ -897,36 +953,6 @@ xfs_buf_unlock( trace_xfs_buf_unlock(bp, _RET_IP_); } - -/* - * Pinning Buffer Storage in Memory - * Ensure that no attempt to force a buffer to disk will succeed. - */ -void -xfs_buf_pin( - xfs_buf_t *bp) -{ - trace_xfs_buf_pin(bp, _RET_IP_); - atomic_inc(&bp->b_pin_count); -} - -void -xfs_buf_unpin( - xfs_buf_t *bp) -{ - trace_xfs_buf_unpin(bp, _RET_IP_); - - if (atomic_dec_and_test(&bp->b_pin_count)) - wake_up_all(&bp->b_waiters); -} - -int -xfs_buf_ispin( - xfs_buf_t *bp) -{ - return atomic_read(&bp->b_pin_count); -} - STATIC void xfs_buf_wait_unpin( xfs_buf_t *bp) @@ -960,19 +986,7 @@ xfs_buf_iodone_work( xfs_buf_t *bp = container_of(work, xfs_buf_t, b_iodone_work); - /* - * We can get an EOPNOTSUPP to ordered writes. Here we clear the - * ordered flag and reissue them. Because we can't tell the higher - * layers directly that they should not issue ordered I/O anymore, they - * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. - */ - if ((bp->b_error == EOPNOTSUPP) && - (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { - trace_xfs_buf_ordered_retry(bp, _RET_IP_); - bp->b_flags &= ~XBF_ORDERED; - bp->b_flags |= _XFS_BARRIER_FAILED; - xfs_buf_iorequest(bp); - } else if (bp->b_iodone) + if (bp->b_iodone) (*(bp->b_iodone))(bp); else if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); @@ -1018,13 +1032,11 @@ xfs_bwrite( { int error; - bp->b_strat = xfs_bdstrat_cb; - bp->b_mount = mp; bp->b_flags |= XBF_WRITE; bp->b_flags &= ~(XBF_ASYNC | XBF_READ); xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); error = xfs_buf_iowait(bp); if (error) @@ -1040,9 +1052,6 @@ xfs_bdwrite( { trace_xfs_buf_bdwrite(bp, _RET_IP_); - bp->b_strat = xfs_bdstrat_cb; - bp->b_mount = mp; - bp->b_flags &= ~XBF_READ; bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); @@ -1051,7 +1060,7 @@ xfs_bdwrite( /* * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call biodone + * We attach the EIO error, muck with its flags, and call xfs_buf_ioend * so that the proper iodone callbacks get called. */ STATIC int @@ -1068,22 +1077,21 @@ xfs_bioerror( XFS_BUF_ERROR(bp, EIO); /* - * We're calling biodone, so delete XBF_DONE flag. + * We're calling xfs_buf_ioend, so delete XBF_DONE flag. */ XFS_BUF_UNREAD(bp); XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); return EIO; } /* * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the biodone call. + * here ourselves, and avoiding the xfs_buf_ioend call. * This is meant for userdata errors; metadata bufs come with * iodone functions attached, so that we can track down errors. */ @@ -1105,7 +1113,6 @@ xfs_bioerror_relse( XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. @@ -1133,7 +1140,7 @@ int xfs_bdstrat_cb( struct xfs_buf *bp) { - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { trace_xfs_bdstrat_shut(bp, _RET_IP_); /* * Metadata write that didn't get logged but @@ -1235,7 +1242,7 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); - rw = WRITE_BARRIER; + rw = WRITE_FLUSH_FUA; } else if (bp->b_flags & XBF_LOG_BUFFER) { ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); bp->b_flags &= ~_XBF_RUN_QUEUES; @@ -1311,8 +1318,19 @@ submit_io: if (size) goto next_chunk; } else { - bio_put(bio); + /* + * if we get here, no pages were added to the bio. However, + * we can't just error out here - if the pages are locked then + * we have to unlock them otherwise we can hang on a later + * access to the page. + */ xfs_buf_ioerror(bp, EIO); + if (bp->b_flags & _XBF_PAGE_LOCKED) { + int i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + bio_put(bio); } } @@ -1428,63 +1446,24 @@ xfs_buf_iomove( */ void xfs_wait_buftarg( - xfs_buftarg_t *btp) -{ - xfs_buf_t *bp, *n; - xfs_bufhash_t *hash; - uint i; - - for (i = 0; i < (1 << btp->bt_hashshift); i++) { - hash = &btp->bt_hash[i]; -again: - spin_lock(&hash->bh_lock); - list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { - ASSERT(btp == bp->b_target); - if (!(bp->b_flags & XBF_FS_MANAGED)) { - spin_unlock(&hash->bh_lock); - /* - * Catch superblock reference count leaks - * immediately - */ - BUG_ON(bp->b_bn == 0); - delay(100); - goto again; - } - } - spin_unlock(&hash->bh_lock); - } -} - -/* - * Allocate buffer hash table for a given target. - * For devices containing metadata (i.e. not the log/realtime devices) - * we need to allocate a much larger hash table. - */ -STATIC void -xfs_alloc_bufhash( - xfs_buftarg_t *btp, - int external) + struct xfs_buftarg *btp) { - unsigned int i; + struct xfs_perag *pag; + uint i; - btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ - btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; - btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t)); - for (i = 0; i < (1 << btp->bt_hashshift); i++) { - spin_lock_init(&btp->bt_hash[i].bh_lock); - INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); + for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { + pag = xfs_perag_get(btp->bt_mount, i); + spin_lock(&pag->pag_buf_lock); + while (rb_first(&pag->pag_buf_tree)) { + spin_unlock(&pag->pag_buf_lock); + delay(100); + spin_lock(&pag->pag_buf_lock); + } + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); } } -STATIC void -xfs_free_bufhash( - xfs_buftarg_t *btp) -{ - kmem_free_large(btp->bt_hash); - btp->bt_hash = NULL; -} - /* * buftarg list for delwrite queue processing */ @@ -1517,7 +1496,6 @@ xfs_free_buftarg( xfs_flush_buftarg(btp, 1); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); - xfs_free_bufhash(btp); iput(btp->bt_mapping->host); /* Unregister the buftarg first so that we don't get a @@ -1602,6 +1580,7 @@ xfs_mapping_buftarg( XFS_BUFTARG_NAME(btp)); return ENOMEM; } + inode->i_ino = get_next_ino(); inode->i_mode = S_IFBLK; inode->i_bdev = bdev; inode->i_rdev = bdev->bd_dev; @@ -1639,6 +1618,7 @@ out_error: xfs_buftarg_t * xfs_alloc_buftarg( + struct xfs_mount *mp, struct block_device *bdev, int external, const char *fsname) @@ -1647,6 +1627,7 @@ xfs_alloc_buftarg( btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; if (xfs_setsize_buftarg_early(btp, bdev)) @@ -1655,7 +1636,6 @@ xfs_alloc_buftarg( goto error; if (xfs_alloc_delwrite_queue(btp, fsname)) goto error; - xfs_alloc_bufhash(btp, external); return btp; error: @@ -1804,7 +1784,7 @@ xfs_buf_delwri_split( trace_xfs_buf_delwri_split(bp, _RET_IP_); ASSERT(bp->b_flags & XBF_DELWRI); - if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { + if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); @@ -1889,7 +1869,7 @@ xfsbufd( struct xfs_buf *bp; bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); count++; } if (count) @@ -1936,7 +1916,7 @@ xfs_flush_buftarg( bp->b_flags &= ~XBF_ASYNC; list_add(&bp->b_list, &wait_list); } - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); } if (wait) { @@ -1946,7 +1926,7 @@ xfs_flush_buftarg( bp = list_first_entry(&wait_list, struct xfs_buf, b_list); list_del_init(&bp->b_list); - xfs_iowait(bp); + xfs_buf_iowait(bp); xfs_buf_relse(bp); } } @@ -1962,7 +1942,8 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out; - xfslogd_workqueue = create_workqueue("xfslogd"); + xfslogd_workqueue = alloc_workqueue("xfslogd", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); if (!xfslogd_workqueue) goto out_free_buf_zone; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 5fbecefa5dfd..383a3f37cf98 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -44,57 +44,48 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -typedef enum { - XBF_READ = (1 << 0), /* buffer intended for reading from device */ - XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ - XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ - XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ - XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ - XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ - XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ - XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ - XBF_ORDERED = (1 << 11), /* use ordered writes */ - XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ - XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ - - /* flags used only as arguments to access routines */ - XBF_LOCK = (1 << 14), /* lock requested */ - XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ - XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ - - /* flags used only internally */ - _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ - _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ - _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ - _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ +#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ +#define XBF_ORDERED (1 << 11)/* use ordered writes */ +#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ +#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ + +/* flags used only as arguments to access routines */ +#define XBF_LOCK (1 << 14)/* lock requested */ +#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ + +/* flags used only internally */ +#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ +#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ +#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ +#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ - /* - * Special flag for supporting metadata blocks smaller than a FSB. - * - * In this case we can have multiple xfs_buf_t on a single page and - * need to lock out concurrent xfs_buf_t readers as they only - * serialise access to the buffer. - * - * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation - * between reads of the page. Hence we can have one thread read the - * page and modify it, but then race with another thread that thinks - * the page is not up-to-date and hence reads it again. - * - * The result is that the first modifcation to the page is lost. - * This sort of AGF/AGI reading race can happen when unlinking inodes - * that require truncation and results in the AGI unlinked list - * modifications being lost. - */ - _XBF_PAGE_LOCKED = (1 << 22), +/* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ +#define _XBF_PAGE_LOCKED (1 << 22) - /* - * If we try a barrier write, but it fails we have to communicate - * this to the upper layers. Unfortunately b_error gets overwritten - * when the buffer is re-issued so we have to add another flag to - * keep this information. - */ - _XFS_BARRIER_FAILED = (1 << 23), -} xfs_buf_flags_t; +typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ { XBF_READ, "READ" }, \ @@ -104,7 +95,6 @@ typedef enum { { XBF_DONE, "DONE" }, \ { XBF_DELWRI, "DELWRI" }, \ { XBF_STALE, "STALE" }, \ - { XBF_FS_MANAGED, "FS_MANAGED" }, \ { XBF_ORDERED, "ORDERED" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_LOCK, "LOCK" }, /* should never be set */\ @@ -114,8 +104,7 @@ typedef enum { { _XBF_PAGES, "PAGES" }, \ { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ - { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } + { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } typedef enum { @@ -132,15 +121,11 @@ typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; struct address_space *bt_mapping; + struct xfs_mount *bt_mount; unsigned int bt_bsize; unsigned int bt_sshift; size_t bt_smask; - /* per device buffer hash table */ - uint bt_hashmask; - uint bt_hashshift; - xfs_bufhash_t *bt_hash; - /* per device delwri queue */ struct task_struct *bt_task; struct list_head bt_list; @@ -168,35 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); #define XB_PAGES 2 typedef struct xfs_buf { + /* + * first cacheline holds all the fields needed for an uncontended cache + * hit to be fully processed. The semaphore straddles the cacheline + * boundary, but the counter and lock sits on the first cacheline, + * which is the only bit that is touched if we hit the semaphore + * fast-path on locking. + */ + struct rb_node b_rbnode; /* rbtree node */ + xfs_off_t b_file_offset; /* offset in file */ + size_t b_buffer_length;/* size of buffer in bytes */ + atomic_t b_hold; /* reference count */ + xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ - unsigned long b_queuetime; /* time buffer was queued */ - atomic_t b_pin_count; /* pin count */ + wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; - xfs_buf_flags_t b_flags; /* status flags */ - struct list_head b_hash_list; /* hash table list */ - xfs_bufhash_t *b_hash; /* hash table list start */ + struct xfs_perag *b_pag; /* contains rbtree root */ xfs_buftarg_t *b_target; /* buffer target (device) */ - atomic_t b_hold; /* reference count */ xfs_daddr_t b_bn; /* block number for I/O */ - xfs_off_t b_file_offset; /* offset in file */ - size_t b_buffer_length;/* size of buffer in bytes */ size_t b_count_desired;/* desired transfer size */ void *b_addr; /* virtual address of buffer */ struct work_struct b_iodone_work; - atomic_t b_io_remaining; /* #outstanding I/O requests */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ - xfs_buf_bdstrat_t b_strat; /* pre-write function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; - struct xfs_mount *b_mount; - unsigned short b_error; /* error code on I/O */ - unsigned int b_page_count; /* size of page array */ - unsigned int b_offset; /* page offset in first page */ struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ + unsigned long b_queuetime; /* time buffer was queued */ + atomic_t b_pin_count; /* pin count */ + atomic_t b_io_remaining; /* #outstanding I/O requests */ + unsigned int b_page_count; /* size of page array */ + unsigned int b_offset; /* page offset in first page */ + unsigned short b_error; /* error code on I/O */ #ifdef XFS_BUF_LOCK_TRACKING int b_last_holder; #endif @@ -215,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, xfs_buf_flags_t); extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); -extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); +extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); extern void xfs_buf_hold(xfs_buf_t *); -extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); +extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); +struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, size_t length, int flags); /* Releasing Buffers */ extern void xfs_buf_free(xfs_buf_t *); @@ -244,11 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); - -static inline int xfs_buf_iostrategy(xfs_buf_t *bp) -{ - return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); -} +#define xfs_buf_zero(bp, off, len) \ + xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) static inline int xfs_buf_geterror(xfs_buf_t *bp) { @@ -258,11 +248,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp) /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); -/* Pinning Buffer Storage in Memory */ -extern void xfs_buf_pin(xfs_buf_t *); -extern void xfs_buf_unpin(xfs_buf_t *); -extern int xfs_buf_ispin(xfs_buf_t *); - /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); extern void xfs_buf_delwri_promote(xfs_buf_t *); @@ -288,8 +273,6 @@ extern void xfs_buf_terminate(void); XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) - #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) @@ -326,8 +309,6 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) #define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) #define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) -#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func)) -#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL) #define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) @@ -351,7 +332,7 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) #define XFS_BUF_SET_REF(bp, ref) do { } while (0) -#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) +#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) #define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) @@ -370,27 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) xfs_buf_rele(bp); } -#define xfs_bpin(bp) xfs_buf_pin(bp) -#define xfs_bunpin(bp) xfs_buf_unpin(bp) -#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) - -#define xfs_biomove(bp, off, len, data, rw) \ - xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) - -#define xfs_biozero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -#define xfs_iowait(bp) xfs_buf_iowait(bp) - -#define xfs_baread(target, rablkno, ralen) \ - xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) - - /* * Handling of buftargs. */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); +extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, + struct block_device *, int, const char *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h deleted file mode 100644 index 55bddf3b6091..000000000000 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_CRED_H__ -#define __XFS_CRED_H__ - -#include <linux/capability.h> - -/* - * Credentials - */ -typedef const struct cred cred_t; - -#endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h deleted file mode 100644 index a8b0b1685eed..000000000000 --- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DMAPI_PRIV_H__ -#define __XFS_DMAPI_PRIV_H__ - -/* - * Based on IO_ISDIRECT, decide which i_ flag is set. - */ -#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IMUX : 0) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) - -#endif /*__XFS_DMAPI_PRIV_H__*/ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index e7839ee49e43..3764d74790ec 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -23,13 +23,13 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_export.h" #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_inode_item.h" +#include "xfs_trace.h" /* * Note that we only accept fileids which are long enough rather than allow @@ -132,8 +132,7 @@ xfs_nfs_get_inode( * fine and not an indication of a corrupted filesystem as clients can * send invalid file handles and we have to handle it gracefully.. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, - XFS_ILOCK_SHARED, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); if (error) { /* * EINVAL means the inode cluster doesn't exist anymore. @@ -148,11 +147,10 @@ xfs_nfs_get_inode( } if (ip->i_d.di_gen != generation) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); + IRELE(ip); return ERR_PTR(-ENOENT); } - xfs_iunlock(ip, XFS_ILOCK_SHARED); return VFS_I(ip); } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 257a56b127cf..ba8ad422a165 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -22,23 +22,15 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_trans.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_vnodeops.h" #include "xfs_da_btree.h" #include "xfs_ioctl.h" @@ -108,7 +100,7 @@ xfs_file_fsync( int error = 0; int log_flushed = 0; - xfs_itrace_entry(ip); + trace_xfs_file_fsync(ip); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -XFS_ERROR(EIO); @@ -166,8 +158,7 @@ xfs_file_fsync( * transaction. So we play it safe and fire off the * transaction anyway. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); @@ -275,20 +266,6 @@ xfs_file_aio_read( mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { - int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); - int iolock = XFS_IOLOCK_SHARED; - - ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size, - dmflags, &iolock); - if (ret) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - if (unlikely(ioflags & IO_ISDIRECT)) - mutex_unlock(&inode->i_mutex); - return ret; - } - } - if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) { ret = -xfs_flushinval_pages(ip, @@ -321,7 +298,6 @@ xfs_file_splice_read( unsigned int flags) { struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - struct xfs_mount *mp = ip->i_mount; int ioflags = 0; ssize_t ret; @@ -335,18 +311,6 @@ xfs_file_splice_read( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { - int iolock = XFS_IOLOCK_SHARED; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, - FILP_DELAY_FLAG(infilp), &iolock); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return -error; - } - } - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); @@ -367,7 +331,6 @@ xfs_file_splice_write( { struct inode *inode = outfilp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; xfs_fsize_t isize, new_size; int ioflags = 0; ssize_t ret; @@ -382,18 +345,6 @@ xfs_file_splice_write( xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { - int iolock = XFS_IOLOCK_EXCL; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, - FILP_DELAY_FLAG(outfilp), &iolock); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; - } - } - new_size = *ppos + count; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -463,7 +414,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, - &nimaps, NULL, NULL); + &nimaps, NULL); if (error) { return error; } @@ -558,7 +509,7 @@ xfs_zero_eof( nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, - 0, NULL, 0, &imap, &nimaps, NULL, NULL); + 0, NULL, 0, &imap, &nimaps, NULL); if (error) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; @@ -627,7 +578,6 @@ xfs_file_aio_write( int ioflags = 0; xfs_fsize_t isize, new_size; int iolock; - int eventsent = 0; size_t ocount = 0, count; int need_i_mutex; @@ -673,33 +623,6 @@ start: goto out_unlock_mutex; } - if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && - !(ioflags & IO_INVIS) && !eventsent)) { - int dmflags = FILP_DELAY_FLAG(file); - - if (need_i_mutex) - dmflags |= DM_FLAGS_IMUX; - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip, - pos, count, dmflags, &iolock); - if (error) { - goto out_unlock_internal; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - eventsent = 1; - - /* - * The iolock was dropped and reacquired in XFS_SEND_DATA - * so we have to recheck the size when appending. - * We will only "goto start;" once, since having sent the - * event prevents another call to XFS_SEND_DATA, which is - * what allows the size to change in the first place. - */ - if ((file->f_flags & O_APPEND) && pos != ip->i_size) - goto start; - } - if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? @@ -830,22 +753,6 @@ write_retry: xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (ret == -ENOSPC && - DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { - xfs_iunlock(ip, iolock); - if (need_i_mutex) - mutex_unlock(&inode->i_mutex); - error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip, - DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL, - 0, 0, 0); /* Delay flag intentionally unused */ - if (need_i_mutex) - mutex_lock(&inode->i_mutex); - xfs_ilock(ip, iolock); - if (error) - goto out_unlock_internal; - goto start; - } - error = -ret; if (ret <= 0) goto out_unlock_internal; @@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = { .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, -#ifdef HAVE_FOP_OPEN_EXEC - .open_exec = xfs_file_open_exec, -#endif }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index b6918d76bc7b..ed88ed16811c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -21,10 +21,6 @@ #include "xfs_inode.h" #include "xfs_trace.h" -int fs_noerr(void) { return 0; } -int fs_nosys(void) { return ENOSYS; } -void fs_noval(void) { return; } - /* * note: all filemap functions return negative error codes. These * need to be inverted before returning to the xfs core functions. @@ -36,10 +32,9 @@ xfs_tosspages( xfs_off_t last, int fiopt) { - struct address_space *mapping = VFS_I(ip)->i_mapping; - - if (mapping->nrpages) - truncate_inode_pages(mapping, first); + /* can't toss partial tail pages, so mask them out */ + last &= ~(PAGE_SIZE - 1); + truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); } int @@ -54,12 +49,11 @@ xfs_flushinval_pages( trace_xfs_pagecache_inval(ip, first, last); - if (mapping->nrpages) { - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_write_and_wait(mapping); - if (!ret) - truncate_inode_pages(mapping, first); - } + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = filemap_write_and_wait_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (!ret) + truncate_inode_pages_range(mapping, first, last); return -ret; } @@ -75,10 +69,9 @@ xfs_flush_pages( int ret = 0; int ret2; - if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite(mapping); - } + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = -filemap_fdatawrite_range(mapping, first, + last == -1 ? LLONG_MAX : last); if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); @@ -95,7 +88,9 @@ xfs_wait_on_pages( { struct address_space *mapping = VFS_I(ip)->i_mapping; - if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) - return -filemap_fdatawait(mapping); + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { + return -filemap_fdatawait_range(mapping, first, + last == -1 ? ip->i_size - 1 : last); + } return 0; } diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h deleted file mode 100644 index 82bb19b2599e..000000000000 --- a/fs/xfs/linux-2.6/xfs_fs_subr.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_FS_SUBR_H__ -#define __XFS_FS_SUBR_H__ - -extern int fs_noerr(void); -extern int fs_nosys(void); -extern void fs_noval(void); - -#endif /* __XFS_FS_SUBR_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 2ae8b1ccb02e..76e81cff70b9 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_cred.h" #include "xfs_sysctl.h" /* diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h deleted file mode 100644 index 69f71caf061c..000000000000 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_GLOBALS_H__ -#define __XFS_GLOBALS_H__ - -extern uint64_t xfs_panic_mask; /* set to cause more panics */ - -#endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index e59a81062830..2ea238f6d38e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -23,24 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ioctl.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_itable.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -794,10 +785,12 @@ xfs_ioc_fsgetxattr( { struct fsxattr fa; + memset(&fa, 0, sizeof(struct fsxattr)); + xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = ip->i_d.di_projid; + fa.fsx_projid = xfs_get_projid(ip); if (attr) { if (ip->i_afp) { @@ -908,7 +901,7 @@ xfs_ioctl_setattr( struct xfs_dquot *olddquot = NULL; int code; - xfs_itrace_entry(ip); + trace_xfs_ioctl_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); @@ -916,6 +909,13 @@ xfs_ioctl_setattr( return XFS_ERROR(EIO); /* + * Disallow 32bit project ids when projid32bit feature is not enabled. + */ + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + return XFS_ERROR(EINVAL); + + /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the ids @@ -961,7 +961,7 @@ xfs_ioctl_setattr( if (mask & FSX_PROJID) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && - ip->i_d.di_projid != fa->fsx_projid) { + xfs_get_projid(ip) != fa->fsx_projid) { ASSERT(tp); code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -1043,8 +1043,7 @@ xfs_ioctl_setattr( } } - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * Change file ownership. Must be the owner or privileged. @@ -1064,12 +1063,12 @@ xfs_ioctl_setattr( * Change the ownerships and register quota modifications * in the transaction. */ - if (ip->i_d.di_projid != fa->fsx_projid) { + if (xfs_get_projid(ip) != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - ip->i_d.di_projid = fa->fsx_projid; + xfs_set_projid(ip, fa->fsx_projid); /* * We may have to rev the inode as well as @@ -1089,8 +1088,8 @@ xfs_ioctl_setattr( xfs_diflags_to_linux(ip); } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); XFS_STATS_INC(xs_ig_attrchg); @@ -1116,16 +1115,7 @@ xfs_ioctl_setattr( xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); - if (code) - return code; - - if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) { - XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0, - (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0); - } - - return 0; + return code; error_return: xfs_qm_dqrele(udqp); @@ -1301,7 +1291,7 @@ xfs_file_ioctl( if (filp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - xfs_itrace_entry(ip); + trace_xfs_file_ioctl(ip); switch (cmd) { case XFS_IOC_ALLOCSP: @@ -1311,7 +1301,8 @@ xfs_file_ioctl( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: { + case XFS_IOC_UNRESVSP64: + case XFS_IOC_ZERO_RANGE: { xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 52ed49e6465c..b3486dfa5520 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -28,12 +28,8 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_vnode.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -168,7 +164,8 @@ xfs_ioctl32_bstat_copyin( get_user(bstat->bs_extsize, &bstat32->bs_extsize) || get_user(bstat->bs_extents, &bstat32->bs_extents) || get_user(bstat->bs_gen, &bstat32->bs_gen) || - get_user(bstat->bs_projid, &bstat32->bs_projid) || + get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || + get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || get_user(bstat->bs_aextents, &bstat32->bs_aextents)) @@ -222,6 +219,7 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_extents, &p32->bs_extents) || put_user(buffer->bs_gen, &p32->bs_gen) || put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) @@ -544,7 +542,7 @@ xfs_file_compat_ioctl( if (filp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - xfs_itrace_entry(ip); + trace_xfs_file_compat_ioctl(ip); switch (cmd) { /* No size or alignment issues on any arch */ @@ -578,6 +576,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSRT: + case XFS_IOC_ZERO_RANGE: return xfs_file_ioctl(filp, cmd, p); #else case XFS_IOC_ALLOCSP_32: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 1024c4f8ba0d..08b605792a99 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat { __s32 bs_extsize; /* extent size */ __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_projid_hi; /* high part of project id */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 44f0b2de153e..96107efc0c61 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -24,21 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" @@ -88,7 +80,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) mark_inode_dirty_sync(inode); } @@ -98,46 +90,11 @@ xfs_mark_inode_dirty( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) mark_inode_dirty(inode); } /* - * Change the requested timestamp in the given inode. - * We don't lock across timestamp updates, and we don't log them but - * we do record the fact that there is dirty information in core. - */ -void -xfs_ichgtime( - xfs_inode_t *ip, - int flags) -{ - struct inode *inode = VFS_I(ip); - timespec_t tv; - int sync_it = 0; - - tv = current_fs_time(inode->i_sb); - - if ((flags & XFS_ICHGTIME_MOD) && - !timespec_equal(&inode->i_mtime, &tv)) { - inode->i_mtime = tv; - sync_it = 1; - } - if ((flags & XFS_ICHGTIME_CHG) && - !timespec_equal(&inode->i_ctime, &tv)) { - inode->i_ctime = tv; - sync_it = 1; - } - - /* - * Update complete - now make sure everyone knows that the inode - * is dirty. - */ - if (sync_it) - xfs_mark_inode_dirty_sync(ip); -} - -/* * Hook in SELinux. This is not quite correct yet, what we really need * here (as we do for default ACLs) is a mechanism by which creation of * these attrs can be journalled at inode creation time (along with the @@ -232,7 +189,7 @@ xfs_vn_mknod( } xfs_dentry_to_name(&name, dentry); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); if (unlikely(error)) goto out_free_acl; @@ -360,7 +317,7 @@ xfs_vn_link( if (unlikely(error)) return -error; - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(dentry, inode); return 0; } @@ -405,7 +362,7 @@ xfs_vn_symlink( (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); xfs_dentry_to_name(&name, dentry); - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); + error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; @@ -496,7 +453,7 @@ xfs_vn_getattr( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - xfs_itrace_entry(ip); + trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -548,21 +505,6 @@ xfs_vn_setattr( return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } -/* - * block_truncate_page can return an error, but we can't propagate it - * at all here. Leave a complaint + stack trace in the syslog because - * this could be bad. If it is bad, we need to propagate the error further. - */ -STATIC void -xfs_vn_truncate( - struct inode *inode) -{ - int error; - error = block_truncate_page(inode->i_mapping, inode->i_size, - xfs_get_blocks); - WARN_ON(error); -} - STATIC long xfs_vn_fallocate( struct inode *inode, @@ -687,7 +629,7 @@ xfs_vn_fiemap( fieinfo->fi_extents_max + 1; bm.bmv_count = min_t(__s32, bm.bmv_count, (PAGE_SIZE * 16 / sizeof(struct getbmapx))); - bm.bmv_iflags = BMV_IF_PREALLOC; + bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) bm.bmv_iflags |= BMV_IF_ATTRFORK; if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) @@ -702,7 +644,6 @@ xfs_vn_fiemap( static const struct inode_operations xfs_inode_operations = { .check_acl = xfs_check_acl, - .truncate = xfs_vn_truncate, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -819,7 +760,9 @@ xfs_setup_inode( inode->i_ino = ip->i_ino; inode->i_state = I_NEW; - inode_add_to_lists(ip->i_mount->m_super, inode); + + inode_sb_list_add(inode); + insert_inode_hash(inode); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index facfb323a706..214ddd71ff79 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -71,6 +71,7 @@ #include <linux/random.h> #include <linux/ctype.h> #include <linux/writeback.h> +#include <linux/capability.h> #include <asm/page.h> #include <asm/div64.h> @@ -79,15 +80,12 @@ #include <asm/byteorder.h> #include <asm/unaligned.h> -#include <xfs_cred.h> #include <xfs_vnode.h> #include <xfs_stats.h> #include <xfs_sysctl.h> #include <xfs_iops.h> #include <xfs_aops.h> #include <xfs_super.h> -#include <xfs_globals.h> -#include <xfs_fs_subr.h> #include <xfs_buf.h> /* @@ -145,7 +143,7 @@ #define SYNCHRONIZE() barrier() #define __return_address __builtin_return_address(0) -#define dfltprid 0 +#define XFS_PROJID_DEFAULT 0 #define MAXPATHLEN 1024 #define MIN(a,b) (min(a,b)) @@ -157,8 +155,6 @@ */ #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() -#define xfs_itruncate_data(ip, off) \ - (-vmtruncate(VFS_I(ip), (off))) /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 067cafbfc635..29b9d642e93d 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_dmapi.h" #include "xfs_sb.h" #include "xfs_inum.h" #include "xfs_log.h" @@ -69,15 +68,15 @@ xfs_fs_set_xstate( if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - if (uflags & XFS_QUOTA_UDQ_ACCT) + if (uflags & FS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; - if (uflags & XFS_QUOTA_PDQ_ACCT) + if (uflags & FS_QUOTA_PDQ_ACCT) flags |= XFS_PQUOTA_ACCT; - if (uflags & XFS_QUOTA_GDQ_ACCT) + if (uflags & FS_QUOTA_GDQ_ACCT) flags |= XFS_GQUOTA_ACCT; - if (uflags & XFS_QUOTA_UDQ_ENFD) + if (uflags & FS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; - if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) + if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) flags |= XFS_OQUOTA_ENFD; switch (op) { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 80938c736c27..9f3a78fe6ae4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -25,14 +25,11 @@ #include "xfs_ag.h" #include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -43,12 +40,10 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_fsops.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" #include "xfs_filestream.h" @@ -94,7 +89,6 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and * unwritten extent conversion */ #define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ -#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ @@ -116,9 +110,6 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ @@ -172,15 +163,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base) STATIC int xfs_parseargs( struct xfs_mount *mp, - char *options, - char **mtpt) + char *options) { struct super_block *sb = mp->m_super; char *this_char, *value, *eov; int dsunit = 0; int dswidth = 0; int iosize = 0; - int dmapi_implies_ikeep = 1; __uint8_t iosizelog = 0; /* @@ -243,15 +232,10 @@ xfs_parseargs( if (!mp->m_logname) return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!*mtpt) - return ENOMEM; + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + return EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -288,8 +272,6 @@ xfs_parseargs( mp->m_flags &= ~XFS_MOUNT_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { mp->m_flags |= XFS_MOUNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { mp->m_flags |= XFS_MOUNT_NORECOVERY; } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { @@ -329,7 +311,6 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_IKEEP)) { mp->m_flags |= XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - dmapi_implies_ikeep = 0; mp->m_flags &= ~XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; @@ -370,12 +351,6 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - mp->m_flags |= XFS_MOUNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_XDSM)) { - mp->m_flags |= XFS_MOUNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_DMI)) { - mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { mp->m_flags |= XFS_MOUNT_DELAYLOG; cmn_err(CE_WARN, @@ -387,9 +362,11 @@ xfs_parseargs( cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); } else if (!strcmp(this_char, "osyncisdsync")) { - /* no-op, this is now the default */ cmn_err(CE_WARN, - "XFS: osyncisdsync is now the default, option is deprecated."); + "XFS: osyncisdsync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "osyncisosync")) { + cmn_err(CE_WARN, + "XFS: osyncisosync has no effect, option is deprecated."); } else if (!strcmp(this_char, "irixsgid")) { cmn_err(CE_WARN, "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); @@ -430,12 +407,6 @@ xfs_parseargs( return EINVAL; } - if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) { - printk("XFS: %s option needs the mount point option as well\n", - MNTOPT_DMAPI); - return EINVAL; - } - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { cmn_err(CE_WARN, "XFS: sunit and swidth must be specified together"); @@ -449,18 +420,6 @@ xfs_parseargs( return EINVAL; } - /* - * Applications using DMI filesystems often expect the - * inode generation number to be monotonically increasing. - * If we delete inode chunks we break this assumption, so - * keep unused inode chunks on disk for DMI filesystems - * until we come up with a better solution. - * Note that if "ikeep" or "noikeep" mount options are - * supplied, then they are honored. - */ - if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep) - mp->m_flags |= XFS_MOUNT_IKEEP; - done: if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { /* @@ -539,10 +498,8 @@ xfs_showargs( { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, - { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, { 0, NULL } @@ -619,7 +576,7 @@ xfs_max_file_offset( /* Figure out maximum filesize, on Linux this can depend on * the filesystem blocksize (on 32 bit platforms). - * __block_prepare_write does this in an [unsigned] long... + * __block_write_begin does this in an [unsigned] long... * page->index << (PAGE_CACHE_SHIFT - bbits) * So, for page sized blocks (4K on 32 bit platforms), * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is @@ -687,7 +644,7 @@ xfs_barrier_test( XFS_BUF_ORDERED(sbp); xfsbdstrat(mp, sbp); - error = xfs_iowait(sbp); + error = xfs_buf_iowait(sbp); /* * Clear all the flags we set and possible error state in the @@ -735,8 +692,7 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); } STATIC void @@ -800,18 +756,20 @@ xfs_open_devices( * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, + mp->m_fsname); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, + mp->m_fsname); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -947,7 +905,7 @@ xfs_fs_destroy_inode( { struct xfs_inode *ip = XFS_I(inode); - xfs_itrace_entry(ip); + trace_xfs_destroy_inode(ip); XFS_STATS_INC(vn_reclaim); @@ -1014,12 +972,7 @@ xfs_fs_inode_init_once( /* * Dirty the XFS inode when mark_inode_dirty_sync() is called so that - * we catch unlogged VFS level updates to the inode. Care must be taken - * here - the transaction code calls mark_inode_dirty_sync() to mark the - * VFS inode dirty in a transaction and clears the i_update_core field; - * it must clear the field after calling mark_inode_dirty_sync() to - * correctly indicate that the dirty state has been propagated into the - * inode log item. + * we catch unlogged VFS level updates to the inode. * * We need the barrier() to maintain correct ordering between unlogged * updates and the transaction commit code that clears the i_update_core @@ -1063,10 +1016,8 @@ xfs_log_inode( * an inode in another recent transaction. So we play it safe and * fire off the transaction anyway. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); xfs_ilock_demote(ip, XFS_ILOCK_EXCL); @@ -1082,27 +1033,18 @@ xfs_fs_write_inode( struct xfs_mount *mp = ip->i_mount; int error = EAGAIN; - xfs_itrace_entry(ip); + trace_xfs_write_inode(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); if (wbc->sync_mode == WB_SYNC_ALL) { /* - * Make sure the inode has hit stable storage. By using the - * log and the fsync transactions we reduce the IOs we have - * to do here from two (log and inode) to just the log. - * - * Note: We still need to do a delwri write of the inode after - * this to flush it to the backing buffer so that bulkstat - * works properly if this is the first time the inode has been - * written. Because we hold the ilock atomically over the - * transaction commit and the inode flush we are guaranteed - * that the inode is not pinned when it returns. If the flush - * lock is already held, then the inode has already been - * flushed once and we don't need to flush it again. Hence - * the code will only flush the inode if it isn't already - * being flushed. + * Make sure the inode has made it it into the log. Instead + * of forcing it all the way to stable storage using a + * synchronous transaction we let the log force inside the + * ->sync_fs call do that for thus, which reduces the number + * of synchronous log foces dramatically. */ xfs_ioend_wait(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -1116,27 +1058,29 @@ xfs_fs_write_inode( * We make this non-blocking if the inode is contended, return * EAGAIN to indicate to the caller that they did not succeed. * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. + * another operation right now, they get caught later by + * xfs_sync. */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - } - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - /* - * Now we have the flush lock and the inode is not pinned, we can check - * if the inode is really clean as we know that there are no pending - * transaction completions, it is not waiting on the delayed write - * queue and there is no IO in progress. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - error = 0; - goto out_unlock; + /* + * Now we have the flush lock and the inode is not pinned, we + * can check if the inode is really clean as we know that + * there are no pending transaction completions, it is not + * waiting on the delayed write queue and there is no IO in + * progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; + } + error = xfs_iflush(ip, 0); } - error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1151,12 +1095,15 @@ xfs_fs_write_inode( } STATIC void -xfs_fs_clear_inode( +xfs_fs_evict_inode( struct inode *inode) { xfs_inode_t *ip = XFS_I(inode); - xfs_itrace_entry(ip); + trace_xfs_evict_inode(ip); + + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); @@ -1193,22 +1140,13 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); + /* + * Unregister the memory shrinker before we tear down the mount + * structure so we don't have memory reclaim racing with us here. + */ + xfs_inode_shrinker_unregister(mp); xfs_syncd_stop(mp); - if (!(sb->s_flags & MS_RDONLY)) { - /* - * XXX(hch): this should be SYNC_WAIT. - * - * Or more likely not needed at all because the VFS is already - * calling ->sync_fs after shutting down all filestem - * operations and just before calling ->put_super. - */ - xfs_sync_data(mp, 0); - xfs_sync_attr(mp, 0); - } - - XFS_SEND_PREUNMOUNT(mp); - /* * Blow away any referenced inode in the filestreams cache. * This can and will cause log traffic as inodes go inactive @@ -1218,14 +1156,10 @@ xfs_fs_put_super( XFS_bflush(mp->m_ddev_targp); - XFS_SEND_UNMOUNT(mp); - xfs_unmountfs(mp); xfs_freesb(mp); - xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); } @@ -1287,6 +1221,7 @@ xfs_fs_statfs( struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; + __int64_t ffree; statp->f_type = XFS_SB_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -1310,7 +1245,11 @@ xfs_fs_statfs( statp->f_files = min_t(typeof(statp->f_files), statp->f_files, mp->m_maxicount); - statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + + /* make sure statp->f_ffree does not underflow */ + ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + statp->f_ffree = max_t(__int64_t, ffree, 0); + spin_unlock(&mp->m_sb_lock); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || @@ -1463,7 +1402,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); + return -xfs_fs_log_dummy(mp, SYNC_WAIT); } STATIC int @@ -1543,7 +1482,6 @@ xfs_fs_fill_super( struct inode *root; struct xfs_mount *mp = NULL; int flags = 0, error = ENOMEM; - char *mtpt = NULL; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) @@ -1559,7 +1497,7 @@ xfs_fs_fill_super( mp->m_super = sb; sb->s_fs_info = mp; - error = xfs_parseargs(mp, (char *)data, &mtpt); + error = xfs_parseargs(mp, (char *)data); if (error) goto out_free_fsname; @@ -1571,19 +1509,16 @@ xfs_fs_fill_super( #endif sb->s_op = &xfs_super_operations; - error = xfs_dmops_get(mp); - if (error) - goto out_free_fsname; - if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_dmops; + goto out_free_fsname; - if (xfs_icsb_init_counters(mp)) - mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; + error = xfs_icsb_init_counters(mp); + if (error) + goto out_close_devices; error = xfs_readsb(mp, flags); if (error) @@ -1608,8 +1543,6 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); - sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1638,7 +1571,6 @@ xfs_fs_fill_super( xfs_inode_shrinker_register(mp); - kfree(mtpt); return 0; out_filestream_unmount: @@ -1647,12 +1579,10 @@ xfs_fs_fill_super( xfs_freesb(mp); out_destroy_counters: xfs_icsb_destroy_counters(mp); + out_close_devices: xfs_close_devices(mp); - out_put_dmops: - xfs_dmops_put(mp); out_free_fsname: xfs_free_fsname(mp); - kfree(mtpt); kfree(mp); out: return -error; @@ -1679,16 +1609,14 @@ xfs_fs_fill_super( goto out_free_sb; } -STATIC int -xfs_fs_get_sb( +STATIC struct dentry * +xfs_fs_mount( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } static const struct super_operations xfs_super_operations = { @@ -1696,7 +1624,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .write_inode = xfs_fs_write_inode, - .clear_inode = xfs_fs_clear_inode, + .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, @@ -1709,7 +1637,7 @@ static const struct super_operations xfs_super_operations = { static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .get_sb = xfs_fs_get_sb, + .mount = xfs_fs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -1759,6 +1687,12 @@ xfs_init_zones(void) if (!xfs_trans_zone) goto out_destroy_ifork_zone; + xfs_log_item_desc_zone = + kmem_zone_init(sizeof(struct xfs_log_item_desc), + "xfs_log_item_desc"); + if (!xfs_log_item_desc_zone) + goto out_destroy_trans_zone; + /* * The size of the zone allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, @@ -1768,7 +1702,7 @@ xfs_init_zones(void) (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) * sizeof(int))), "xfs_buf_item"); if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + goto out_destroy_log_item_desc_zone; xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * @@ -1805,6 +1739,8 @@ xfs_init_zones(void) kmem_zone_destroy(xfs_efd_zone); out_destroy_buf_item_zone: kmem_zone_destroy(xfs_buf_item_zone); + out_destroy_log_item_desc_zone: + kmem_zone_destroy(xfs_log_item_desc_zone); out_destroy_trans_zone: kmem_zone_destroy(xfs_trans_zone); out_destroy_ifork_zone: @@ -1835,6 +1771,7 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_efi_zone); kmem_zone_destroy(xfs_efd_zone); kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_log_item_desc_zone); kmem_zone_destroy(xfs_trans_zone); kmem_zone_destroy(xfs_ifork_zone); kmem_zone_destroy(xfs_dabuf_zone); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 519618e9279e..50a3266c999e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -56,23 +56,17 @@ extern void xfs_qm_exit(void); # define XFS_BIGFS_STRING #endif -#ifdef CONFIG_XFS_DMAPI -# define XFS_DMAPI_STRING "dmapi support, " -#else -# define XFS_DMAPI_STRING -#endif - #ifdef DEBUG # define XFS_DBG_STRING "debug" #else # define XFS_DBG_STRING "no debug" #endif +#define XFS_VERSION_STRING "SGI XFS" #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_BIGFS_STRING \ - XFS_DMAPI_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a51a07c3a70c..37d33254981d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -24,67 +24,54 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_inode.h" #include "xfs_dinode.h" #include "xfs_error.h" -#include "xfs_mru_cache.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" -#include "xfs_utils.h" -#include "xfs_buf_item.h" #include "xfs_inode_item.h" -#include "xfs_rw.h" #include "xfs_quota.h" #include "xfs_trace.h" +#include "xfs_fsops.h" #include <linux/kthread.h> #include <linux/freezer.h> +/* + * The inode lookup is done in batches to keep the amount of lock traffic and + * radix tree lookups to a minimum. The batch size is a trade off between + * lookup reduction and stack usage. This is in the reclaim path, so we can't + * be too greedy. + */ +#define XFS_LOOKUP_BATCH 32 -STATIC xfs_inode_t * -xfs_inode_ag_lookup( - struct xfs_mount *mp, - struct xfs_perag *pag, - uint32_t *first_index, - int tag) +STATIC int +xfs_inode_ag_walk_grab( + struct xfs_inode *ip) { - int nr_found; - struct xfs_inode *ip; + struct inode *inode = VFS_I(ip); - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - if (tag == XFS_ICI_NO_TAG) { - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)&ip, *first_index, 1); - } else { - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void **)&ip, *first_index, 1, tag); + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EFSCORRUPTED; + + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + return ENOENT; + + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + return ENOENT; + + if (is_bad_inode(inode)) { + IRELE(ip); + return ENOENT; } - if (!nr_found) - return NULL; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - return NULL; - return ip; + /* inode is valid */ + return 0; } STATIC int @@ -93,49 +80,75 @@ xfs_inode_ag_walk( struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, - int tag, - int exclusive, - int *nr_to_scan) + int flags) { uint32_t first_index; int last_error = 0; int skipped; + int done; + int nr_found; restart: + done = 0; skipped = 0; first_index = 0; + nr_found = 0; do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; int error = 0; - xfs_inode_t *ip; + int i; - if (exclusive) - write_lock(&pag->pag_ici_lock); - else - read_lock(&pag->pag_ici_lock); - ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) { - if (exclusive) - write_unlock(&pag->pag_ici_lock); - else - read_unlock(&pag->pag_ici_lock); + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH); + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); break; } - /* execute releases pag->pag_ici_lock */ - error = execute(ip, pag, flags); - if (error == EAGAIN) { - skipped++; - continue; + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_inode_ag_walk_grab(ip)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + read_unlock(&pag->pag_ici_lock); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = execute(batch[i], pag, flags); + IRELE(batch[i]); + if (error == EAGAIN) { + skipped++; + continue; + } + if (error && last_error != EFSCORRUPTED) + last_error = error; } - if (error) - last_error = error; /* bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) break; - } while ((*nr_to_scan)--); + } while (nr_found && !done); if (skipped) { delay(1); @@ -144,110 +157,32 @@ restart: return last_error; } -/* - * Select the next per-ag structure to iterate during the walk. The reclaim - * walk is optimised only to walk AGs with reclaimable inodes in them. - */ -static struct xfs_perag * -xfs_inode_ag_iter_next_pag( - struct xfs_mount *mp, - xfs_agnumber_t *first, - int tag) -{ - struct xfs_perag *pag = NULL; - - if (tag == XFS_ICI_RECLAIM_TAG) { - int found; - int ref; - - spin_lock(&mp->m_perag_lock); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, *first, 1, tag); - if (found <= 0) { - spin_unlock(&mp->m_perag_lock); - return NULL; - } - *first = pag->pag_agno + 1; - /* open coded pag reference increment */ - ref = atomic_inc_return(&pag->pag_ref); - spin_unlock(&mp->m_perag_lock); - trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); - } else { - pag = xfs_perag_get(mp, *first); - (*first)++; - } - return pag; -} - int xfs_inode_ag_iterator( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, - int tag, - int exclusive, - int *nr_to_scan) + int flags) { struct xfs_perag *pag; int error = 0; int last_error = 0; xfs_agnumber_t ag; - int nr; - nr = nr_to_scan ? *nr_to_scan : INT_MAX; ag = 0; - while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { - error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, - exclusive, &nr); + while ((pag = xfs_perag_get(mp, ag))) { + ag = pag->pag_agno + 1; + error = xfs_inode_ag_walk(mp, pag, execute, flags); xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } - if (nr <= 0) - break; } - if (nr_to_scan) - *nr_to_scan = nr; return XFS_ERROR(last_error); } -/* must be called with pag_ici_lock held and releases it */ -int -xfs_sync_inode_valid( - struct xfs_inode *ip, - struct xfs_perag *pag) -{ - struct inode *inode = VFS_I(ip); - int error = EFSCORRUPTED; - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - goto out_unlock; - - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - error = ENOENT; - if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - goto out_unlock; - - /* If we can't grab the inode, it must on it's way to reclaim. */ - if (!igrab(inode)) - goto out_unlock; - - if (is_bad_inode(inode)) { - IRELE(ip); - goto out_unlock; - } - - /* inode is valid */ - error = 0; -out_unlock: - read_unlock(&pag->pag_ici_lock); - return error; -} - STATIC int xfs_sync_inode_data( struct xfs_inode *ip, @@ -258,10 +193,6 @@ xfs_sync_inode_data( struct address_space *mapping = inode->i_mapping; int error = 0; - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) goto out_wait; @@ -278,7 +209,6 @@ xfs_sync_inode_data( out_wait: if (flags & SYNC_WAIT) xfs_ioend_wait(ip); - IRELE(ip); return error; } @@ -290,10 +220,6 @@ xfs_sync_inode_attr( { int error = 0; - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_inode_clean(ip)) goto out_unlock; @@ -312,14 +238,13 @@ xfs_sync_inode_attr( out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); - IRELE(ip); return error; } /* * Write out pagecache data for the whole filesystem. */ -int +STATIC int xfs_sync_data( struct xfs_mount *mp, int flags) @@ -328,8 +253,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG, 0, NULL); + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); if (error) return XFS_ERROR(error); @@ -340,48 +264,14 @@ xfs_sync_data( /* * Write out inode metadata (attributes) for the whole filesystem. */ -int +STATIC int xfs_sync_attr( struct xfs_mount *mp, int flags) { ASSERT((flags & ~SYNC_WAIT) == 0); - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG, 0, NULL); -} - -STATIC int -xfs_commit_dummy_trans( - struct xfs_mount *mp, - uint flags) -{ - struct xfs_inode *ip = mp->m_rootip; - struct xfs_trans *tp; - int error; - - /* - * Put a dummy transaction in the log to tell recovery - * that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); - return error; + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); } STATIC int @@ -444,7 +334,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); + error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -575,7 +465,7 @@ xfs_flush_inodes( /* * Every sync period we need to unpin all items, reclaim inodes and sync * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle. + * filesystem is idle and not frozen. */ STATIC void xfs_sync_worker( @@ -589,8 +479,9 @@ xfs_sync_worker( xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, 0); + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp, 0); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); @@ -710,14 +601,11 @@ xfs_inode_set_reclaim_tag( xfs_perag_put(pag); } -void -__xfs_inode_clear_reclaim_tag( - xfs_mount_t *mp, +STATIC void +__xfs_inode_clear_reclaim( xfs_perag_t *pag, xfs_inode_t *ip) { - radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); pag->pag_ici_reclaimable--; if (!pag->pag_ici_reclaimable) { /* clear the reclaim tag from the perag radix tree */ @@ -731,6 +619,54 @@ __xfs_inode_clear_reclaim_tag( } } +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_inode_clear_reclaim(pag, ip); +} + +/* + * Grab the inode for reclaim exclusively. + * Return 0 if we grabbed it, non-zero otherwise. + */ +STATIC int +xfs_reclaim_inode_grab( + struct xfs_inode *ip, + int flags) +{ + + /* + * do some unlocked checks first to avoid unnecceary lock traffic. + * The first is a flush lock check, the second is a already in reclaim + * check. Only do these checks if we are not going to block on locks. + */ + if ((flags & SYNC_TRYLOCK) && + (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { + return 1; + } + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + */ + spin_lock(&ip->i_flags_lock); + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ + spin_unlock(&ip->i_flags_lock); + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + return 0; +} + /* * Inodes in different states need to be treated differently, and the return * value of xfs_iflush is not sufficient to get this right. The following table @@ -789,23 +725,6 @@ xfs_reclaim_inode( { int error = 0; - /* - * The radix tree lock here protects a thread in xfs_iget from racing - * with us starting reclaim on the inode. Once we have the - * XFS_IRECLAIM flag set it will not touch us. - */ - spin_lock(&ip->i_flags_lock); - ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* ignore as it is already under reclaim */ - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - return 0; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iflock_nowait(ip)) { if (!(sync_mode & SYNC_WAIT)) @@ -867,18 +786,161 @@ out: reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); + + XFS_STATS_INC(xs_ig_reclaims); + /* + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. + */ + write_lock(&pag->pag_ici_lock); + if (!radix_tree_delete(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) + ASSERT(0); + __xfs_inode_clear_reclaim(pag, ip); + write_unlock(&pag->pag_ici_lock); + + /* + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_qm_dqdetach(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + xfs_inode_free(ip); return error; } +/* + * Walk the AGs and reclaim the inodes in them. Even if the filesystem is + * corrupted, we still want to try to reclaim all the inodes. If we don't, + * then a shut down during filesystem unmount reclaim walk leak all the + * unreclaimed inodes. + */ +int +xfs_reclaim_inodes_ag( + struct xfs_mount *mp, + int flags, + int *nr_to_scan) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + int trylock = flags & SYNC_TRYLOCK; + int skipped; + +restart: + ag = 0; + skipped = 0; + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + unsigned long first_index = 0; + int done = 0; + int nr_found = 0; + + ag = pag->pag_agno + 1; + + if (trylock) { + if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { + skipped++; + continue; + } + first_index = pag->pag_ici_reclaim_cursor; + } else + mutex_lock(&pag->pag_ici_reclaim_lock); + + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int i; + + write_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag( + &pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH, + XFS_ICI_RECLAIM_TAG); + if (!nr_found) { + write_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_reclaim_inode_grab(ip, flags)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can + * occur if we have inodes in the last block of + * the AG and we are currently pointing to the + * last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + write_unlock(&pag->pag_ici_lock); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = xfs_reclaim_inode(batch[i], pag, flags); + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + *nr_to_scan -= XFS_LOOKUP_BATCH; + + } while (nr_found && !done && *nr_to_scan > 0); + + if (trylock && !done) + pag->pag_ici_reclaim_cursor = first_index; + else + pag->pag_ici_reclaim_cursor = 0; + mutex_unlock(&pag->pag_ici_reclaim_lock); + xfs_perag_put(pag); + } + + /* + * if we skipped any AG, and we still have scan count remaining, do + * another pass this time using blocking reclaim semantics (i.e + * waiting on the reclaim locks and ignoring the reclaim cursors). This + * ensure that when we get more reclaimers than AGs we block rather + * than spin trying to execute reclaim. + */ + if (trylock && skipped && *nr_to_scan > 0) { + trylock = 0; + goto restart; + } + return XFS_ERROR(last_error); +} + int xfs_reclaim_inodes( xfs_mount_t *mp, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, - XFS_ICI_RECLAIM_TAG, 1, NULL); + int nr_to_scan = INT_MAX; + + return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); } /* @@ -900,17 +962,16 @@ xfs_reclaim_inode_shrink( if (!(gfp_mask & __GFP_FS)) return -1; - xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, - XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); - /* if we don't exhaust the scan, don't bother coming back */ + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; } reclaimable = 0; ag = 0; - while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, - XFS_ICI_RECLAIM_TAG))) { + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + ag = pag->pag_agno + 1; reclaimable += pag->pag_ici_reclaimable; xfs_perag_put(pag); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index e28139aaa4aa..32ba6628290c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -35,9 +35,6 @@ typedef struct xfs_sync_work { int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_attr(struct xfs_mount *mp, int flags); -int xfs_sync_data(struct xfs_mount *mp, int flags); - int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); @@ -50,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); -int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); +int xfs_sync_inode_grab(struct xfs_inode *ip); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag, int write_lock, int *nr_to_scan); + int flags); void xfs_inode_shrinker_register(struct xfs_mount *mp); void xfs_inode_shrinker_unregister(struct xfs_mount *mp); diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index d12be8470cba..88d25d4aa56e 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -24,17 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_itable.h" diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 302820690904..acef2e98c594 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ unsigned long caller_ip), \ TP_ARGS(mp, agno, refcount, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); +DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); @@ -317,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init); DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); -DEFINE_BUF_EVENT(xfs_buf_pin); -DEFINE_BUF_EVENT(xfs_buf_unpin); DEFINE_BUF_EVENT(xfs_buf_iodone); DEFINE_BUF_EVENT(xfs_buf_iorequest); DEFINE_BUF_EVENT(xfs_buf_bawrite); @@ -327,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); DEFINE_BUF_EVENT(xfs_buf_cond_lock); DEFINE_BUF_EVENT(xfs_buf_unlock); -DEFINE_BUF_EVENT(xfs_buf_ordered_retry); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_get_noaddr); +DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_bdstrat_shut); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_iodone); @@ -541,7 +538,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait); DEFINE_LOCK_EVENT(xfs_ilock_demote); DEFINE_LOCK_EVENT(xfs_iunlock); -DECLARE_EVENT_CLASS(xfs_iget_class, +DECLARE_EVENT_CLASS(xfs_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( @@ -557,16 +554,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class, __entry->ino) ) -#define DEFINE_IGET_EVENT(name) \ -DEFINE_EVENT(xfs_iget_class, name, \ +#define DEFINE_INODE_EVENT(name) \ +DEFINE_EVENT(xfs_inode_class, name, \ TP_PROTO(struct xfs_inode *ip), \ TP_ARGS(ip)) -DEFINE_IGET_EVENT(xfs_iget_skip); -DEFINE_IGET_EVENT(xfs_iget_reclaim); -DEFINE_IGET_EVENT(xfs_iget_found); -DEFINE_IGET_EVENT(xfs_iget_alloc); - -DECLARE_EVENT_CLASS(xfs_inode_class, +DEFINE_INODE_EVENT(xfs_iget_skip); +DEFINE_INODE_EVENT(xfs_iget_reclaim); +DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); +DEFINE_INODE_EVENT(xfs_iget_hit); +DEFINE_INODE_EVENT(xfs_iget_miss); + +DEFINE_INODE_EVENT(xfs_getattr); +DEFINE_INODE_EVENT(xfs_setattr); +DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_alloc_file_space); +DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_readdir); +#ifdef CONFIG_XFS_POSIX_ACL +DEFINE_INODE_EVENT(xfs_check_acl); +#endif +DEFINE_INODE_EVENT(xfs_vm_bmap); +DEFINE_INODE_EVENT(xfs_file_ioctl); +DEFINE_INODE_EVENT(xfs_file_compat_ioctl); +DEFINE_INODE_EVENT(xfs_ioctl_setattr); +DEFINE_INODE_EVENT(xfs_file_fsync); +DEFINE_INODE_EVENT(xfs_destroy_inode); +DEFINE_INODE_EVENT(xfs_write_inode); +DEFINE_INODE_EVENT(xfs_evict_inode); + +DEFINE_INODE_EVENT(xfs_dquot_dqalloc); +DEFINE_INODE_EVENT(xfs_dquot_dqdetach); + +DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), TP_ARGS(ip, caller_ip), TP_STRUCT__entry( @@ -591,20 +610,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class, (char *)__entry->caller_ip) ) -#define DEFINE_INODE_EVENT(name) \ -DEFINE_EVENT(xfs_inode_class, name, \ +#define DEFINE_IREF_EVENT(name) \ +DEFINE_EVENT(xfs_iref_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ TP_ARGS(ip, caller_ip)) -DEFINE_INODE_EVENT(xfs_ihold); -DEFINE_INODE_EVENT(xfs_irele); -DEFINE_INODE_EVENT(xfs_inode_pin); -DEFINE_INODE_EVENT(xfs_inode_unpin); -DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_ihold); +DEFINE_IREF_EVENT(xfs_irele); +DEFINE_IREF_EVENT(xfs_inode_pin); +DEFINE_IREF_EVENT(xfs_inode_unpin); +DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); + +DECLARE_EVENT_CLASS(xfs_namespace_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), + TP_ARGS(dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dp ino 0x%llx name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(name)) +) -/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ -DEFINE_INODE_EVENT(xfs_inode); -#define xfs_itrace_entry(ip) \ - trace_xfs_inode(ip, _THIS_IP_) +#define DEFINE_NAMESPACE_EVENT(name) \ +DEFINE_EVENT(xfs_namespace_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ + TP_ARGS(dp, name)) +DEFINE_NAMESPACE_EVENT(xfs_remove); +DEFINE_NAMESPACE_EVENT(xfs_link); +DEFINE_NAMESPACE_EVENT(xfs_lookup); +DEFINE_NAMESPACE_EVENT(xfs_create); +DEFINE_NAMESPACE_EVENT(xfs_symlink); + +TRACE_EVENT(xfs_rename, + TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, + struct xfs_name *src_name, struct xfs_name *target_name), + TP_ARGS(src_dp, target_dp, src_name, target_name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_dp_ino) + __field(xfs_ino_t, target_dp_ino) + __dynamic_array(char, src_name, src_name->len) + __dynamic_array(char, target_name, target_name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(src_dp)->i_sb->s_dev; + __entry->src_dp_ino = src_dp->i_ino; + __entry->target_dp_ino = target_dp->i_ino; + memcpy(__get_str(src_name), src_name->name, src_name->len); + memcpy(__get_str(target_name), target_name->name, target_name->len); + ), + TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" + " src name %s target name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->src_dp_ino, + __entry->target_dp_ino, + __get_str(src_name), + __get_str(target_name)) +) DECLARE_EVENT_CLASS(xfs_dquot_class, TP_PROTO(struct xfs_dquot *dqp), @@ -684,9 +754,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele); DEFINE_DQUOT_EVENT(xfs_dqflush); DEFINE_DQUOT_EVENT(xfs_dqflush_force); DEFINE_DQUOT_EVENT(xfs_dqflush_done); -/* not really iget events, but we re-use the format */ -DEFINE_IGET_EVENT(xfs_dquot_dqalloc); -DEFINE_IGET_EVENT(xfs_dquot_dqdetach); DECLARE_EVENT_CLASS(xfs_loggrant_class, TP_PROTO(struct log *log, struct xlog_ticket *tic), @@ -834,33 +901,29 @@ DECLARE_EVENT_CLASS(xfs_page_class, __field(loff_t, size) __field(unsigned long, offset) __field(int, delalloc) - __field(int, unmapped) __field(int, unwritten) ), TP_fast_assign( - int delalloc = -1, unmapped = -1, unwritten = -1; + int delalloc = -1, unwritten = -1; if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, - &unmapped, &unwritten); + xfs_count_page_state(page, &delalloc, &unwritten); __entry->dev = inode->i_sb->s_dev; __entry->ino = XFS_I(inode)->i_ino; __entry->pgoff = page_offset(page); __entry->size = i_size_read(inode); __entry->offset = off; __entry->delalloc = delalloc; - __entry->unmapped = unmapped; __entry->unwritten = unwritten; ), TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " - "delalloc %d unmapped %d unwritten %d", + "delalloc %d unwritten %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pgoff, __entry->size, __entry->offset, __entry->delalloc, - __entry->unmapped, __entry->unwritten) ) diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h deleted file mode 100644 index f8d279d7563a..000000000000 --- a/fs/xfs/linux-2.6/xfs_version.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VERSION_H__ -#define __XFS_VERSION_H__ - -/* - * Dummy file that can contain a timestamp to put into the - * XFS init string, to help users keep track of what they're - * running - */ - -#define XFS_VERSION_STRING "SGI XFS" - -#endif /* __XFS_VERSION_H__ */ diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 585e7633dfc7..faf8e1a83a12 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -64,8 +54,6 @@ flush lock - ditto. */ -STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *); - #ifdef DEBUG xfs_buftarg_t *xfs_dqerror_target; int xfs_do_dqerror; @@ -390,21 +378,14 @@ xfs_qm_dqalloc( return (ESRCH); } - /* - * xfs_trans_commit normally decrements the vnode ref count - * when it unlocks the inode. Since we want to keep the quota - * inode around, we bump the vnode ref count now. - */ - IHOLD(quotip); - - xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; if ((error = xfs_bmapi(tp, quotip, offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &flist, NULL))) { + &map, &nmaps, &flist))) { goto error0; } ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); @@ -482,87 +463,68 @@ xfs_qm_dqtobp( uint flags) { xfs_bmbt_irec_t map; - int nmaps, error; + int nmaps = 1, error; xfs_buf_t *bp; - xfs_inode_t *quotip; - xfs_mount_t *mp; + xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); + xfs_mount_t *mp = dqp->q_mount; xfs_disk_dquot_t *ddq; - xfs_dqid_t id; - boolean_t newdquot; + xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); xfs_trans_t *tp = (tpp ? *tpp : NULL); - mp = dqp->q_mount; - id = be32_to_cpu(dqp->q_core.d_id); - nmaps = 1; - newdquot = B_FALSE; + dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - /* - * If we don't know where the dquot lives, find out. - */ - if (dqp->q_blkno == (xfs_daddr_t) 0) { - /* We use the id as an index */ - dqp->q_fileoffset = (xfs_fileoff_t)id / - mp->m_quotainfo->qi_dqperchunk; - nmaps = 1; - quotip = XFS_DQ_TO_QIP(dqp); - xfs_ilock(quotip, XFS_ILOCK_SHARED); + xfs_ilock(quotip, XFS_ILOCK_SHARED); + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { /* - * Return if this type of quotas is turned off while we didn't - * have an inode lock + * Return if this type of quotas is turned off while we + * didn't have the quota inode lock. */ - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - return (ESRCH); - } + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + return ESRCH; + } + + /* + * Find the block map; no allocations yet + */ + error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, + NULL, 0, &map, &nmaps, NULL); + + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + if (error) + return error; + + ASSERT(nmaps == 1); + ASSERT(map.br_blockcount == 1); + + /* + * Offset of dquot in the (fixed sized) dquot chunk. + */ + dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * + sizeof(xfs_dqblk_t); + + ASSERT(map.br_startblock != DELAYSTARTBLOCK); + if (map.br_startblock == HOLESTARTBLOCK) { /* - * Find the block map; no allocations yet + * We don't allocate unless we're asked to */ - error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, - XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL, NULL); + if (!(flags & XFS_QMOPT_DQALLOC)) + return ENOENT; - xfs_iunlock(quotip, XFS_ILOCK_SHARED); + ASSERT(tp); + error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, + dqp->q_fileoffset, &bp); if (error) - return (error); - ASSERT(nmaps == 1); - ASSERT(map.br_blockcount == 1); + return error; + tp = *tpp; + } else { + trace_xfs_dqtobp_read(dqp); /* - * offset of dquot in the (fixed sized) dquot chunk. + * store the blkno etc so that we don't have to do the + * mapping all the time */ - dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); - if (map.br_startblock == HOLESTARTBLOCK) { - /* - * We don't allocate unless we're asked to - */ - if (!(flags & XFS_QMOPT_DQALLOC)) - return (ENOENT); - - ASSERT(tp); - if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, - dqp->q_fileoffset, &bp))) - return (error); - tp = *tpp; - newdquot = B_TRUE; - } else { - /* - * store the blkno etc so that we don't have to do the - * mapping all the time - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - } - } - ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); - ASSERT(dqp->q_blkno != HOLESTARTBLOCK); - - /* - * Read in the buffer, unless we've just done the allocation - * (in which case we already have the buf). - */ - if (!newdquot) { - trace_xfs_dqtobp_read(dqp); + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, @@ -571,13 +533,14 @@ xfs_qm_dqtobp( if (error || !bp) return XFS_ERROR(error); } + ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * calculate the location of the dquot inside the buffer. */ - ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); + ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); /* * A simple sanity check in case we got a corrupted dquot... @@ -1141,6 +1104,46 @@ xfs_qm_dqrele( xfs_qm_dqput(dqp); } +/* + * This is the dquot flushing I/O completion routine. It is called + * from interrupt level when the buffer containing the dquot is + * flushed to disk. It is responsible for removing the dquot logitem + * from the AIL if it has not been re-logged, and unlocking the dquot's + * flush lock. This behavior is very similar to that of inodes.. + */ +STATIC void +xfs_qm_dqflush_done( + struct xfs_buf *bp, + struct xfs_log_item *lip) +{ + xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; + xfs_dquot_t *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; + + /* + * We only want to pull the item from the AIL if its + * location in the log has not changed since we started the flush. + * Thus, we only bother if the dquot's lsn has + * not changed. First we check the lsn outside the lock + * since it's cheaper, and then we recheck while + * holding the lock before removing the dquot from the AIL. + */ + if ((lip->li_flags & XFS_LI_IN_AIL) && + lip->li_lsn == qip->qli_flush_lsn) { + + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); + if (lip->li_lsn == qip->qli_flush_lsn) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + + /* + * Release the dq's flush lock since we're done with it. + */ + xfs_dqfunlock(dqp); +} /* * Write a modified dquot to disk. @@ -1155,18 +1158,18 @@ xfs_qm_dqflush( xfs_dquot_t *dqp, uint flags) { - xfs_mount_t *mp; - xfs_buf_t *bp; - xfs_disk_dquot_t *ddqp; + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp; + struct xfs_disk_dquot *ddqp; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + trace_xfs_dqflush(dqp); /* - * If not dirty, or it's pinned and we are not supposed to - * block, nada. + * If not dirty, or it's pinned and we are not supposed to block, nada. */ if (!XFS_DQ_IS_DIRTY(dqp) || (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { @@ -1180,40 +1183,46 @@ xfs_qm_dqflush( * down forcibly. If that's the case we must not write this dquot * to disk, because the log record didn't make it to disk! */ - if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { - dqp->dq_flags &= ~(XFS_DQ_DIRTY); + if (XFS_FORCED_SHUTDOWN(mp)) { + dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_dqfunlock(dqp); return XFS_ERROR(EIO); } /* * Get the buffer containing the on-disk dquot - * We don't need a transaction envelope because we know that the - * the ondisk-dquot has already been allocated for. */ - if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) { ASSERT(error != ENOENT); - /* - * Quotas could have gotten turned off (ESRCH) - */ xfs_dqfunlock(dqp); - return (error); + return error; } - if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), - 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { - xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); + /* + * Calculate the location of the dquot inside the buffer. + */ + ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); + + /* + * A simple sanity check in case we got a corrupted dquot.. + */ + if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return XFS_ERROR(EIO); } /* This is the only portion of data that needs to persist */ - memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); + memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); /* * Clear the dirty field and remember the flush lsn for later use. */ - dqp->dq_flags &= ~(XFS_DQ_DIRTY); - mp = dqp->q_mount; + dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, &dqp->q_logitem.qli_item.li_lsn); @@ -1222,8 +1231,9 @@ xfs_qm_dqflush( * Attach an iodone routine so that we can remove this dquot from the * AIL and release the flush lock once the dquot is synced to disk. */ - xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) - xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); + xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, + &dqp->q_logitem.qli_item); + /* * If the buffer is pinned then push on the log so we won't * get stuck waiting in the write for too long. @@ -1247,50 +1257,6 @@ xfs_qm_dqflush( } -/* - * This is the dquot flushing I/O completion routine. It is called - * from interrupt level when the buffer containing the dquot is - * flushed to disk. It is responsible for removing the dquot logitem - * from the AIL if it has not been re-logged, and unlocking the dquot's - * flush lock. This behavior is very similar to that of inodes.. - */ -/*ARGSUSED*/ -STATIC void -xfs_qm_dqflush_done( - xfs_buf_t *bp, - xfs_dq_logitem_t *qip) -{ - xfs_dquot_t *dqp; - struct xfs_ail *ailp; - - dqp = qip->qli_dquot; - ailp = qip->qli_item.li_ailp; - - /* - * We only want to pull the item from the AIL if its - * location in the log has not changed since we started the flush. - * Thus, we only bother if the dquot's lsn has - * not changed. First we check the lsn outside the lock - * since it's cheaper, and then we recheck while - * holding the lock before removing the dquot from the AIL. - */ - if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - qip->qli_item.li_lsn == qip->qli_flush_lsn) { - - /* xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); - if (qip->qli_item.li_lsn == qip->qli_flush_lsn) - xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip); - else - spin_unlock(&ailp->xa_lock); - } - - /* - * Release the dq's flush lock since we're done with it. - */ - xfs_dqfunlock(dqp); -} - int xfs_qm_dqlock_nowait( xfs_dquot_t *dqp) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 8d89a24ae324..2a1f3dc10a02 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -23,42 +23,36 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" +static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_dq_logitem, qli_item); +} + /* * returns the number of iovecs needed to log the given dquot item. */ -/* ARGSUSED */ STATIC uint xfs_qm_dquot_logitem_size( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { /* * we need only two iovecs, one for the format, one for the real thing */ - return (2); + return 2; } /* @@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size( */ STATIC void xfs_qm_dquot_logitem_format( - xfs_dq_logitem_t *logitem, - xfs_log_iovec_t *logvec) + struct xfs_log_item *lip, + struct xfs_log_iovec *logvec) { - ASSERT(logitem); - ASSERT(logitem->qli_dquot); + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; + logvec->i_addr = &qlip->qli_format; logvec->i_len = sizeof(xfs_dq_logformat_t); logvec->i_type = XLOG_REG_TYPE_QFORMAT; logvec++; - logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; + logvec->i_addr = &qlip->qli_dquot->q_core; logvec->i_len = sizeof(xfs_disk_dquot_t); logvec->i_type = XLOG_REG_TYPE_DQUOT; - ASSERT(2 == logitem->qli_item.li_desc->lid_size); - logitem->qli_format.qlf_size = 2; + ASSERT(2 == lip->li_desc->lid_size); + qlip->qli_format.qlf_size = 2; } @@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format( */ STATIC void xfs_qm_dquot_logitem_pin( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); atomic_inc(&dqp->q_pincount); @@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin( * dquot must have been previously pinned with a call to * xfs_qm_dquot_logitem_pin(). */ -/* ARGSUSED */ STATIC void xfs_qm_dquot_logitem_unpin( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip, + int remove) { - xfs_dquot_t *dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; ASSERT(atomic_read(&dqp->q_pincount) > 0); if (atomic_dec_and_test(&dqp->q_pincount)) wake_up(&dqp->q_pinwait); } -/* ARGSUSED */ -STATIC void -xfs_qm_dquot_logitem_unpin_remove( - xfs_dq_logitem_t *logitem, - xfs_trans_t *tp) -{ - xfs_qm_dquot_logitem_unpin(logitem); -} - /* * Given the logitem, this writes the corresponding dquot entry to disk * asynchronously. This is called with the dquot entry securely locked; @@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove( */ STATIC void xfs_qm_dquot_logitem_push( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; - int error; - - dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); @@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push( xfs_dqunlock(dqp); } -/*ARGSUSED*/ STATIC xfs_lsn_t xfs_qm_dquot_logitem_committed( - xfs_dq_logitem_t *l, + struct xfs_log_item *lip, xfs_lsn_t lsn) { /* * We always re-log the entire dquot when it becomes dirty, * so, the latest copy _is_ the only one that matters. */ - return (lsn); + return lsn; } - /* * This is called to wait for the given dquot to be unpinned. * Most of these pin/unpin routines are plagiarized from inode code. */ void xfs_qm_dqunpin_wait( - xfs_dquot_t *dqp) + struct xfs_dquot *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); if (atomic_read(&dqp->q_pincount) == 0) @@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait( */ STATIC void xfs_qm_dquot_logitem_pushbuf( - xfs_dq_logitem_t *qip) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; - xfs_mount_t *mp; - xfs_buf_t *bp; + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; - dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* @@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if (completion_done(&dqp->q_flush) || - ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { + if (completion_done(&dqp->q_flush) || + !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_dqunlock(dqp); return; } - mp = dqp->q_mount; - bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + + bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, + dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); xfs_dqunlock(dqp); if (!bp) return; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); - return; - } /* @@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf( */ STATIC uint xfs_qm_dquot_logitem_trylock( - xfs_dq_logitem_t *qip) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - dqp = qip->qli_dquot; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; - if (! xfs_qm_dqlock_nowait(dqp)) + if (!xfs_qm_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; if (!xfs_dqflock_nowait(dqp)) { @@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock( return XFS_ITEM_PUSHBUF; } - ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); + ASSERT(lip->li_flags & XFS_LI_IN_AIL); return XFS_ITEM_SUCCESS; } - /* * Unlock the dquot associated with the log item. * Clear the fields of the dquot and dquot log item that @@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock( */ STATIC void xfs_qm_dquot_logitem_unlock( - xfs_dq_logitem_t *ql) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - ASSERT(ql != NULL); - dqp = ql->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* @@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock( xfs_dqunlock(dqp); } - /* * this needs to stamp an lsn into the dquot, I think. * rpc's that look at user dquot's would then have to * push on the dependency recorded in the dquot */ -/* ARGSUSED */ STATIC void xfs_qm_dquot_logitem_committing( - xfs_dq_logitem_t *l, + struct xfs_log_item *lip, xfs_lsn_t lsn) { - return; } - /* * This is the ops vector for dquots */ static struct xfs_item_ops xfs_dquot_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_dquot_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) - xfs_qm_dquot_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*)) - xfs_qm_dquot_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_dquot_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*)) - xfs_qm_dquot_logitem_pushbuf, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_dquot_logitem_committing + .iop_size = xfs_qm_dquot_logitem_size, + .iop_format = xfs_qm_dquot_logitem_format, + .iop_pin = xfs_qm_dquot_logitem_pin, + .iop_unpin = xfs_qm_dquot_logitem_unpin, + .iop_trylock = xfs_qm_dquot_logitem_trylock, + .iop_unlock = xfs_qm_dquot_logitem_unlock, + .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_push = xfs_qm_dquot_logitem_push, + .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, + .iop_committing = xfs_qm_dquot_logitem_committing }; /* @@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = { */ void xfs_qm_dquot_logitem_init( - struct xfs_dquot *dqp) + struct xfs_dquot *dqp) { - xfs_dq_logitem_t *lp; - lp = &dqp->q_logitem; + struct xfs_dq_logitem *lp = &dqp->q_logitem; xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); @@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init( /*------------------ QUOTAOFF LOG ITEMS -------------------*/ +static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_qoff_logitem, qql_item); +} + + /* * This returns the number of iovecs needed to log the given quotaoff item. * We only need 1 iovec for an quotaoff item. It just logs the * quotaoff_log_format structure. */ -/*ARGSUSED*/ STATIC uint -xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_size( + struct xfs_log_item *lip) { - return (1); + return 1; } /* @@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) * slots in the quotaoff item have been filled. */ STATIC void -xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, - xfs_log_iovec_t *log_vector) +xfs_qm_qoff_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) { - ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); + struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); + + ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); + log_vector->i_addr = &qflip->qql_format; log_vector->i_len = sizeof(xfs_qoff_logitem_t); log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; - qf->qql_format.qf_size = 1; + qflip->qql_format.qf_size = 1; } - /* * Pinning has no meaning for an quotaoff item, so just return. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_pin( + struct xfs_log_item *lip) { - return; } - /* * Since pinning has no meaning for an quotaoff item, unpinning does * not either. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_unpin( + struct xfs_log_item *lip, + int remove) { - return; -} - -/*ARGSUSED*/ -STATIC void -xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp) -{ - return; } /* * Quotaoff items have no locking, so just return success. */ -/*ARGSUSED*/ STATIC uint -xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_trylock( + struct xfs_log_item *lip) { return XFS_ITEM_LOCKED; } @@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) * Quotaoff items have no locking or pushing, so return failure * so that the caller doesn't bother with us. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_unlock( + struct xfs_log_item *lip) { - return; } /* * The quotaoff-start-item is logged only once and cannot be moved in the log, * so simply return the lsn at which it's been logged. */ -/*ARGSUSED*/ STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) +xfs_qm_qoff_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - return (lsn); + return lsn; } /* * There isn't much you can do to push on an quotaoff item. It is simply * stuck waiting for the log to be flushed to disk. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_push( + struct xfs_log_item *lip) { - return; } -/*ARGSUSED*/ STATIC xfs_lsn_t xfs_qm_qoffend_logitem_committed( - xfs_qoff_logitem_t *qfe, - xfs_lsn_t lsn) + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - xfs_qoff_logitem_t *qfs; - struct xfs_ail *ailp; + struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); + struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; + struct xfs_ail *ailp = qfs->qql_item.li_ailp; - qfs = qfe->qql_start_lip; - ailp = qfs->qql_item.li_ailp; - spin_lock(&ailp->xa_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); + kmem_free(qfs); kmem_free(qfe); return (xfs_lsn_t)-1; @@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed( * (truly makes the quotaoff irrevocable). If we do something else, * then maybe we don't need two. */ -/* ARGSUSED */ -STATIC void -xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) -{ - return; -} - -/* ARGSUSED */ STATIC void -xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) +xfs_qm_qoff_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) { - return; } static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_qoff_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) - xfs_qm_qoff_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoffend_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoffend_logitem_committing + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoffend_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing }; /* * This is the ops vector shared by all quotaoff-start log items. */ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_qoff_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) - xfs_qm_qoff_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoff_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoff_logitem_committing + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoff_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing }; /* * Allocate and initialize an quotaoff item of the correct quota type(s). */ -xfs_qoff_logitem_t * +struct xfs_qoff_logitem * xfs_qm_qoff_logitem_init( - struct xfs_mount *mp, - xfs_qoff_logitem_t *start, - uint flags) + struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags) { - xfs_qoff_logitem_t *qf; + struct xfs_qoff_logitem *qf; - qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); + qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); @@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init( qf->qql_format.qf_type = XFS_LI_QUOTAOFF; qf->qql_format.qf_flags = flags; qf->qql_start_lip = start; - return (qf); + return qf; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 67c018392d62..f8e854b4fde8 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -23,25 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -62,8 +55,6 @@ uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -static cred_t xfs_zerocr; - STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); @@ -844,7 +835,7 @@ xfs_qm_dqattach_locked( xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC, ip->i_udquot, &ip->i_gdquot) : - xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, + xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, ip->i_udquot, &ip->i_gdquot); /* @@ -1206,87 +1197,6 @@ xfs_qm_list_destroy( mutex_destroy(&(list->qh_lock)); } - -/* - * Stripped down version of dqattach. This doesn't attach, or even look at the - * dquots attached to the inode. The rationale is that there won't be any - * attached at the time this is called from quotacheck. - */ -STATIC int -xfs_qm_dqget_noattach( - xfs_inode_t *ip, - xfs_dquot_t **O_udqpp, - xfs_dquot_t **O_gdqpp) -{ - int error; - xfs_mount_t *mp; - xfs_dquot_t *udqp, *gdqp; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - mp = ip->i_mount; - udqp = NULL; - gdqp = NULL; - - if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(ip->i_udquot == NULL); - /* - * We want the dquot allocated if it doesn't exist. - */ - if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &udqp))) { - /* - * Shouldn't be able to turn off quotas here. - */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - ASSERT(udqp); - } - - if (XFS_IS_OQUOTA_ON(mp)) { - ASSERT(ip->i_gdquot == NULL); - if (udqp) - xfs_dqunlock(udqp); - error = XFS_IS_GQUOTA_ON(mp) ? - xfs_qm_dqget(mp, ip, - ip->i_d.di_gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp) : - xfs_qm_dqget(mp, ip, - ip->i_d.di_projid, XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp); - if (error) { - if (udqp) - xfs_qm_dqrele(udqp); - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - ASSERT(gdqp); - - /* Reacquire the locks in the right order */ - if (udqp) { - if (! xfs_qm_dqlock_nowait(udqp)) { - xfs_dqunlock(gdqp); - xfs_dqlock(udqp); - xfs_dqlock(gdqp); - } - } - } - - *O_udqpp = udqp; - *O_gdqpp = gdqp; - -#ifdef QUOTADEBUG - if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); - if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); -#endif - return 0; -} - /* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes @@ -1312,8 +1222,8 @@ xfs_qm_qino_alloc( return error; } - if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, - &xfs_zerocr, 0, 1, ip, &committed))) { + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); + if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); return error; @@ -1497,7 +1407,7 @@ xfs_qm_dqiterate( maxlblkcnt - lblkno, XFS_BMAPI_METADATA, NULL, - 0, map, &nmaps, NULL, NULL); + 0, map, &nmaps, NULL); xfs_iunlock(qip, XFS_ILOCK_SHARED); if (error) break; @@ -1523,7 +1433,7 @@ xfs_qm_dqiterate( rablkcnt = map[i+1].br_blockcount; rablkno = map[i+1].br_startblock; while (rablkcnt--) { - xfs_baread(mp->m_ddev_targp, + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen); rablkno++; @@ -1553,18 +1463,34 @@ xfs_qm_dqiterate( /* * Called by dqusage_adjust in doing a quotacheck. - * Given the inode, and a dquot (either USR or GRP, doesn't matter), - * this updates its incore copy as well as the buffer copy. This is - * so that once the quotacheck is done, we can just log all the buffers, - * as opposed to logging numerous updates to individual dquots. + * + * Given the inode, and a dquot id this updates both the incore dqout as well + * as the buffer copy. This is so that once the quotacheck is done, we can + * just log all the buffers, as opposed to logging numerous updates to + * individual dquots. */ -STATIC void +STATIC int xfs_qm_quotacheck_dqadjust( - xfs_dquot_t *dqp, + struct xfs_inode *ip, + xfs_dqid_t id, + uint type, xfs_qcnt_t nblks, xfs_qcnt_t rtblks) { - ASSERT(XFS_DQ_IS_LOCKED(dqp)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *dqp; + int error; + + error = xfs_qm_dqget(mp, ip, id, type, + XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + if (error) { + /* + * Shouldn't be able to turn off quotas here. + */ + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } trace_xfs_dqadjust(dqp); @@ -1589,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust( * There are no timers for the default values set in the root dquot. */ if (dqp->q_core.d_id) { - xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); - xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); + xfs_qm_adjust_dqlimits(mp, &dqp->q_core); + xfs_qm_adjust_dqtimers(mp, &dqp->q_core); } dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_qm_dqput(dqp); + return 0; } STATIC int @@ -1636,8 +1564,7 @@ xfs_qm_dqusage_adjust( int *res) /* result code value */ { xfs_inode_t *ip; - xfs_dquot_t *udqp, *gdqp; - xfs_qcnt_t nblks, rtblks; + xfs_qcnt_t nblks, rtblks = 0; int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -1657,49 +1584,24 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + if (error) { *res = BULKSTAT_RV_NOTHING; return error; } - /* - * Obtain the locked dquots. In case of an error (eg. allocation - * fails for ENOSPC), we return the negative of the error number - * to bulkstat, so that it can get propagated to quotacheck() and - * making us disable quotas for the file system. - */ - if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { - xfs_iput(ip, XFS_ILOCK_EXCL); - *res = BULKSTAT_RV_GIVEUP; - return error; - } + ASSERT(ip->i_delayed_blks == 0); - rtblks = 0; - if (! XFS_IS_REALTIME_INODE(ip)) { - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks; - } else { + if (XFS_IS_REALTIME_INODE(ip)) { /* * Walk thru the extent list and count the realtime blocks. */ - if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { - xfs_iput(ip, XFS_ILOCK_EXCL); - if (udqp) - xfs_qm_dqput(udqp); - if (gdqp) - xfs_qm_dqput(gdqp); - *res = BULKSTAT_RV_GIVEUP; - return error; - } - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; + error = xfs_qm_get_rtblks(ip, &rtblks); + if (error) + goto error0; } - ASSERT(ip->i_delayed_blks == 0); - /* - * We can't release the inode while holding its dquot locks. - * The inode can go into inactive and might try to acquire the dquotlocks. - * So, just unlock here and do a vn_rele at the end. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; /* * Add the (disk blocks and inode) resources occupied by this @@ -1714,26 +1616,36 @@ xfs_qm_dqusage_adjust( * and quotaoffs don't race. (Quotachecks happen at mount time only). */ if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(udqp); - xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); - xfs_qm_dqput(udqp); + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, + XFS_DQ_USER, nblks, rtblks); + if (error) + goto error0; } - if (XFS_IS_OQUOTA_ON(mp)) { - ASSERT(gdqp); - xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); - xfs_qm_dqput(gdqp); + + if (XFS_IS_GQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, + XFS_DQ_GROUP, nblks, rtblks); + if (error) + goto error0; } - /* - * Now release the inode. This will send it to 'inactive', and - * possibly even free blocks. - */ - IRELE(ip); - /* - * Goto next inode. - */ + if (XFS_IS_PQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), + XFS_DQ_PROJ, nblks, rtblks); + if (error) + goto error0; + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); *res = BULKSTAT_RV_DIDONE; return 0; + +error0: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_GIVEUP; + return error; } /* @@ -2229,7 +2141,7 @@ xfs_qm_write_sb_changes( /* - * Given an inode, a uid and gid (from cred_t) make sure that we have + * Given an inode, a uid, gid and prid make sure that we have * allocated relevant dquot(s) on disk, and that we won't exceed inode * quotas by creating this file. * This also attaches dquot(s) to the given inode after locking it, @@ -2337,7 +2249,7 @@ xfs_qm_vop_dqalloc( xfs_dqunlock(gq); } } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { - if (ip->i_d.di_projid != prid) { + if (xfs_get_projid(ip) != prid) { xfs_iunlock(ip, lockflags); if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, @@ -2459,7 +2371,7 @@ xfs_qm_vop_chown_reserve( } if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { if (XFS_IS_PQUOTA_ON(ip->i_mount) && - ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) + xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) prjflags = XFS_QMOPT_ENOSPC; if (prjflags || @@ -2563,7 +2475,7 @@ xfs_qm_vop_create_dqattach( ip->i_gdquot = gdqp; ASSERT(XFS_IS_OQUOTA_ON(mp)); ASSERT((XFS_IS_GQUOTA_ON(mp) ? - ip->i_d.di_gid : ip->i_d.di_projid) == + ip->i_d.di_gid : xfs_get_projid(ip)) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 97b410c12794..45b5cb1788ab 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" @@ -91,7 +81,7 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; - if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { + if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); xfs_qm_dqput(dqp); } diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 3d1fc79532e2..8671a0b32644 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" -#include "xfs_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index b4487764e923..bdebc183223e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -26,25 +26,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" -#include "xfs_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -248,40 +238,74 @@ out_unlock: return error; } +STATIC int +xfs_qm_scall_trunc_qfile( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + struct xfs_trans *tp; + int error; + + if (ino == NULLFSINO) + return 0; + + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); + if (error) + return error; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + goto out_put; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip); + + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + goto out_unlock; + } + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +out_put: + IRELE(ip); + return error; +} + int xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { int error = 0, error2 = 0; - xfs_inode_t *qip; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); return XFS_ERROR(EINVAL); } - if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); - if (!error) { - error = xfs_truncate_file(mp, qip); - IRELE(qip); - } - } - - if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && - mp->m_sb.sb_gquotino != NULLFSINO) { - error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip); - if (!error2) { - error2 = xfs_truncate_file(mp, qip); - IRELE(qip); - } - } + if (flags & XFS_DQ_USER) + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) + error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); return error ? error : error2; } - /* * Switch on (a given) quota enforcement for a filesystem. This takes * effect immediately. @@ -786,9 +810,9 @@ xfs_qm_export_dquot( } #ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || (XFS_IS_OQUOTA_ENFORCED(mp) && - (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && + (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && dst->d_id != 0) { if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { @@ -809,17 +833,17 @@ xfs_qm_export_qtype_flags( /* * Can't be more than one, or none. */ - ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != - (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); - ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != - (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); - ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != - (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); - ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); + ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != + (FS_PROJ_QUOTA | FS_USER_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != + (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != + (FS_USER_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); return (flags & XFS_DQ_USER) ? - XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; + FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? + FS_PROJ_QUOTA : FS_GROUP_QUOTA; } STATIC uint @@ -830,16 +854,16 @@ xfs_qm_export_flags( uflags = 0; if (flags & XFS_UQUOTA_ACCT) - uflags |= XFS_QUOTA_UDQ_ACCT; + uflags |= FS_QUOTA_UDQ_ACCT; if (flags & XFS_PQUOTA_ACCT) - uflags |= XFS_QUOTA_PDQ_ACCT; + uflags |= FS_QUOTA_PDQ_ACCT; if (flags & XFS_GQUOTA_ACCT) - uflags |= XFS_QUOTA_GDQ_ACCT; + uflags |= FS_QUOTA_GDQ_ACCT; if (flags & XFS_UQUOTA_ENFD) - uflags |= XFS_QUOTA_UDQ_ENFD; + uflags |= FS_QUOTA_UDQ_ENFD; if (flags & (XFS_OQUOTA_ENFD)) { uflags |= (flags & XFS_GQUOTA_ACCT) ? - XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; + FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; } return (uflags); } @@ -851,21 +875,14 @@ xfs_dqrele_inode( struct xfs_perag *pag, int flags) { - int error; - /* skip quota inodes */ if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || ip == ip->i_mount->m_quotainfo->qi_gquotaip) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); - read_unlock(&pag->pag_ici_lock); return 0; } - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); @@ -875,8 +892,7 @@ xfs_dqrele_inode( xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } - xfs_iput(ip, XFS_ILOCK_EXCL); - + xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; } @@ -893,8 +909,7 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, - XFS_ICI_NO_TAG, 0, NULL); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); } /*------------------------------------------------------------------------*/ @@ -1143,13 +1158,14 @@ xfs_qm_internalqcheck_adjust( * of those now. */ if (! ipreleased) { - xfs_iput(ip, lock_flags); + xfs_iunlock(ip, lock_flags); + IRELE(ip); ipreleased = B_TRUE; goto again; } xfs_qm_internalqcheck_get_dquots(mp, (xfs_dqid_t) ip->i_d.di_uid, - (xfs_dqid_t) ip->i_d.di_projid, + (xfs_dqid_t) xfs_get_projid(ip), (xfs_dqid_t) ip->i_d.di_gid, &ud, &gd); if (XFS_IS_UQUOTA_ON(mp)) { @@ -1160,7 +1176,8 @@ xfs_qm_internalqcheck_adjust( ASSERT(gd); xfs_qm_internalqcheck_dqadjust(ip, gd); } - xfs_iput(ip, lock_flags); + xfs_iunlock(ip, lock_flags); + IRELE(ip); *res = BULKSTAT_RV_DIDONE; return (0); } diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 061d827da33c..7de91d1b75c0 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -59,16 +49,14 @@ xfs_trans_dqjoin( xfs_trans_t *tp, xfs_dquot_t *dqp) { - xfs_dq_logitem_t *lp = &dqp->q_logitem; - ASSERT(dqp->q_transp != tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(lp->qli_dquot == dqp); + ASSERT(dqp->q_logitem.qli_dquot == dqp); /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); + xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); /* * Initialize i_transp so we can later determine if this dquot is @@ -93,16 +81,11 @@ xfs_trans_log_dquot( xfs_trans_t *tp, xfs_dquot_t *dqp) { - xfs_log_item_desc_t *lidp; - ASSERT(dqp->q_transp == tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; } /* @@ -874,9 +857,8 @@ xfs_trans_get_qoff_item( /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); - - return (q); + xfs_trans_add_item(tp, &q->qql_item); + return q; } @@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item( xfs_trans_t *tp, xfs_qoff_logitem_t *qlp) { - xfs_log_item_desc_t *lidp; - - lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; } STATIC void diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 3f3610a7ee05..975aa10e1a47 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -22,7 +22,6 @@ #include "xfs_sb.h" #include "xfs_inum.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 4917d4eed4ed..63c7a1a6c022 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -230,6 +230,15 @@ typedef struct xfs_perag { rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ + struct mutex pag_ici_reclaim_lock; /* serialisation point */ + unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ + + /* buffer cache index */ + spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ + struct rb_root pag_buf_tree; /* ordered tree of active buffers */ + + /* for rcu-safe freeing */ + struct rcu_head rcu_head; #endif int pagb_count; /* pagb slots in use */ } xfs_perag_t; diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a7fbe8a99b12..112abc439ca5 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -24,18 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -680,7 +675,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena; /* aligned ... */ + xfs_extlen_t gtlena = 0; /* aligned ... */ xfs_agblock_t gtnew; /* useful start bno of right side */ int error; /* error code */ int i; /* result code, temporary */ @@ -688,10 +683,8 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbno; /* start bno of left side entry */ xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ - /*REFERENCED*/ - xfs_agblock_t ltend; /* end bno of left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena; /* aligned ... */ + xfs_extlen_t ltlena = 0; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ #if defined(DEBUG) && defined(__KERNEL__) @@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - ltend = ltbno + ltlen; - ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; if (!xfs_alloc_fix_minleft(args)) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); @@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near( */ args->agbno = bnew; ASSERT(bnew >= ltbno); - ASSERT(bnew + blen <= ltend); + ASSERT(bnew + blen <= ltbno + ltlen); /* * Set up a cursor for the by-bno tree. */ @@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near( /* * Fix up the length and compute the useful address. */ - ltend = ltbno + ltlen; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) { @@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near( (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, ltlen, <new); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltend); + ASSERT(ltnew + rlen <= ltbno + ltlen); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 6d05199b667c..895009a97271 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -27,16 +27,16 @@ struct xfs_busy_extent; /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ -typedef enum xfs_alloctype -{ - XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ - XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ - XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ - XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ - XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ - XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ - XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ -} xfs_alloctype_t; +#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */ +#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */ +#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */ +#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */ +#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */ +#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */ +#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */ + +/* this should become an enum again when the tracing code is fixed */ +typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_TYPES \ { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 83f494218759..3916925e2584 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -24,19 +24,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_btree_trace.h" -#include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -285,38 +280,6 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } -STATIC int -xfs_allocbt_kill_root( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - union xfs_btree_ptr *newroot) -{ - int error; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, killroot); - - /* - * Update the root pointer, decreasing the level by 1 and then - * free the old root. - */ - xfs_allocbt_set_root(cur, newroot, -1); - error = xfs_allocbt_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - XFS_BTREE_STATS_INC(cur, free); - - xfs_btree_setbuf(cur, level, NULL); - cur->bc_nlevels--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - #ifdef DEBUG STATIC int xfs_allocbt_keys_inorder( @@ -428,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, - .kill_root = xfs_allocbt_kill_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index b9c196a53c42..c86375378810 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -25,19 +25,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_btree.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" @@ -325,8 +319,7 @@ xfs_attr_set_int( return (error); } - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); + xfs_trans_ijoin(args.trans, dp); /* * If the attribute list is non-existent or a shortform list, @@ -362,16 +355,15 @@ xfs_attr_set_int( if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(args.trans); } + + if (!error && (flags & ATTR_KERNOTIME) == 0) { + xfs_trans_ichgtime(args.trans, dp, + XFS_ICHGTIME_CHG); + } err2 = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } return(error == 0 ? err2 : error); } @@ -396,10 +388,8 @@ xfs_attr_set_int( * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); - } + if (committed) + xfs_trans_ijoin(args.trans, dp); /* * Commit the leaf transformation. We'll need another (linked) @@ -429,6 +419,9 @@ xfs_attr_set_int( xfs_trans_set_sync(args.trans); } + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + /* * Commit the last in the sequence of transactions. */ @@ -436,13 +429,6 @@ xfs_attr_set_int( error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } - return(error); out: @@ -544,8 +530,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) * No need to make quota reservations here. We expect to release some * blocks not allocate in the common case. */ - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); + xfs_trans_ijoin(args.trans, dp); /* * Decide on what work routines to call based on the inode size. @@ -577,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) xfs_trans_set_sync(args.trans); } + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + /* * Commit the last in the sequence of transactions. */ @@ -584,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } - return(error); out: @@ -821,8 +802,7 @@ xfs_attr_inactive(xfs_inode_t *dp) * No need to make quota reservations here. We expect to release some * blocks, not allocate, in the common case. */ - xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(trans, dp); + xfs_trans_ijoin(trans, dp); /* * Decide on what work routines to call based on the inode size. @@ -981,10 +961,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); /* * Commit the current trans (including the inode) and start @@ -1085,10 +1063,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); } else xfs_da_buf_done(bp); @@ -1161,10 +1137,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); } else xfs_da_buf_done(bp); return(0); @@ -1317,10 +1291,8 @@ restart: * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); /* * Commit the node conversion and start the next @@ -1356,10 +1328,8 @@ restart: * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); } else { /* * Addition succeeded, update Btree hashvals. @@ -1470,10 +1440,8 @@ restart: * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); } /* @@ -1604,10 +1572,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); /* * Commit the Btree join operation and start a new trans. @@ -1658,10 +1624,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); } else xfs_da_brelse(args->trans, bp); } @@ -2004,7 +1968,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - NULL, 0, map, &nmap, NULL, NULL); + NULL, 0, map, &nmap, NULL); if (error) return(error); ASSERT(nmap >= 1); @@ -2022,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, dst, XBF_READ); + xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); xfs_buf_relse(bp); dst += tmp; valuelen -= tmp; @@ -2083,7 +2047,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, args->firstblock, args->total, &map, &nmap, - args->flist, NULL); + args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); @@ -2099,10 +2063,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp); ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && @@ -2136,7 +2098,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, args->firstblock, 0, &map, &nmap, - NULL, NULL); + NULL); if (error) { return(error); } @@ -2154,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, src, XBF_WRITE); + xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); if (tmp < XFS_BUF_SIZE(bp)) - xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); + xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ return (error); } @@ -2201,7 +2163,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) args->rmtblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, args->firstblock, 0, &map, &nmap, - args->flist, NULL); + args->flist); if (error) { return(error); } @@ -2239,7 +2201,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 1, args->firstblock, args->flist, - NULL, &done); + &done); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); @@ -2255,10 +2217,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, args->dp); - } + if (committed) + xfs_trans_ijoin(args->trans, args->dp); /* * Close out trans and start the next one in the chain. diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index a90ce74fc256..a6cff8edcdb6 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -24,8 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" @@ -33,7 +31,6 @@ #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -2931,7 +2928,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, nmap = 1; error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - NULL, 0, &map, &nmap, NULL, NULL); + NULL, 0, &map, &nmap, NULL); if (error) { return(error); } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 99587ded043f..8abd12e32e13 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -30,13 +30,10 @@ #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" @@ -104,7 +101,6 @@ xfs_bmap_add_extent( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd); /* OK to allocate reserved blocks */ @@ -122,7 +118,6 @@ xfs_bmap_add_extent_delay_real( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd); /* OK to allocate reserved blocks */ /* @@ -135,7 +130,6 @@ xfs_bmap_add_extent_hole_delay( xfs_extnum_t idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd); /* OK to allocate reserved blocks */ /* @@ -149,7 +143,6 @@ xfs_bmap_add_extent_hole_real( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork); /* data or attr fork */ /* @@ -162,8 +155,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta); /* Change made to incore extents */ + int *logflagsp); /* inode logging flags */ /* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. @@ -200,7 +192,6 @@ xfs_bmap_del_extent( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd); /* OK to allocate reserved blocks */ @@ -489,7 +480,6 @@ xfs_bmap_add_extent( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd) /* OK to use reserved data blocks */ { @@ -524,15 +514,6 @@ xfs_bmap_add_extent( logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else logflags = 0; - /* DELTA: single new extent */ - if (delta) { - if (delta->xed_startoff > new->br_startoff) - delta->xed_startoff = new->br_startoff; - if (delta->xed_blockcount < - new->br_startoff + new->br_blockcount) - delta->xed_blockcount = new->br_startoff + - new->br_blockcount; - } } /* * Any kind of new delayed allocation goes here. @@ -542,7 +523,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, - &logflags, delta, rsvd))) + &logflags, rsvd))) goto done; } /* @@ -553,7 +534,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, - &logflags, delta, whichfork))) + &logflags, whichfork))) goto done; } else { xfs_bmbt_irec_t prev; /* old extent at offset idx */ @@ -578,17 +559,17 @@ xfs_bmap_add_extent( XFS_BTCUR_BPRV_WASDEL); if ((error = xfs_bmap_add_extent_delay_real(ip, idx, &cur, new, &da_new, first, flist, - &logflags, delta, rsvd))) + &logflags, rsvd))) goto done; } else if (new->br_state == XFS_EXT_NORM) { ASSERT(new->br_state == XFS_EXT_NORM); if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags, delta))) + ip, idx, &cur, new, &logflags))) goto done; } else { ASSERT(new->br_state == XFS_EXT_UNWRITTEN); if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags, delta))) + ip, idx, &cur, new, &logflags))) goto done; } ASSERT(*curp == cur || *curp == NULL); @@ -601,7 +582,7 @@ xfs_bmap_add_extent( ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, - new, &logflags, delta, whichfork))) + new, &logflags, whichfork))) goto done; } } @@ -633,7 +614,7 @@ xfs_bmap_add_extent( nblks += cur->bc_private.b.allocated; ASSERT(nblks <= da_old); if (nblks < da_old) - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, (int64_t)(da_old - nblks), rsvd); } /* @@ -666,7 +647,6 @@ xfs_bmap_add_extent_delay_real( xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd) /* OK to use reserved data block allocation */ { xfs_btree_cur_t *cur; /* btree cursor */ @@ -797,11 +777,6 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; - /* DELTA: Three in-core extents are replaced by one. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: @@ -832,10 +807,6 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; - /* DELTA: Two in-core extents are replaced by one. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -867,10 +838,6 @@ xfs_bmap_add_extent_delay_real( goto done; } *dnew = 0; - /* DELTA: Two in-core extents are replaced by one. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: @@ -900,9 +867,6 @@ xfs_bmap_add_extent_delay_real( XFS_WANT_CORRUPTED_GOTO(i == 1, done); } *dnew = 0; - /* DELTA: The in-core extent described by new changed type. */ - temp = new->br_startoff; - temp2 = new->br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: @@ -942,10 +906,6 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; - /* DELTA: The boundary between two in-core extents moved. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount; break; case BMAP_LEFT_FILLING: @@ -990,9 +950,6 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); *dnew = temp; - /* DELTA: One in-core extent is split in two. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1031,10 +988,6 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; - /* DELTA: The boundary between two in-core extents moved. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_RIGHT_FILLING: @@ -1078,9 +1031,6 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; - /* DELTA: One in-core extent is split in two. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case 0: @@ -1129,7 +1079,8 @@ xfs_bmap_add_extent_delay_real( diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); if (diff > 0 && - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) { /* * Ick gross gag me with a spoon. */ @@ -1139,16 +1090,18 @@ xfs_bmap_add_extent_delay_real( temp--; diff--; if (!diff || - !xfs_mod_incore_sb(ip->i_mount, - XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) + !xfs_icsb_modify_counters(ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) break; } if (temp2) { temp2--; diff--; if (!diff || - !xfs_mod_incore_sb(ip->i_mount, - XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) + !xfs_icsb_modify_counters(ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) break; } } @@ -1161,9 +1114,6 @@ xfs_bmap_add_extent_delay_real( nullstartblock((int)temp2)); trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); *dnew = temp + temp2; - /* DELTA: One in-core extent is split in three. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: @@ -1179,13 +1129,6 @@ xfs_bmap_add_extent_delay_real( ASSERT(0); } *curp = cur; - if (delta) { - temp2 += temp; - if (delta->xed_startoff > temp) - delta->xed_startoff = temp; - if (delta->xed_blockcount < temp2) - delta->xed_blockcount = temp2; - } done: *logflagsp = rval; return error; @@ -1204,8 +1147,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta) /* Change made to incore extents */ + int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ @@ -1219,8 +1161,6 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - xfs_filblks_t temp=0; - xfs_filblks_t temp2=0; #define LEFT r[0] #define RIGHT r[1] @@ -1341,11 +1281,6 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, LEFT.br_state))) goto done; } - /* DELTA: Three in-core extents are replaced by one. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: @@ -1382,10 +1317,6 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_state))) goto done; } - /* DELTA: Two in-core extents are replaced by one. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1422,10 +1353,6 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } - /* DELTA: Two in-core extents are replaced by one. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: @@ -1453,9 +1380,6 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } - /* DELTA: The in-core extent described by new changed type. */ - temp = new->br_startoff; - temp2 = new->br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: @@ -1501,10 +1425,6 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_state)) goto done; } - /* DELTA: The boundary between two in-core extents moved. */ - temp = LEFT.br_startoff; - temp2 = LEFT.br_blockcount + - PREV.br_blockcount; break; case BMAP_LEFT_FILLING: @@ -1544,9 +1464,6 @@ xfs_bmap_add_extent_unwritten_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - /* DELTA: One in-core extent is split in two. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1587,10 +1504,6 @@ xfs_bmap_add_extent_unwritten_real( newext))) goto done; } - /* DELTA: The boundary between two in-core extents moved. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount + - RIGHT.br_blockcount; break; case BMAP_RIGHT_FILLING: @@ -1630,9 +1543,6 @@ xfs_bmap_add_extent_unwritten_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - /* DELTA: One in-core extent is split in two. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case 0: @@ -1692,9 +1602,6 @@ xfs_bmap_add_extent_unwritten_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - /* DELTA: One in-core extent is split in three. */ - temp = PREV.br_startoff; - temp2 = PREV.br_blockcount; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: @@ -1710,13 +1617,6 @@ xfs_bmap_add_extent_unwritten_real( ASSERT(0); } *curp = cur; - if (delta) { - temp2 += temp; - if (delta->xed_startoff > temp) - delta->xed_startoff = temp; - if (delta->xed_blockcount < temp2) - delta->xed_blockcount = temp2; - } done: *logflagsp = rval; return error; @@ -1736,7 +1636,6 @@ xfs_bmap_add_extent_hole_delay( xfs_extnum_t idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int rsvd) /* OK to allocate reserved blocks */ { xfs_bmbt_rec_host_t *ep; /* extent record for idx */ @@ -1747,7 +1646,6 @@ xfs_bmap_add_extent_hole_delay( xfs_bmbt_irec_t right; /* right neighbor extent entry */ int state; /* state bits, accessed thru macros */ xfs_filblks_t temp=0; /* temp for indirect calculations */ - xfs_filblks_t temp2=0; ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ep = xfs_iext_get_ext(ifp, idx); @@ -1819,9 +1717,6 @@ xfs_bmap_add_extent_hole_delay( xfs_iext_remove(ip, idx, 1, state); ip->i_df.if_lastex = idx - 1; - /* DELTA: Two in-core extents were replaced by one. */ - temp2 = temp; - temp = left.br_startoff; break; case BMAP_LEFT_CONTIG: @@ -1841,9 +1736,6 @@ xfs_bmap_add_extent_hole_delay( trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); ip->i_df.if_lastex = idx - 1; - /* DELTA: One in-core extent grew into a hole. */ - temp2 = temp; - temp = left.br_startoff; break; case BMAP_RIGHT_CONTIG: @@ -1862,9 +1754,6 @@ xfs_bmap_add_extent_hole_delay( trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); ip->i_df.if_lastex = idx; - /* DELTA: One in-core extent grew into a hole. */ - temp2 = temp; - temp = new->br_startoff; break; case 0: @@ -1876,26 +1765,16 @@ xfs_bmap_add_extent_hole_delay( oldlen = newlen = 0; xfs_iext_insert(ip, idx, 1, new, state); ip->i_df.if_lastex = idx; - /* DELTA: A new in-core extent was added in a hole. */ - temp2 = new->br_blockcount; - temp = new->br_startoff; break; } if (oldlen != newlen) { ASSERT(oldlen > newlen); - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, (int64_t)(oldlen - newlen), rsvd); /* * Nothing to do for disk quota accounting here. */ } - if (delta) { - temp2 += temp; - if (delta->xed_startoff > temp) - delta->xed_startoff = temp; - if (delta->xed_blockcount < temp2) - delta->xed_blockcount = temp2; - } *logflagsp = 0; return 0; } @@ -1911,7 +1790,6 @@ xfs_bmap_add_extent_hole_real( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork) /* data or attr fork */ { xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ @@ -1922,8 +1800,6 @@ xfs_bmap_add_extent_hole_real( xfs_bmbt_irec_t right; /* right neighbor extent entry */ int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ - xfs_filblks_t temp=0; - xfs_filblks_t temp2=0; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); @@ -2020,11 +1896,6 @@ xfs_bmap_add_extent_hole_real( left.br_state))) goto done; } - /* DELTA: Two in-core extents were replaced by one. */ - temp = left.br_startoff; - temp2 = left.br_blockcount + - new->br_blockcount + - right.br_blockcount; break; case BMAP_LEFT_CONTIG: @@ -2056,10 +1927,6 @@ xfs_bmap_add_extent_hole_real( left.br_state))) goto done; } - /* DELTA: One in-core extent grew. */ - temp = left.br_startoff; - temp2 = left.br_blockcount + - new->br_blockcount; break; case BMAP_RIGHT_CONTIG: @@ -2092,10 +1959,6 @@ xfs_bmap_add_extent_hole_real( right.br_state))) goto done; } - /* DELTA: One in-core extent grew. */ - temp = new->br_startoff; - temp2 = new->br_blockcount + - right.br_blockcount; break; case 0: @@ -2123,18 +1986,8 @@ xfs_bmap_add_extent_hole_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - /* DELTA: A new extent was added in a hole. */ - temp = new->br_startoff; - temp2 = new->br_blockcount; break; } - if (delta) { - temp2 += temp; - if (delta->xed_startoff > temp) - delta->xed_startoff = temp; - if (delta->xed_blockcount < temp2) - delta->xed_blockcount = temp2; - } done: *logflagsp = rval; return error; @@ -2959,7 +2812,6 @@ xfs_bmap_del_extent( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ - xfs_extdelta_t *delta, /* Change made to incore extents */ int whichfork, /* data or attr fork */ int rsvd) /* OK to allocate reserved blocks */ { @@ -3262,16 +3114,9 @@ xfs_bmap_del_extent( * Nothing to do for disk quota accounting here. */ ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), - rsvd); - if (delta) { - /* DELTA: report the original extent. */ - if (delta->xed_startoff > got.br_startoff) - delta->xed_startoff = got.br_startoff; - if (delta->xed_blockcount < got.br_startoff+got.br_blockcount) - delta->xed_blockcount = got.br_startoff + - got.br_blockcount; + if (da_old > da_new) { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)(da_old - da_new), rsvd); } done: *logflagsp = flags; @@ -3754,9 +3599,10 @@ xfs_bmap_add_attrfork( ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } ASSERT(ip->i_d.di_anextents == 0); - IHOLD(ip); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + switch (ip->i_d.di_format) { case XFS_DINODE_FMT_DEV: ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; @@ -4483,8 +4329,7 @@ xfs_bmapi( xfs_extlen_t total, /* total blocks needed */ xfs_bmbt_irec_t *mval, /* output: map values */ int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - xfs_extdelta_t *delta) /* o: change made to incore extents */ + xfs_bmap_free_t *flist) /* i/o: list extents to free */ { xfs_fsblock_t abno; /* allocated block number */ xfs_extlen_t alen; /* allocated extent length */ @@ -4596,10 +4441,7 @@ xfs_bmapi( end = bno + len; obno = bno; bma.ip = NULL; - if (delta) { - delta->xed_startoff = NULLFILEOFF; - delta->xed_blockcount = 0; - } + while (bno < end && n < *nmap) { /* * Reading past eof, act as though there's a hole @@ -4620,19 +4462,13 @@ xfs_bmapi( * allocate the stuff asked for in this bmap call * but that wouldn't be as good. */ - if (wasdelay && !(flags & XFS_BMAPI_EXACT)) { + if (wasdelay) { alen = (xfs_extlen_t)got.br_blockcount; aoff = got.br_startoff; if (lastx != NULLEXTNUM && lastx) { ep = xfs_iext_get_ext(ifp, lastx - 1); xfs_bmbt_get_all(ep, &prev); } - } else if (wasdelay) { - alen = (xfs_extlen_t) - XFS_FILBLKS_MIN(len, - (got.br_startoff + - got.br_blockcount) - bno); - aoff = bno; } else { alen = (xfs_extlen_t) XFS_FILBLKS_MIN(len, MAXEXTLEN); @@ -4694,13 +4530,13 @@ xfs_bmapi( -((int64_t)extsz), (flags & XFS_BMAPI_RSVBLOCKS)); } else { - error = xfs_mod_incore_sb(mp, + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -((int64_t)alen), (flags & XFS_BMAPI_RSVBLOCKS)); } if (!error) { - error = xfs_mod_incore_sb(mp, + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -((int64_t)indlen), (flags & XFS_BMAPI_RSVBLOCKS)); @@ -4710,7 +4546,7 @@ xfs_bmapi( (int64_t)extsz, (flags & XFS_BMAPI_RSVBLOCKS)); else if (error) - xfs_mod_incore_sb(mp, + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, (int64_t)alen, (flags & XFS_BMAPI_RSVBLOCKS)); @@ -4831,7 +4667,7 @@ xfs_bmapi( got.br_state = XFS_EXT_UNWRITTEN; } error = xfs_bmap_add_extent(ip, lastx, &cur, &got, - firstblock, flist, &tmp_logflags, delta, + firstblock, flist, &tmp_logflags, whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) @@ -4912,8 +4748,12 @@ xfs_bmapi( * Check if writing previously allocated but * unwritten extents. */ - if (wr && mval->br_state == XFS_EXT_UNWRITTEN && - ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { + if (wr && + ((mval->br_state == XFS_EXT_UNWRITTEN && + ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || + (mval->br_state == XFS_EXT_NORM && + ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == + (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { /* * Modify (by adding) the state flag, if writing. */ @@ -4925,9 +4765,11 @@ xfs_bmapi( *firstblock; cur->bc_private.b.flist = flist; } - mval->br_state = XFS_EXT_NORM; + mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) + ? XFS_EXT_NORM + : XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, mval, - firstblock, flist, &tmp_logflags, delta, + firstblock, flist, &tmp_logflags, whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) @@ -5017,14 +4859,6 @@ xfs_bmapi( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); error = 0; - if (delta && delta->xed_startoff != NULLFILEOFF) { - /* A change was actually made. - * Note that delta->xed_blockount is an offset at this - * point and needs to be converted to a block count. - */ - ASSERT(delta->xed_blockcount > delta->xed_startoff); - delta->xed_blockcount -= delta->xed_startoff; - } error0: /* * Log everything. Do this after conversion, there's no point in @@ -5136,8 +4970,6 @@ xfs_bunmapi( xfs_fsblock_t *firstblock, /* first allocated block controls a.g. for allocs */ xfs_bmap_free_t *flist, /* i/o: list extents to free */ - xfs_extdelta_t *delta, /* o: change made to incore - extents */ int *done) /* set if not done yet */ { xfs_btree_cur_t *cur; /* bmap btree cursor */ @@ -5196,10 +5028,7 @@ xfs_bunmapi( bno = start + len - 1; ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); - if (delta) { - delta->xed_startoff = NULLFILEOFF; - delta->xed_blockcount = 0; - } + /* * Check to see if the given block number is past the end of the * file, back up to the last block if so... @@ -5297,7 +5126,7 @@ xfs_bunmapi( } del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, &del, - firstblock, flist, &logflags, delta, + firstblock, flist, &logflags, XFS_DATA_FORK, 0); if (error) goto error0; @@ -5352,7 +5181,7 @@ xfs_bunmapi( prev.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx - 1, &cur, &prev, firstblock, flist, &logflags, - delta, XFS_DATA_FORK, 0); + XFS_DATA_FORK, 0); if (error) goto error0; goto nodelete; @@ -5361,7 +5190,7 @@ xfs_bunmapi( del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, &del, firstblock, flist, &logflags, - delta, XFS_DATA_FORK, 0); + XFS_DATA_FORK, 0); if (error) goto error0; goto nodelete; @@ -5381,7 +5210,7 @@ xfs_bunmapi( ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, @@ -5414,7 +5243,7 @@ xfs_bunmapi( goto error0; } error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, - &tmp_logflags, delta, whichfork, rsvd); + &tmp_logflags, whichfork, rsvd); logflags |= tmp_logflags; if (error) goto error0; @@ -5471,14 +5300,6 @@ nodelete: ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); error = 0; - if (delta && delta->xed_startoff != NULLFILEOFF) { - /* A change was actually made. - * Note that delta->xed_blockount is an offset at this - * point and needs to be converted to a block count. - */ - ASSERT(delta->xed_blockcount > delta->xed_startoff); - delta->xed_blockcount -= delta->xed_startoff; - } error0: /* * Log everything. Do this after conversion, there's no point in @@ -5605,28 +5426,6 @@ xfs_getbmap( prealloced = 0; fixlen = 1LL << 32; } else { - /* - * If the BMV_IF_NO_DMAPI_READ interface bit specified, do - * not generate a DMAPI read event. Otherwise, if the - * DM_EVENT_READ bit is set for the file, generate a read - * event in order that the DMAPI application may do its thing - * before we return the extents. Usually this means restoring - * user file data to regions of the file that look like holes. - * - * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify - * BMV_IF_NO_DMAPI_READ so that read events are generated. - * If this were not true, callers of ioctl(XFS_IOC_GETBMAP) - * could misinterpret holes in a DMAPI file as true holes, - * when in fact they may represent offline user data. - */ - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && - !(iflags & BMV_IF_NO_DMAPI_READ)) { - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, - 0, 0, 0, NULL); - if (error) - return XFS_ERROR(error); - } - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) @@ -5713,7 +5512,7 @@ xfs_getbmap( error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), bmapi_flags, NULL, 0, map, &nmap, - NULL, NULL); + NULL); if (error) goto out_free_map; ASSERT(nmap <= subnex); @@ -5744,12 +5543,24 @@ xfs_getbmap( map[i].br_startblock)) goto out_free_map; - nexleft--; bmv->bmv_offset = out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + + /* + * In case we don't want to return the hole, + * don't increase cur_ext so that we can reuse + * it in the next loop. + */ + if ((iflags & BMV_IF_NO_HOLES) && + map[i].br_startblock == HOLESTARTBLOCK) { + memset(&out[cur_ext], 0, sizeof(out[cur_ext])); + continue; + } + + nexleft--; bmv->bmv_entries++; cur_ext++; } @@ -5859,66 +5670,34 @@ xfs_bmap_eof( } #ifdef DEBUG -STATIC -xfs_buf_t * +STATIC struct xfs_buf * xfs_bmap_get_bp( - xfs_btree_cur_t *cur, + struct xfs_btree_cur *cur, xfs_fsblock_t bno) { - int i; - xfs_buf_t *bp; + struct xfs_log_item_desc *lidp; + int i; if (!cur) - return(NULL); - - bp = NULL; - for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - bp = cur->bc_bufs[i]; - if (!bp) break; - if (XFS_BUF_ADDR(bp) == bno) - break; /* Found it */ - } - if (i == XFS_BTREE_MAXLEVELS) - bp = NULL; - - if (!bp) { /* Chase down all the log items to see if the bp is there */ - xfs_log_item_chunk_t *licp; - xfs_trans_t *tp; - - tp = cur->bc_tp; - licp = &tp->t_items; - while (!bp && licp != NULL) { - if (xfs_lic_are_all_free(licp)) { - licp = licp->lic_next; - continue; - } - for (i = 0; i < licp->lic_unused; i++) { - xfs_log_item_desc_t *lidp; - xfs_log_item_t *lip; - xfs_buf_log_item_t *bip; - xfs_buf_t *lbp; - - if (xfs_lic_isfree(licp, i)) { - continue; - } - - lidp = xfs_lic_slot(licp, i); - lip = lidp->lid_item; - if (lip->li_type != XFS_LI_BUF) - continue; + return NULL; - bip = (xfs_buf_log_item_t *)lip; - lbp = bip->bli_buf; + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { + if (!cur->bc_bufs[i]) + break; + if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) + return cur->bc_bufs[i]; + } - if (XFS_BUF_ADDR(lbp) == bno) { - bp = lbp; - break; /* Found it */ - } - } - licp = licp->lic_next; - } + /* Chase down all the log items to see if the bp is there */ + list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { + struct xfs_buf_log_item *bip; + bip = (struct xfs_buf_log_item *)lidp->lid_item; + if (bip->bli_item.li_type == XFS_LI_BUF && + XFS_BUF_ADDR(bip->bli_buf) == bno) + return bip->bli_buf; } - return(bp); + + return NULL; } STATIC void diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 419dafb9d87d..71ec9b6ecdfc 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -28,20 +28,6 @@ struct xfs_trans; extern kmem_zone_t *xfs_bmap_free_item_zone; /* - * DELTA: describe a change to the in-core extent list. - * - * Internally the use of xed_blockount is somewhat funky. - * xed_blockcount contains an offset much of the time because this - * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are - * the same underlying type). - */ -typedef struct xfs_extdelta -{ - xfs_fileoff_t xed_startoff; /* offset of range */ - xfs_filblks_t xed_blockcount; /* blocks in range */ -} xfs_extdelta_t; - -/* * List of extents to be free "later". * The list is kept sorted on xbf_startblock. */ @@ -82,27 +68,25 @@ typedef struct xfs_bmap_free #define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ -#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */ -#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */ -#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */ -#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */ -#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */ -#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */ +#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ +#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ +#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ +#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ /* combine contig. space */ -#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ -/* XFS_BMAPI_DIRECT_IO 0x800 */ -#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */ - /* need write cache flushing and no */ - /* additional allocation alignments */ +#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ +/* + * unwritten extent conversion - this needs write cache flushing and no additional + * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts + * from written to unwritten, otherwise convert from unwritten to written. + */ +#define XFS_BMAPI_CONVERT 0x200 #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_WRITE, "WRITE" }, \ { XFS_BMAPI_DELAY, "DELAY" }, \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ - { XFS_BMAPI_EXACT, "EXACT" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ - { XFS_BMAPI_ASYNC, "ASYNC" }, \ { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ @@ -310,9 +294,7 @@ xfs_bmapi( xfs_extlen_t total, /* total blocks needed */ struct xfs_bmbt_irec *mval, /* output: map values */ int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - xfs_extdelta_t *delta); /* o: change made to incore - extents */ + xfs_bmap_free_t *flist); /* i/o: list extents to free */ /* * Map file blocks to filesystem blocks, simple version. @@ -346,8 +328,6 @@ xfs_bunmapi( xfs_fsblock_t *firstblock, /* first allocated block controls a.g. for allocs */ xfs_bmap_free_t *flist, /* i/o: list extents to free */ - xfs_extdelta_t *delta, /* o: change made to incore - extents */ int *done); /* set if not done yet */ /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 416e47e54b83..87d3c10b6954 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -24,21 +24,16 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_btree_trace.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 96be4b0f2496..04f9cca8da7e 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -24,20 +24,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_btree.h" #include "xfs_btree_trace.h" -#include "xfs_ialloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -222,7 +217,7 @@ xfs_btree_del_cursor( */ for (i = 0; i < cur->bc_nlevels; i++) { if (cur->bc_bufs[i]) - xfs_btree_setbuf(cur, i, NULL); + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); else if (!error) break; } @@ -661,7 +656,7 @@ xfs_btree_reada_bufl( ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); } /* @@ -681,7 +676,7 @@ xfs_btree_reada_bufs( ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); } STATIC int @@ -768,22 +763,19 @@ xfs_btree_readahead( * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. */ -void +STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - xfs_buf_t *obp; /* old buffer pointer */ - obp = cur->bc_bufs[lev]; - if (obp) - xfs_trans_brelse(cur->bc_tp, obp); + if (cur->bc_bufs[lev]) + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); cur->bc_bufs[lev] = bp; cur->bc_ra[lev] = 0; - if (!bp) - return; + b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) @@ -3016,6 +3008,43 @@ out0: return 0; } +/* + * Kill the current root node, and replace it with it's only child node. + */ +STATIC int +xfs_btree_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + cur->bc_ops->set_root(cur, newroot, -1); + + error = cur->bc_ops->free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_ra[level] = 0; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + STATIC int xfs_btree_dec_cursor( struct xfs_btree_cur *cur, @@ -3200,7 +3229,7 @@ xfs_btree_delrec( * Make it the new root of the btree. */ pp = xfs_btree_ptr_addr(cur, 1, block); - error = cur->bc_ops->kill_root(cur, bp, level, pp); + error = xfs_btree_kill_root(cur, bp, level, pp); if (error) goto error0; } else if (level > 0) { diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7fa07062bdda..82fafc66bd1f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -152,9 +152,7 @@ struct xfs_btree_ops { /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, int level_change); - int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, - int level, union xfs_btree_ptr *newroot); + union xfs_btree_ptr *nptr, int level_change); /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, @@ -399,16 +397,6 @@ xfs_btree_reada_bufs( xfs_agblock_t agbno, /* allocation group block number */ xfs_extlen_t count); /* count of filesystem blocks */ -/* - * Set the buffer for level "lev" in the cursor to bp, releasing - * any previous buffer. - */ -void -xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - struct xfs_buf *bp); /* new buffer to set */ - /* * Common btree core entry points. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 02a80984aa05..2686d0d54c5b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -34,6 +33,12 @@ kmem_zone_t *xfs_buf_item_zone; +static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_buf_log_item, bli_item); +} + + #ifdef XFS_TRANS_DEBUG /* * This function uses an alternate strategy for tracking the bytes @@ -151,12 +156,13 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); */ STATIC uint xfs_buf_item_size( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - uint nvecs; - int next_bit; - int last_bit; - xfs_buf_t *bp; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; + uint nvecs; + int next_bit; + int last_bit; ASSERT(atomic_read(&bip->bli_refcount) > 0); if (bip->bli_flags & XFS_BLI_STALE) { @@ -170,7 +176,6 @@ xfs_buf_item_size( return 1; } - bp = bip->bli_buf; ASSERT(bip->bli_flags & XFS_BLI_LOGGED); nvecs = 1; last_bit = xfs_next_bit(bip->bli_format.blf_data_map, @@ -219,13 +224,13 @@ xfs_buf_item_size( */ STATIC void xfs_buf_item_format( - xfs_buf_log_item_t *bip, - xfs_log_iovec_t *log_vector) + struct xfs_log_item *lip, + struct xfs_log_iovec *vecp) { + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; uint base_size; uint nvecs; - xfs_log_iovec_t *vecp; - xfs_buf_t *bp; int first_bit; int last_bit; int next_bit; @@ -235,8 +240,6 @@ xfs_buf_item_format( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); - bp = bip->bli_buf; - vecp = log_vector; /* * The size of the base structure is the size of the @@ -248,7 +251,7 @@ xfs_buf_item_format( base_size = (uint)(sizeof(xfs_buf_log_format_t) + ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); - vecp->i_addr = (xfs_caddr_t)&bip->bli_format; + vecp->i_addr = &bip->bli_format; vecp->i_len = base_size; vecp->i_type = XLOG_REG_TYPE_BFORMAT; vecp++; @@ -263,7 +266,7 @@ xfs_buf_item_format( */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && - xfs_log_item_in_current_chkpt(&bip->bli_item))) + xfs_log_item_in_current_chkpt(lip))) bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; bip->bli_flags &= ~XFS_BLI_INODE_BUF; } @@ -356,66 +359,90 @@ xfs_buf_item_format( /* * This is called to pin the buffer associated with the buf log item in memory - * so it cannot be written out. Simply call bpin() on the buffer to do this. + * so it cannot be written out. * * We also always take a reference to the buffer log item here so that the bli * is held while the item is pinned in memory. This means that we can * unconditionally drop the reference count a transaction holds when the * transaction is completed. */ - STATIC void xfs_buf_item_pin( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - xfs_buf_t *bp; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); - bp = bip->bli_buf; - ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_ISBUSY(bip->bli_buf)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); - atomic_inc(&bip->bli_refcount); + trace_xfs_buf_item_pin(bip); - xfs_bpin(bp); -} + atomic_inc(&bip->bli_refcount); + atomic_inc(&bip->bli_buf->b_pin_count); +} /* * This is called to unpin the buffer associated with the buf log * item which was previously pinned with a call to xfs_buf_item_pin(). - * Just call bunpin() on the buffer to do this. * * Also drop the reference to the buf item for the current transaction. * If the XFS_BLI_STALE flag is set and we are the last reference, * then free up the buf log item and unlock the buffer. + * + * If the remove flag is set we are called from uncommit in the + * forced-shutdown path. If that is true and the reference count on + * the log item is going to drop to zero we need to free the item's + * descriptor in the transaction. */ STATIC void xfs_buf_item_unpin( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip, + int remove) { - struct xfs_ail *ailp; - xfs_buf_t *bp; - int freed; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + xfs_buf_t *bp = bip->bli_buf; + struct xfs_ail *ailp = lip->li_ailp; int stale = bip->bli_flags & XFS_BLI_STALE; + int freed; - bp = bip->bli_buf; - ASSERT(bp != NULL); ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); + trace_xfs_buf_item_unpin(bip); freed = atomic_dec_and_test(&bip->bli_refcount); - ailp = bip->bli_item.li_ailp; - xfs_bunpin(bp); + + if (atomic_dec_and_test(&bp->b_pin_count)) + wake_up_all(&bp->b_waiters); + if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + trace_xfs_buf_item_unpin_stale(bip); + if (remove) { + /* + * We have to remove the log item from the transaction + * as we are about to release our reference to the + * buffer. If we don't, the unlock that occurs later + * in xfs_trans_uncommit() will ry to reference the + * buffer which we no longer have a hold on. + */ + xfs_trans_del_item(lip); + + /* + * Since the transaction no longer refers to the buffer, + * the buffer should no longer refer to the transaction. + */ + XFS_BUF_SET_FSPRIVATE2(bp, NULL); + } + /* * If we get called here because of an IO error, we may * or may not have the item on the AIL. xfs_trans_ail_delete() @@ -437,48 +464,6 @@ xfs_buf_item_unpin( } /* - * this is called from uncommit in the forced-shutdown path. - * we need to check to see if the reference count on the log item - * is going to drop to zero. If so, unpin will free the log item - * so we need to free the item's descriptor (that points to the item) - * in the transaction. - */ -STATIC void -xfs_buf_item_unpin_remove( - xfs_buf_log_item_t *bip, - xfs_trans_t *tp) -{ - /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */ - if ((atomic_read(&bip->bli_refcount) == 1) && - (bip->bli_flags & XFS_BLI_STALE)) { - /* - * yes -- We can safely do some work here and then call - * buf_item_unpin to do the rest because we are - * are holding the buffer locked so no one else will be - * able to bump up the refcount. We have to remove the - * log item from the transaction as we are about to release - * our reference to the buffer. If we don't, the unlock that - * occurs later in the xfs_trans_uncommit() will try to - * reference the buffer which we no longer have a hold on. - */ - struct xfs_log_item_desc *lidp; - - ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); - trace_xfs_buf_item_unpin_stale(bip); - - lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip); - xfs_trans_free_item(tp, lidp); - - /* - * Since the transaction no longer refers to the buffer, the - * buffer should no longer refer to the transaction. - */ - XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL); - } - xfs_buf_item_unpin(bip); -} - -/* * This is called to attempt to lock the buffer associated with this * buf log item. Don't sleep on the buffer lock. If we can't get * the lock right away, return 0. If we can get the lock, take a @@ -488,11 +473,11 @@ xfs_buf_item_unpin_remove( */ STATIC uint xfs_buf_item_trylock( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - xfs_buf_t *bp; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; - bp = bip->bli_buf; if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; if (!XFS_BUF_CPSEMA(bp)) @@ -529,13 +514,12 @@ xfs_buf_item_trylock( */ STATIC void xfs_buf_item_unlock( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - int aborted; - xfs_buf_t *bp; - uint hold; - - bp = bip->bli_buf; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; + int aborted; + uint hold; /* Clear the buffer's association with this transaction. */ XFS_BUF_SET_FSPRIVATE2(bp, NULL); @@ -546,7 +530,7 @@ xfs_buf_item_unlock( * (cancelled) buffers at unpin time, but we'll never go through the * pin/unpin cycle if we abort inside commit. */ - aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; + aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; /* * Before possibly freeing the buf item, determine if we should @@ -607,16 +591,16 @@ xfs_buf_item_unlock( */ STATIC xfs_lsn_t xfs_buf_item_committed( - xfs_buf_log_item_t *bip, + struct xfs_log_item *lip, xfs_lsn_t lsn) { + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + trace_xfs_buf_item_committed(bip); - if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && - (bip->bli_item.li_lsn != 0)) { - return bip->bli_item.li_lsn; - } - return (lsn); + if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0) + return lip->li_lsn; + return lsn; } /* @@ -626,15 +610,16 @@ xfs_buf_item_committed( */ STATIC void xfs_buf_item_push( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - xfs_buf_t *bp; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); + trace_xfs_buf_item_push(bip); - bp = bip->bli_buf; - ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); xfs_buf_relse(bp); } @@ -646,22 +631,24 @@ xfs_buf_item_push( */ STATIC void xfs_buf_item_pushbuf( - xfs_buf_log_item_t *bip) + struct xfs_log_item *lip) { - xfs_buf_t *bp; + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + ASSERT(XFS_BUF_ISDELAYWRITE(bp)); + trace_xfs_buf_item_pushbuf(bip); - bp = bip->bli_buf; - ASSERT(XFS_BUF_ISDELAYWRITE(bp)); xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); } -/* ARGSUSED */ STATIC void -xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) +xfs_buf_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) { } @@ -669,21 +656,16 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) * This is the ops vector shared by all buf log items. */ static struct xfs_item_ops xfs_buf_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_buf_item_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) - xfs_buf_item_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_buf_item_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_buf_item_committing + .iop_size = xfs_buf_item_size, + .iop_format = xfs_buf_item_format, + .iop_pin = xfs_buf_item_pin, + .iop_unpin = xfs_buf_item_unpin, + .iop_trylock = xfs_buf_item_trylock, + .iop_unlock = xfs_buf_item_unlock, + .iop_committed = xfs_buf_item_committed, + .iop_push = xfs_buf_item_push, + .iop_pushbuf = xfs_buf_item_pushbuf, + .iop_committing = xfs_buf_item_committing }; @@ -710,9 +692,7 @@ xfs_buf_item_init( * the first. If we do already have one, there is * nothing to do here so return. */ - if (bp->b_mount != mp) - bp->b_mount = mp; - XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); + ASSERT(bp->b_target->bt_mount == mp); if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); if (lip->li_type == XFS_LI_BUF) { @@ -993,7 +973,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); return; } @@ -1052,7 +1032,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } /* @@ -1098,15 +1078,14 @@ xfs_buf_error_relse( * It is called by xfs_buf_iodone_callbacks() above which will take * care of cleaning up the buffer itself. */ -/* ARGSUSED */ void xfs_buf_iodone( - xfs_buf_t *bp, - xfs_buf_log_item_t *bip) + struct xfs_buf *bp, + struct xfs_log_item *lip) { - struct xfs_ail *ailp = bip->bli_item.li_ailp; + struct xfs_ail *ailp = lip->li_ailp; - ASSERT(bip->bli_buf == bp); + ASSERT(BUF_ITEM(lip)->bli_buf == bp); xfs_buf_rele(bp); @@ -1120,6 +1099,6 @@ xfs_buf_iodone( * Either way, AIL is useless if we're forcing a shutdown. */ spin_lock(&ailp->xa_lock); - xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); - xfs_buf_item_free(bip); + xfs_trans_ail_delete(ailp, lip); + xfs_buf_item_free(BUF_ITEM(lip)); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index f20bb472d582..0e2ed43f16c7 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -124,7 +124,7 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void(*)(struct xfs_buf *, xfs_log_item_t *), xfs_log_item_t *); void xfs_buf_iodone_callbacks(struct xfs_buf *); -void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *); +void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); #ifdef XFS_TRANS_DEBUG void diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 0ca556b4bf31..1c00bedb3175 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -25,19 +25,14 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" -#include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" @@ -581,16 +576,14 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, xfs_da_intnode_t *node; xfs_da_node_entry_t *btree; int tmp; - xfs_mount_t *mp; node = oldblk->bp->data; - mp = state->mp; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) - ASSERT(newblk->blkno >= mp->m_dirleafblk && - newblk->blkno < mp->m_dirfreeblk); + ASSERT(newblk->blkno >= state->mp->m_dirleafblk && + newblk->blkno < state->mp->m_dirfreeblk); /* * We may need to make some room before we insert the new node. @@ -1601,7 +1594,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist, NULL))) { + args->flist))) { return error; } ASSERT(nmap <= 1); @@ -1622,8 +1615,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist, - NULL))) { + &mapp[mapi], &nmap, args->flist))) { kmem_free(mapp); return error; } @@ -1884,7 +1876,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, */ if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - 0, args->firstblock, args->flist, NULL, + 0, args->firstblock, args->flist, &done)) == ENOSPC) { if (w != XFS_DATA_FORK) break; @@ -1989,7 +1981,7 @@ xfs_da_do_buf( nfsb, XFS_BMAPI_METADATA | xfs_bmapi_aflag(whichfork), - NULL, 0, mapp, &nmap, NULL, NULL))) + NULL, 0, mapp, &nmap, NULL))) goto exit0; } } else { @@ -2050,7 +2042,7 @@ xfs_da_do_buf( mappedbno, nmapped, 0, &bp); break; case 3: - xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); + xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); error = 0; bp = NULL; break; diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 7f159d2a429a..3b9582c60a22 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -24,24 +24,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_dfrag.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_vnodeops.h" #include "xfs_trace.h" @@ -425,11 +416,8 @@ xfs_swap_extents( } - IHOLD(ip); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - IHOLD(tip); - xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_log_inode(tp, ip, ilf_fields); xfs_trans_log_inode(tp, tip, tilf_fields); diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5b153b2e6a3..dffba9ba0db6 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -49,8 +49,9 @@ typedef struct xfs_dinode { __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number of links to file */ - __be16 di_projid; /* owner's project id */ - __u8 di_pad[8]; /* unused, zeroed space */ + __be16 di_projid_lo; /* lower part of owner's project id */ + __be16 di_projid_hi; /* higher part owner's project id */ + __u8 di_pad[6]; /* unused, zeroed space */ __be16 di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 42520f041265..a1321bc7f192 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -25,13 +25,11 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -382,7 +380,7 @@ xfs_readdir( int rval; /* return value */ int v; /* type-checking value */ - xfs_itrace_entry(dp); + trace_xfs_readdir(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); @@ -549,7 +547,7 @@ xfs_dir2_grow_inode( if ((error = xfs_bmapi(tp, dp, bno, count, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist, NULL))) + args->flist))) return error; ASSERT(nmap <= 1); if (nmap == 1) { @@ -581,8 +579,7 @@ xfs_dir2_grow_inode( if ((error = xfs_bmapi(tp, dp, b, c, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist, - NULL))) { + &mapp[mapi], &nmap, args->flist))) { kmem_free(mapp); return error; } @@ -715,7 +712,7 @@ xfs_dir2_shrink_inode( */ if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, - NULL, &done))) { + &done))) { /* * ENOSPC actually can happen if we're in a removename with * no space reservation, and the resulting block removal diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 779a267b0a84..580d99cef9e7 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -24,12 +24,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -1073,10 +1071,10 @@ xfs_dir2_sf_to_block( */ buf_len = dp->i_df.if_bytes; - buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); + buf = kmem_alloc(buf_len, KM_SLEEP); - memcpy(buf, sfp, dp->i_df.if_bytes); - xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); + memcpy(buf, sfp, buf_len); + xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK); dp->i_d.di_size = 0; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 498f8d694330..921595b84f5b 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -24,12 +24,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_dir2_data.h" diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index e2d89854ec9e..ae891223be90 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -25,11 +25,9 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_attr_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -875,7 +873,7 @@ xfs_dir2_leaf_getdents( xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - map_off, XFS_BMAPI_METADATA, NULL, 0, - &map[map_valid], &nmap, NULL, NULL); + &map[map_valid], &nmap, NULL); /* * Don't know if we should ignore this or * try to return an error. @@ -963,7 +961,7 @@ xfs_dir2_leaf_getdents( if (i > ra_current && map[ra_index].br_blockcount >= mp->m_dirblkfsbs) { - xfs_baread(mp->m_ddev_targp, + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map[ra_index].br_startblock + ra_offset), diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 78fc4d9ae756..f9a0864b696a 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -24,12 +24,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index c1a5945d463a..b1bae6b1eed9 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -24,12 +24,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h deleted file mode 100644 index 2813cdd72375..000000000000 --- a/fs/xfs/xfs_dmapi.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DMAPI_H__ -#define __XFS_DMAPI_H__ - -/* Values used to define the on-disk version of dm_attrname_t. All - * on-disk attribute names start with the 8-byte string "SGI_DMI_". - * - * In the on-disk inode, DMAPI attribute names consist of the user-provided - * name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be - * changed. - */ - -#define DMATTR_PREFIXLEN 8 -#define DMATTR_PREFIXSTRING "SGI_DMI_" - -typedef enum { - DM_EVENT_INVALID = -1, - DM_EVENT_CANCEL = 0, /* not supported */ - DM_EVENT_MOUNT = 1, - DM_EVENT_PREUNMOUNT = 2, - DM_EVENT_UNMOUNT = 3, - DM_EVENT_DEBUT = 4, /* not supported */ - DM_EVENT_CREATE = 5, - DM_EVENT_CLOSE = 6, /* not supported */ - DM_EVENT_POSTCREATE = 7, - DM_EVENT_REMOVE = 8, - DM_EVENT_POSTREMOVE = 9, - DM_EVENT_RENAME = 10, - DM_EVENT_POSTRENAME = 11, - DM_EVENT_LINK = 12, - DM_EVENT_POSTLINK = 13, - DM_EVENT_SYMLINK = 14, - DM_EVENT_POSTSYMLINK = 15, - DM_EVENT_READ = 16, - DM_EVENT_WRITE = 17, - DM_EVENT_TRUNCATE = 18, - DM_EVENT_ATTRIBUTE = 19, - DM_EVENT_DESTROY = 20, - DM_EVENT_NOSPACE = 21, - DM_EVENT_USER = 22, - DM_EVENT_MAX = 23 -} dm_eventtype_t; -#define HAVE_DM_EVENTTYPE_T - -typedef enum { - DM_RIGHT_NULL, - DM_RIGHT_SHARED, - DM_RIGHT_EXCL -} dm_right_t; -#define HAVE_DM_RIGHT_T - -/* Defines for determining if an event message should be sent. */ -#ifdef HAVE_DMAPI -#define DM_EVENT_ENABLED(ip, event) ( \ - unlikely ((ip)->i_mount->m_flags & XFS_MOUNT_DMAPI) && \ - ( ((ip)->i_d.di_dmevmask & (1 << event)) || \ - ((ip)->i_mount->m_dmevmask & (1 << event)) ) \ - ) -#else -#define DM_EVENT_ENABLED(ip, event) (0) -#endif - -#define DM_XFS_VALID_FS_EVENTS ( \ - (1 << DM_EVENT_PREUNMOUNT) | \ - (1 << DM_EVENT_UNMOUNT) | \ - (1 << DM_EVENT_NOSPACE) | \ - (1 << DM_EVENT_DEBUT) | \ - (1 << DM_EVENT_CREATE) | \ - (1 << DM_EVENT_POSTCREATE) | \ - (1 << DM_EVENT_REMOVE) | \ - (1 << DM_EVENT_POSTREMOVE) | \ - (1 << DM_EVENT_RENAME) | \ - (1 << DM_EVENT_POSTRENAME) | \ - (1 << DM_EVENT_LINK) | \ - (1 << DM_EVENT_POSTLINK) | \ - (1 << DM_EVENT_SYMLINK) | \ - (1 << DM_EVENT_POSTSYMLINK) | \ - (1 << DM_EVENT_ATTRIBUTE) | \ - (1 << DM_EVENT_DESTROY) ) - -/* Events valid in dm_set_eventlist() when called with a file handle for - a regular file or a symlink. These events are persistent. -*/ - -#define DM_XFS_VALID_FILE_EVENTS ( \ - (1 << DM_EVENT_ATTRIBUTE) | \ - (1 << DM_EVENT_DESTROY) ) - -/* Events valid in dm_set_eventlist() when called with a file handle for - a directory. These events are persistent. -*/ - -#define DM_XFS_VALID_DIRECTORY_EVENTS ( \ - (1 << DM_EVENT_CREATE) | \ - (1 << DM_EVENT_POSTCREATE) | \ - (1 << DM_EVENT_REMOVE) | \ - (1 << DM_EVENT_POSTREMOVE) | \ - (1 << DM_EVENT_RENAME) | \ - (1 << DM_EVENT_POSTRENAME) | \ - (1 << DM_EVENT_LINK) | \ - (1 << DM_EVENT_POSTLINK) | \ - (1 << DM_EVENT_SYMLINK) | \ - (1 << DM_EVENT_POSTSYMLINK) | \ - (1 << DM_EVENT_ATTRIBUTE) | \ - (1 << DM_EVENT_DESTROY) ) - -/* Events supported by the XFS filesystem. */ -#define DM_XFS_SUPPORTED_EVENTS ( \ - (1 << DM_EVENT_MOUNT) | \ - (1 << DM_EVENT_PREUNMOUNT) | \ - (1 << DM_EVENT_UNMOUNT) | \ - (1 << DM_EVENT_NOSPACE) | \ - (1 << DM_EVENT_CREATE) | \ - (1 << DM_EVENT_POSTCREATE) | \ - (1 << DM_EVENT_REMOVE) | \ - (1 << DM_EVENT_POSTREMOVE) | \ - (1 << DM_EVENT_RENAME) | \ - (1 << DM_EVENT_POSTRENAME) | \ - (1 << DM_EVENT_LINK) | \ - (1 << DM_EVENT_POSTLINK) | \ - (1 << DM_EVENT_SYMLINK) | \ - (1 << DM_EVENT_POSTSYMLINK) | \ - (1 << DM_EVENT_READ) | \ - (1 << DM_EVENT_WRITE) | \ - (1 << DM_EVENT_TRUNCATE) | \ - (1 << DM_EVENT_ATTRIBUTE) | \ - (1 << DM_EVENT_DESTROY) ) - - -/* - * Definitions used for the flags field on dm_send_*_event(). - */ - -#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ -#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ -#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */ -#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ -#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ - -/* - * Pull in platform specific event flags defines - */ -#include "xfs_dmapi_priv.h" - -/* - * Macros to turn caller specified delay/block flags into - * dm_send_xxxx_event flag DM_FLAGS_NDELAY. - */ - -#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \ - DM_FLAGS_NDELAY : 0) -#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) - -#endif /* __XFS_DMAPI_H__ */ diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c deleted file mode 100644 index e71e2581c0c3..000000000000 --- a/fs/xfs/xfs_dmops.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dmapi.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - - -static struct xfs_dmops xfs_dmcore_stub = { - .xfs_send_data = (xfs_send_data_t)fs_nosys, - .xfs_send_mmap = (xfs_send_mmap_t)fs_noerr, - .xfs_send_destroy = (xfs_send_destroy_t)fs_nosys, - .xfs_send_namesp = (xfs_send_namesp_t)fs_nosys, - .xfs_send_mount = (xfs_send_mount_t)fs_nosys, - .xfs_send_unmount = (xfs_send_unmount_t)fs_noerr, -}; - -int -xfs_dmops_get(struct xfs_mount *mp) -{ - if (mp->m_flags & XFS_MOUNT_DMAPI) { - cmn_err(CE_WARN, - "XFS: dmapi support not available in this kernel."); - return EINVAL; - } - - mp->m_dm_ops = &xfs_dmcore_stub; - return 0; -} - -void -xfs_dmops_put(struct xfs_mount *mp) -{ -} diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 047b8a8e5c29..ed9990267661 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -23,12 +23,8 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_utils.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 409fe81585fd..a55e687bf562 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -24,7 +24,6 @@ #include "xfs_buf_item.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" @@ -33,18 +32,19 @@ kmem_zone_t *xfs_efi_zone; kmem_zone_t *xfs_efd_zone; -STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); +static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_efi_log_item, efi_item); +} void -xfs_efi_item_free(xfs_efi_log_item_t *efip) +xfs_efi_item_free( + struct xfs_efi_log_item *efip) { - int nexts = efip->efi_format.efi_nextents; - - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { + if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); - } else { + else kmem_zone_free(xfs_efi_zone, efip); - } } /* @@ -52,9 +52,9 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip) * We only need 1 iovec for an efi item. It just logs the efi_log_format * structure. */ -/*ARGSUSED*/ STATIC uint -xfs_efi_item_size(xfs_efi_log_item_t *efip) +xfs_efi_item_size( + struct xfs_log_item *lip) { return 1; } @@ -67,10 +67,12 @@ xfs_efi_item_size(xfs_efi_log_item_t *efip) * slots in the efi item have been filled. */ STATIC void -xfs_efi_item_format(xfs_efi_log_item_t *efip, - xfs_log_iovec_t *log_vector) +xfs_efi_item_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) { - uint size; + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + uint size; ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); @@ -80,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); efip->efi_format.efi_size = 1; - log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); + log_vector->i_addr = &efip->efi_format; log_vector->i_len = size; log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; ASSERT(size >= sizeof(xfs_efi_log_format_t)); @@ -90,60 +92,33 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, /* * Pinning has no meaning for an efi item, so just return. */ -/*ARGSUSED*/ STATIC void -xfs_efi_item_pin(xfs_efi_log_item_t *efip) +xfs_efi_item_pin( + struct xfs_log_item *lip) { - return; } - /* * While EFIs cannot really be pinned, the unpin operation is the * last place at which the EFI is manipulated during a transaction. * Here we coordinate with xfs_efi_cancel() to determine who gets to * free the EFI. */ -/*ARGSUSED*/ -STATIC void -xfs_efi_item_unpin(xfs_efi_log_item_t *efip) -{ - struct xfs_ail *ailp = efip->efi_item.li_ailp; - - spin_lock(&ailp->xa_lock); - if (efip->efi_flags & XFS_EFI_CANCELED) { - /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); - xfs_efi_item_free(efip); - } else { - efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&ailp->xa_lock); - } -} - -/* - * like unpin only we have to also clear the xaction descriptor - * pointing the log item if we free the item. This routine duplicates - * unpin because efi_flags is protected by the AIL lock. Freeing - * the descriptor and then calling unpin would force us to drop the AIL - * lock which would open up a race condition. - */ STATIC void -xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) +xfs_efi_item_unpin( + struct xfs_log_item *lip, + int remove) { - struct xfs_ail *ailp = efip->efi_item.li_ailp; - xfs_log_item_desc_t *lidp; + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_ail *ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { - /* - * free the xaction descriptor pointing to this item - */ - lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); - xfs_trans_free_item(tp, lidp); + if (remove) + xfs_trans_del_item(lip); /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); + xfs_trans_ail_delete(ailp, lip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; @@ -158,9 +133,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) * XFS_ITEM_PINNED so that the caller will eventually flush the log. * This should help in getting the EFI out of the AIL. */ -/*ARGSUSED*/ STATIC uint -xfs_efi_item_trylock(xfs_efi_log_item_t *efip) +xfs_efi_item_trylock( + struct xfs_log_item *lip) { return XFS_ITEM_PINNED; } @@ -168,13 +143,12 @@ xfs_efi_item_trylock(xfs_efi_log_item_t *efip) /* * Efi items have no locking, so just return. */ -/*ARGSUSED*/ STATIC void -xfs_efi_item_unlock(xfs_efi_log_item_t *efip) +xfs_efi_item_unlock( + struct xfs_log_item *lip) { - if (efip->efi_item.li_flags & XFS_LI_ABORTED) - xfs_efi_item_free(efip); - return; + if (lip->li_flags & XFS_LI_ABORTED) + xfs_efi_item_free(EFI_ITEM(lip)); } /* @@ -183,9 +157,10 @@ xfs_efi_item_unlock(xfs_efi_log_item_t *efip) * flag is not paid any attention here. Checking for that is delayed * until the EFI is unpinned. */ -/*ARGSUSED*/ STATIC xfs_lsn_t -xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) +xfs_efi_item_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { return lsn; } @@ -195,11 +170,10 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) * stuck waiting for all of its corresponding efd items to be * committed to disk. */ -/*ARGSUSED*/ STATIC void -xfs_efi_item_push(xfs_efi_log_item_t *efip) +xfs_efi_item_push( + struct xfs_log_item *lip) { - return; } /* @@ -209,61 +183,55 @@ xfs_efi_item_push(xfs_efi_log_item_t *efip) * example, for inodes, the inode is locked throughout the extent freeing * so the dependency should be recorded there. */ -/*ARGSUSED*/ STATIC void -xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) +xfs_efi_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - return; } /* * This is the ops vector shared by all efi log items. */ static struct xfs_item_ops xfs_efi_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_efi_item_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) - xfs_efi_item_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efi_item_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_efi_item_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_efi_item_committing + .iop_size = xfs_efi_item_size, + .iop_format = xfs_efi_item_format, + .iop_pin = xfs_efi_item_pin, + .iop_unpin = xfs_efi_item_unpin, + .iop_trylock = xfs_efi_item_trylock, + .iop_unlock = xfs_efi_item_unlock, + .iop_committed = xfs_efi_item_committed, + .iop_push = xfs_efi_item_push, + .iop_committing = xfs_efi_item_committing }; /* * Allocate and initialize an efi item with the given number of extents. */ -xfs_efi_log_item_t * -xfs_efi_init(xfs_mount_t *mp, - uint nextents) +struct xfs_efi_log_item * +xfs_efi_init( + struct xfs_mount *mp, + uint nextents) { - xfs_efi_log_item_t *efip; + struct xfs_efi_log_item *efip; uint size; ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { size = (uint)(sizeof(xfs_efi_log_item_t) + ((nextents - 1) * sizeof(xfs_extent_t))); - efip = (xfs_efi_log_item_t*)kmem_zalloc(size, KM_SLEEP); + efip = kmem_zalloc(size, KM_SLEEP); } else { - efip = (xfs_efi_log_item_t*)kmem_zone_zalloc(xfs_efi_zone, - KM_SLEEP); + efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP); } xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_id = (__psint_t)(void*)efip; - return (efip); + return efip; } /* @@ -276,7 +244,7 @@ xfs_efi_init(xfs_mount_t *mp, int xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) { - xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr; + xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; uint len = sizeof(xfs_efi_log_format_t) + (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); @@ -289,8 +257,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); return 0; } else if (buf->i_len == len32) { - xfs_efi_log_format_32_t *src_efi_fmt_32 = - (xfs_efi_log_format_32_t *)buf->i_addr; + xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; @@ -304,8 +271,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } else if (buf->i_len == len64) { - xfs_efi_log_format_64_t *src_efi_fmt_64 = - (xfs_efi_log_format_64_t *)buf->i_addr; + xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr; dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; @@ -356,16 +322,18 @@ xfs_efi_release(xfs_efi_log_item_t *efip, } } -STATIC void -xfs_efd_item_free(xfs_efd_log_item_t *efdp) +static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) { - int nexts = efdp->efd_format.efd_nextents; + return container_of(lip, struct xfs_efd_log_item, efd_item); +} - if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { +STATIC void +xfs_efd_item_free(struct xfs_efd_log_item *efdp) +{ + if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); - } else { + else kmem_zone_free(xfs_efd_zone, efdp); - } } /* @@ -373,9 +341,9 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp) * We only need 1 iovec for an efd item. It just logs the efd_log_format * structure. */ -/*ARGSUSED*/ STATIC uint -xfs_efd_item_size(xfs_efd_log_item_t *efdp) +xfs_efd_item_size( + struct xfs_log_item *lip) { return 1; } @@ -388,10 +356,12 @@ xfs_efd_item_size(xfs_efd_log_item_t *efdp) * slots in the efd item have been filled. */ STATIC void -xfs_efd_item_format(xfs_efd_log_item_t *efdp, - xfs_log_iovec_t *log_vector) +xfs_efd_item_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) { - uint size; + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + uint size; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); @@ -401,48 +371,38 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); efdp->efd_format.efd_size = 1; - log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); + log_vector->i_addr = &efdp->efd_format; log_vector->i_len = size; log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; ASSERT(size >= sizeof(xfs_efd_log_format_t)); } - /* * Pinning has no meaning for an efd item, so just return. */ -/*ARGSUSED*/ STATIC void -xfs_efd_item_pin(xfs_efd_log_item_t *efdp) +xfs_efd_item_pin( + struct xfs_log_item *lip) { - return; } - /* * Since pinning has no meaning for an efd item, unpinning does * not either. */ -/*ARGSUSED*/ -STATIC void -xfs_efd_item_unpin(xfs_efd_log_item_t *efdp) -{ - return; -} - -/*ARGSUSED*/ STATIC void -xfs_efd_item_unpin_remove(xfs_efd_log_item_t *efdp, xfs_trans_t *tp) +xfs_efd_item_unpin( + struct xfs_log_item *lip, + int remove) { - return; } /* * Efd items have no locking, so just return success. */ -/*ARGSUSED*/ STATIC uint -xfs_efd_item_trylock(xfs_efd_log_item_t *efdp) +xfs_efd_item_trylock( + struct xfs_log_item *lip) { return XFS_ITEM_LOCKED; } @@ -451,13 +411,12 @@ xfs_efd_item_trylock(xfs_efd_log_item_t *efdp) * Efd items have no locking or pushing, so return failure * so that the caller doesn't bother with us. */ -/*ARGSUSED*/ STATIC void -xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) +xfs_efd_item_unlock( + struct xfs_log_item *lip) { - if (efdp->efd_item.li_flags & XFS_LI_ABORTED) - xfs_efd_item_free(efdp); - return; + if (lip->li_flags & XFS_LI_ABORTED) + xfs_efd_item_free(EFD_ITEM(lip)); } /* @@ -467,15 +426,18 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) * return -1 to keep the transaction code from further referencing * this item. */ -/*ARGSUSED*/ STATIC xfs_lsn_t -xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) +xfs_efd_item_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + /* * If we got a log I/O error, it's always the case that the LR with the * EFI got unpinned and freed before the EFD got aborted. */ - if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) + if (!(lip->li_flags & XFS_LI_ABORTED)) xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); xfs_efd_item_free(efdp); @@ -486,11 +448,10 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) * There isn't much you can do to push on an efd item. It is simply * stuck waiting for the log to be flushed to disk. */ -/*ARGSUSED*/ STATIC void -xfs_efd_item_push(xfs_efd_log_item_t *efdp) +xfs_efd_item_push( + struct xfs_log_item *lip) { - return; } /* @@ -500,55 +461,48 @@ xfs_efd_item_push(xfs_efd_log_item_t *efdp) * example, for inodes, the inode is locked throughout the extent freeing * so the dependency should be recorded there. */ -/*ARGSUSED*/ STATIC void -xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) +xfs_efd_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - return; } /* * This is the ops vector shared by all efd log items. */ static struct xfs_item_ops xfs_efd_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_efd_item_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) - xfs_efd_item_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efd_item_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_efd_item_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_efd_item_committing + .iop_size = xfs_efd_item_size, + .iop_format = xfs_efd_item_format, + .iop_pin = xfs_efd_item_pin, + .iop_unpin = xfs_efd_item_unpin, + .iop_trylock = xfs_efd_item_trylock, + .iop_unlock = xfs_efd_item_unlock, + .iop_committed = xfs_efd_item_committed, + .iop_push = xfs_efd_item_push, + .iop_committing = xfs_efd_item_committing }; - /* * Allocate and initialize an efd item with the given number of extents. */ -xfs_efd_log_item_t * -xfs_efd_init(xfs_mount_t *mp, - xfs_efi_log_item_t *efip, - uint nextents) +struct xfs_efd_log_item * +xfs_efd_init( + struct xfs_mount *mp, + struct xfs_efi_log_item *efip, + uint nextents) { - xfs_efd_log_item_t *efdp; + struct xfs_efd_log_item *efdp; uint size; ASSERT(nextents > 0); if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { size = (uint)(sizeof(xfs_efd_log_item_t) + ((nextents - 1) * sizeof(xfs_extent_t))); - efdp = (xfs_efd_log_item_t*)kmem_zalloc(size, KM_SLEEP); + efdp = kmem_zalloc(size, KM_SLEEP); } else { - efdp = (xfs_efd_log_item_t*)kmem_zone_zalloc(xfs_efd_zone, - KM_SLEEP); + efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); } xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); @@ -556,5 +510,5 @@ xfs_efd_init(xfs_mount_t *mp, efdp->efd_format.efd_nextents = nextents; efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; - return (efdp); + return efdp; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 390850ee6603..9b715dce5699 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -18,13 +18,9 @@ #include "xfs.h" #include "xfs_bmap_btree.h" #include "xfs_inum.h" -#include "xfs_dir2.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_log.h" #include "xfs_trans.h" #include "xfs_sb.h" @@ -127,6 +123,82 @@ typedef struct fstrm_item xfs_inode_t *pip; /* Parent directory inode pointer. */ } fstrm_item_t; +/* + * Allocation group filestream associations are tracked with per-ag atomic + * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a + * particular AG already has active filestreams associated with it. The mount + * point's m_peraglock is used to protect these counters from per-ag array + * re-allocation during a growfs operation. When xfs_growfs_data_private() is + * about to reallocate the array, it calls xfs_filestream_flush() with the + * m_peraglock held in write mode. + * + * Since xfs_mru_cache_flush() guarantees that all the free functions for all + * the cache elements have finished executing before it returns, it's safe for + * the free functions to use the atomic counters without m_peraglock protection. + * This allows the implementation of xfs_fstrm_free_func() to be agnostic about + * whether it was called with the m_peraglock held in read mode, write mode or + * not held at all. The race condition this addresses is the following: + * + * - The work queue scheduler fires and pulls a filestream directory cache + * element off the LRU end of the cache for deletion, then gets pre-empted. + * - A growfs operation grabs the m_peraglock in write mode, flushes all the + * remaining items from the cache and reallocates the mount point's per-ag + * array, resetting all the counters to zero. + * - The work queue thread resumes and calls the free function for the element + * it started cleaning up earlier. In the process it decrements the + * filestreams counter for an AG that now has no references. + * + * With a shrinkfs feature, the above scenario could panic the system. + * + * All other uses of the following macros should be protected by either the + * m_peraglock held in read mode, or the cache's internal locking exposed by the + * interval between a call to xfs_mru_cache_lookup() and a call to + * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode + * when new elements are added to the cache. + * + * Combined, these locking rules ensure that no associations will ever exist in + * the cache that reference per-ag array elements that have since been + * reallocated. + */ +static int +xfs_filestream_peek_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_read(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; +} + +static int +xfs_filestream_get_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_inc_return(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; +} + +static void +xfs_filestream_put_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + atomic_dec(&pag->pagf_fstrms); + xfs_perag_put(pag); +} /* * Scan the AGs starting at startag looking for an AG that isn't in use and has @@ -355,16 +427,14 @@ xfs_fstrm_free_func( { fstrm_item_t *item = (fstrm_item_t *)data; xfs_inode_t *ip = item->ip; - int ref; ASSERT(ip->i_ino == ino); xfs_iflags_clear(ip, XFS_IFILESTREAM); /* Drop the reference taken on the AG when the item was added. */ - ref = xfs_filestream_put_ag(ip->i_mount, item->ag); + xfs_filestream_put_ag(ip->i_mount, item->ag); - ASSERT(ref >= 0); TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, xfs_filestream_peek_ag(ip->i_mount, item->ag)); diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 260f757bbc5d..09dd9af45434 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf; #endif -/* - * Allocation group filestream associations are tracked with per-ag atomic - * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a - * particular AG already has active filestreams associated with it. The mount - * point's m_peraglock is used to protect these counters from per-ag array - * re-allocation during a growfs operation. When xfs_growfs_data_private() is - * about to reallocate the array, it calls xfs_filestream_flush() with the - * m_peraglock held in write mode. - * - * Since xfs_mru_cache_flush() guarantees that all the free functions for all - * the cache elements have finished executing before it returns, it's safe for - * the free functions to use the atomic counters without m_peraglock protection. - * This allows the implementation of xfs_fstrm_free_func() to be agnostic about - * whether it was called with the m_peraglock held in read mode, write mode or - * not held at all. The race condition this addresses is the following: - * - * - The work queue scheduler fires and pulls a filestream directory cache - * element off the LRU end of the cache for deletion, then gets pre-empted. - * - A growfs operation grabs the m_peraglock in write mode, flushes all the - * remaining items from the cache and reallocates the mount point's per-ag - * array, resetting all the counters to zero. - * - The work queue thread resumes and calls the free function for the element - * it started cleaning up earlier. In the process it decrements the - * filestreams counter for an AG that now has no references. - * - * With a shrinkfs feature, the above scenario could panic the system. - * - * All other uses of the following macros should be protected by either the - * m_peraglock held in read mode, or the cache's internal locking exposed by the - * interval between a call to xfs_mru_cache_lookup() and a call to - * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode - * when new elements are added to the cache. - * - * Combined, these locking rules ensure that no associations will ever exist in - * the cache that reference per-ag array elements that have since been - * reallocated. - */ -/* - * xfs_filestream_peek_ag is only used in tracing code - */ -static inline int -xfs_filestream_peek_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ret; - - pag = xfs_perag_get(mp, agno); - ret = atomic_read(&pag->pagf_fstrms); - xfs_perag_put(pag); - return ret; -} - -static inline int -xfs_filestream_get_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ret; - - pag = xfs_perag_get(mp, agno); - ret = atomic_inc_return(&pag->pagf_fstrms); - xfs_perag_put(pag); - return ret; -} - -static inline int -xfs_filestream_put_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ret; - - pag = xfs_perag_get(mp, agno); - ret = atomic_dec_return(&pag->pagf_fstrms); - xfs_perag_put(pag); - return ret; -} - /* allocation selection flags */ typedef enum xfs_fstrm_alloc { XFS_PICK_USERDATA = 1, diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 7cf7220e7d5f..8f6fc1a96386 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -114,8 +114,10 @@ struct getbmapx { #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ #define BMV_IF_VALID \ - (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ + BMV_IF_DELALLOC|BMV_IF_NO_HOLES) /* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ @@ -291,9 +293,11 @@ typedef struct xfs_bstat { __s32 bs_extsize; /* extent size */ __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ __u16 bs_forkoff; /* inode fork offset in bytes */ - unsigned char bs_pad[12]; /* pad space, unused */ + __u16 bs_projid_hi; /* higher part of project id */ + unsigned char bs_pad[10]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ @@ -446,6 +450,7 @@ typedef struct xfs_handle { /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) +#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 37a6f62c57b6..a7c116e814af 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -24,14 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -148,12 +144,11 @@ xfs_growfs_data_private( if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) return error; dpct = pct - mp->m_sb.sb_imax_pct; - error = xfs_read_buf(mp, mp->m_ddev_targp, - XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (error) - return error; - ASSERT(bp); + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), + BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); + if (!bp) + return EIO; xfs_buf_relse(bp); new = nb; /* use new as a temporary here */ @@ -601,39 +596,44 @@ out: * the extra reserve blocks from the reserve..... */ int error; - error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + fdblks_delta, 0); if (error == ENOSPC) goto retry; } return 0; } +/* + * Dump a transaction into the log that contains no real change. This is needed + * to be able to make the log dirty or stamp the current tail LSN into the log + * during the covering operation. + * + * We cannot use an inode here for this - that will push dirty state back up + * into the VFS and then periodic inode flushing will prevent log covering from + * making progress. Hence we log a field in the superblock instead. + */ int xfs_fs_log_dummy( - xfs_mount_t *mp) + xfs_mount_t *mp, + int flags) { xfs_trans_t *tp; - xfs_inode_t *ip; int error; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return error; } - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; + /* log the UUID because it is an unchanging field */ + xfs_mod_sb(tp, XFS_SB_UUID); + if (flags & SYNC_WAIT) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); } int diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 88435e0a77c9..a786c5212c1e 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(xfs_mount_t *mp); +extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); #endif /* __XFS_FSOPS_H__ */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c7142a064c48..0626a32c3447 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -24,14 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -216,7 +212,7 @@ xfs_ialloc_inode_init( * to log a whole cluster of inodes instead of all the * individual transactions causing a lot of log traffic. */ - xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); + xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = sizeof(struct xfs_dinode); @@ -1217,7 +1213,6 @@ xfs_imap_lookup( struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; - xfs_agino_t startino; int error; int i; @@ -1231,13 +1226,13 @@ xfs_imap_lookup( } /* - * derive and lookup the exact inode record for the given agino. If the - * record cannot be found, then it's an invalid inode number and we - * should abort. + * Lookup the inode record for the given agino. If the record cannot be + * found, then it's an invalid inode number and we should abort. Once + * we have a record, we need to ensure it contains the inode number + * we are looking up. */ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); - error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) error = xfs_inobt_get_rec(cur, &rec, &i); @@ -1250,6 +1245,11 @@ xfs_imap_lookup( if (error) return error; + /* check that the returned record contains the required inode */ + if (rec.ir_startino > agino || + rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) + return EINVAL; + /* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c282a9af5393..16921f55c542 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -24,14 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -187,38 +183,6 @@ xfs_inobt_key_diff( cur->bc_rec.i.ir_startino; } -STATIC int -xfs_inobt_kill_root( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - union xfs_btree_ptr *newroot) -{ - int error; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, killroot); - - /* - * Update the root pointer, decreasing the level by 1 and then - * free the old root. - */ - xfs_inobt_set_root(cur, newroot, -1); - error = xfs_inobt_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - XFS_BTREE_STATS_INC(cur, free); - - cur->bc_bufs[level] = NULL; - cur->bc_nlevels--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - #ifdef DEBUG STATIC int xfs_inobt_keys_inorder( @@ -313,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, - .kill_root = xfs_inobt_kill_root, .alloc_block = xfs_inobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8f8b91be2c99..0cdd26932d8e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -25,14 +25,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -95,7 +91,7 @@ xfs_inode_alloc( return ip; } -STATIC void +void xfs_inode_free( struct xfs_inode *ip) { @@ -212,7 +208,7 @@ xfs_iget_cache_hit( ip->i_flags &= ~XFS_INEW; ip->i_flags |= XFS_IRECLAIMABLE; __xfs_inode_set_reclaim_tag(pag, ip); - trace_xfs_iget_reclaim(ip); + trace_xfs_iget_reclaim_fail(ip); goto out_error; } @@ -227,6 +223,7 @@ xfs_iget_cache_hit( } else { /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { + trace_xfs_iget_skip(ip); error = EAGAIN; goto out_error; } @@ -234,6 +231,7 @@ xfs_iget_cache_hit( /* We've got a live one. */ spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); + trace_xfs_iget_hit(ip); } if (lock_flags != 0) @@ -242,7 +240,6 @@ xfs_iget_cache_hit( xfs_iflags_clear(ip, XFS_ISTALE); XFS_STATS_INC(xs_ig_found); - trace_xfs_iget_found(ip); return 0; out_error: @@ -264,7 +261,6 @@ xfs_iget_cache_miss( { struct xfs_inode *ip; int error; - unsigned long first_index, mask; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); ip = xfs_inode_alloc(mp, ino); @@ -275,7 +271,7 @@ xfs_iget_cache_miss( if (error) goto out_destroy; - xfs_itrace_entry(ip); + trace_xfs_iget_miss(ip); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { error = ENOENT; @@ -301,8 +297,6 @@ xfs_iget_cache_miss( BUG(); } - mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); - first_index = agino & mask; write_lock(&pag->pag_ici_lock); /* insert the new inode */ @@ -321,7 +315,6 @@ xfs_iget_cache_miss( write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); - trace_xfs_iget_alloc(ip); *ipp = ip; return 0; @@ -372,8 +365,8 @@ xfs_iget( xfs_perag_t *pag; xfs_agino_t agino; - /* the radix tree exists only in inode capable AGs */ - if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) + /* reject inode numbers outside existing AGs */ + if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return EINVAL; /* get the perag structure and ensure that it's inode capable */ @@ -422,97 +415,6 @@ out_error_or_again: } /* - * Decrement reference count of an inode structure and unlock it. - * - * ip -- the inode being released - * lock_flags -- this parameter indicates the inode's locks to be - * to be released. See the comment on xfs_iunlock() for a list - * of valid values. - */ -void -xfs_iput(xfs_inode_t *ip, - uint lock_flags) -{ - xfs_itrace_entry(ip); - xfs_iunlock(ip, lock_flags); - IRELE(ip); -} - -/* - * Special iput for brand-new inodes that are still locked - */ -void -xfs_iput_new( - xfs_inode_t *ip, - uint lock_flags) -{ - struct inode *inode = VFS_I(ip); - - xfs_itrace_entry(ip); - - if ((ip->i_d.di_mode == 0)) { - ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - make_bad_inode(inode); - } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - if (lock_flags) - xfs_iunlock(ip, lock_flags); - IRELE(ip); -} - -/* - * This is called free all the memory associated with an inode. - * It must free the inode itself and any buffers allocated for - * if_extents/if_data and if_broot. It must also free the lock - * associated with the inode. - * - * Note: because we don't initialise everything on reallocation out - * of the zone, we must ensure we nullify everything correctly before - * freeing the structure. - */ -void -xfs_ireclaim( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - - XFS_STATS_INC(xs_ig_reclaims); - - /* - * Remove the inode from the per-AG radix tree. - * - * Because radix_tree_delete won't complain even if the item was never - * added to the tree assert that it's been there before to catch - * problems with the inode life time early on. - */ - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - write_lock(&pag->pag_ici_lock); - if (!radix_tree_delete(&pag->pag_ici_root, agino)) - ASSERT(0); - write_unlock(&pag->pag_ici_lock); - xfs_perag_put(pag); - - /* - * Here we do an (almost) spurious inode lock in order to coordinate - * with inode cache radix tree lookups. This is because the lookup - * can reference the inodes in the cache without taking references. - * - * We make that OK here by ensuring that we wait until the inode is - * unlocked after the lookup before we go ahead and free it. We get - * both the ilock and the iolock because the code may need to drop the - * ilock one but will still hold the iolock. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_qm_dqdetach(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - xfs_inode_free(ip); -} - -/* * This is a wrapper routine around the xfs_ilock() routine * used to centralize some grungy code. It is used in places * that wish to lock the inode solely for reading the extents. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b76a829d7e20..108c7a085f94 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -27,13 +27,10 @@ #include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -44,7 +41,6 @@ #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" -#include "xfs_rw.h" #include "xfs_error.h" #include "xfs_utils.h" #include "xfs_quota.h" @@ -426,7 +422,7 @@ xfs_iformat( if (!XFS_DFORK_Q(dip)) return 0; ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); switch (dip->di_aformat) { @@ -509,7 +505,7 @@ xfs_iformat_local( ifp->if_u1.if_data = ifp->if_u2.if_inline_data; else { real_size = roundup(size, 4); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); } ifp->if_bytes = size; ifp->if_real_bytes = real_size; @@ -636,7 +632,7 @@ xfs_iformat_btree( } ifp->if_broot_bytes = size; - ifp->if_broot = kmem_alloc(size, KM_SLEEP); + ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); ASSERT(ifp->if_broot != NULL); /* * Copy and convert from the on-disk structure @@ -664,7 +660,8 @@ xfs_dinode_from_disk( to->di_uid = be32_to_cpu(from->di_uid); to->di_gid = be32_to_cpu(from->di_gid); to->di_nlink = be32_to_cpu(from->di_nlink); - to->di_projid = be16_to_cpu(from->di_projid); + to->di_projid_lo = be16_to_cpu(from->di_projid_lo); + to->di_projid_hi = be16_to_cpu(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); to->di_flushiter = be16_to_cpu(from->di_flushiter); to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); @@ -699,7 +696,8 @@ xfs_dinode_to_disk( to->di_uid = cpu_to_be32(from->di_uid); to->di_gid = cpu_to_be32(from->di_gid); to->di_nlink = cpu_to_be32(from->di_nlink); - to->di_projid = cpu_to_be16(from->di_projid); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); to->di_flushiter = cpu_to_be16(from->di_flushiter); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); @@ -878,7 +876,7 @@ xfs_iread( if (ip->i_d.di_version == 1) { ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; - ip->i_d.di_projid = 0; + xfs_set_projid(ip, 0); } ip->i_delayed_blks = 0; @@ -922,7 +920,6 @@ xfs_iread_extents( int error; xfs_ifork_t *ifp; xfs_extnum_t nextents; - size_t size; if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, @@ -930,7 +927,6 @@ xfs_iread_extents( return XFS_ERROR(EFSCORRUPTED); } nextents = XFS_IFORK_NEXTENTS(ip, whichfork); - size = nextents * sizeof(xfs_bmbt_rec_t); ifp = XFS_IFORK_PTR(ip, whichfork); /* @@ -988,8 +984,7 @@ xfs_ialloc( mode_t mode, xfs_nlink_t nlink, xfs_dev_t rdev, - cred_t *cr, - xfs_prid_t prid, + prid_t prid, int okalloc, xfs_buf_t **ialloc_context, boolean_t *call_again, @@ -1033,7 +1028,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(); ip->i_d.di_gid = current_fsgid(); - ip->i_d.di_projid = prid; + xfs_set_projid(ip, prid); memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); /* @@ -1226,7 +1221,7 @@ xfs_isize_check( (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, - NULL, NULL)) + NULL)) return; ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); @@ -1460,7 +1455,7 @@ xfs_itruncate_finish( ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_transp == *tp); ASSERT(ip->i_itemp != NULL); - ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); + ASSERT(ip->i_itemp->ili_lock_flags == 0); ntp = *tp; @@ -1589,11 +1584,10 @@ xfs_itruncate_finish( xfs_bmap_init(&free_list, &first_block); error = xfs_bunmapi(ntp, ip, first_unmap_block, unmap_len, - xfs_bmapi_aflag(fork) | - (sync ? 0 : XFS_BMAPI_ASYNC), + xfs_bmapi_aflag(fork), XFS_ITRUNC_MAX_EXTENTS, &first_block, &free_list, - NULL, &done); + &done); if (error) { /* * If the bunmapi call encounters an error, @@ -1612,12 +1606,8 @@ xfs_itruncate_finish( */ error = xfs_bmap_finish(tp, &free_list, &committed); ntp = *tp; - if (committed) { - /* link the inode into the next xact in the chain */ - xfs_trans_ijoin(ntp, ip, - XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ihold(ntp, ip); - } + if (committed) + xfs_trans_ijoin(ntp, ip); if (error) { /* @@ -1646,9 +1636,7 @@ xfs_itruncate_finish( error = xfs_trans_commit(*tp, 0); *tp = ntp; - /* link the inode into the next transaction in the chain */ - xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ihold(ntp, ip); + xfs_trans_ijoin(ntp, ip); if (error) return error; @@ -1927,6 +1915,11 @@ xfs_iunlink_remove( return 0; } +/* + * A big issue when freeing the inode cluster is is that we _cannot_ skip any + * inodes that are in memory - they all must be marked stale and attached to + * the cluster buffer. + */ STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, @@ -1958,8 +1951,6 @@ xfs_ifree_cluster( } for (j = 0; j < nbufs; j++, inum += ninodes) { - int found = 0; - blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -1978,23 +1969,25 @@ xfs_ifree_cluster( /* * Walk the inodes already attached to the buffer and mark them * stale. These will all have the flush locks held, so an - * in-memory inode walk can't lock them. + * in-memory inode walk can't lock them. By marking them all + * stale first, we will not attempt to lock them in the loop + * below as the XFS_ISTALE flag will be set. */ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); while (lip) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); - lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; + lip->li_cb = xfs_istale_done; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); - found++; } lip = lip->li_bio_list; } + /* * For each inode in memory attempt to add it to the inode * buffer and set it up for being staled on buffer IO @@ -2006,6 +1999,7 @@ xfs_ifree_cluster( * even trying to lock them. */ for (i = 0; i < ninodes; i++) { +retry: read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); @@ -2016,38 +2010,36 @@ xfs_ifree_cluster( continue; } - /* don't try to lock/unlock the current inode */ + /* + * Don't try to lock/unlock the current inode, but we + * _cannot_ skip the other inodes that we did not find + * in the list attached to the buffer and are not + * already marked stale. If we can't lock it, back off + * and retry. + */ if (ip != free_ip && !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { read_unlock(&pag->pag_ici_lock); - continue; + delay(1); + goto retry; } read_unlock(&pag->pag_ici_lock); - if (!xfs_iflock_nowait(ip)) { - if (ip != free_ip) - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - + xfs_iflock(ip); xfs_iflags_set(ip, XFS_ISTALE); - if (xfs_inode_clean(ip)) { - ASSERT(ip != free_ip); - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } + /* + * we don't need to attach clean inodes or those only + * with unlogged changes (which we throw away, anyway). + */ iip = ip->i_itemp; - if (!iip) { - /* inode with unlogged changes only */ + if (!iip || xfs_inode_clean(ip)) { ASSERT(ip != free_ip); ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } - found++; iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; @@ -2055,16 +2047,14 @@ xfs_ifree_cluster( xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); - xfs_buf_attach_iodone(bp, - (void(*)(xfs_buf_t*,xfs_log_item_t*)) - xfs_istale_done, (xfs_log_item_t *)iip); + xfs_buf_attach_iodone(bp, xfs_istale_done, + &iip->ili_item); if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (found) - xfs_trans_stale_inode_buf(tp, bp); + xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); } @@ -2203,7 +2193,7 @@ xfs_iroot_realloc( */ if (ifp->if_broot_bytes == 0) { new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); - ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); + ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); ifp->if_broot_bytes = (int)new_size; return; } @@ -2219,7 +2209,7 @@ xfs_iroot_realloc( new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ - KM_SLEEP); + KM_SLEEP | KM_NOFS); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -2245,7 +2235,7 @@ xfs_iroot_realloc( else new_size = 0; if (new_size > 0) { - new_broot = kmem_alloc(new_size, KM_SLEEP); + new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); /* * First copy over the btree block header. */ @@ -2349,7 +2339,8 @@ xfs_idata_realloc( real_size = roundup(new_size, 4); if (ifp->if_u1.if_data == NULL) { ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + ifp->if_u1.if_data = kmem_alloc(real_size, + KM_SLEEP | KM_NOFS); } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { /* * Only do the realloc if the underlying size @@ -2360,11 +2351,12 @@ xfs_idata_realloc( kmem_realloc(ifp->if_u1.if_data, real_size, ifp->if_real_bytes, - KM_SLEEP); + KM_SLEEP | KM_NOFS); } } else { ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + ifp->if_u1.if_data = kmem_alloc(real_size, + KM_SLEEP | KM_NOFS); memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, ifp->if_bytes); } @@ -2731,11 +2723,10 @@ cluster_corrupt_out: * mark it as stale and brelse. */ if (XFS_BUF_IODONE_FUNC(bp)) { - XFS_BUF_CLR_BDSTRAT_FUNC(bp); XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); XFS_BUF_ERROR(bp,EIO); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } else { XFS_BUF_STALE(bp); xfs_buf_relse(bp); @@ -3018,7 +3009,7 @@ xfs_iflush_int( memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); memset(&(dip->di_pad[0]), 0, sizeof(dip->di_pad)); - ASSERT(ip->i_d.di_projid == 0); + ASSERT(xfs_get_projid(ip) == 0); } } @@ -3069,8 +3060,7 @@ xfs_iflush_int( * and unlock the inode's flush lock when the inode is * completely written to disk. */ - xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) - xfs_iflush_done, (xfs_log_item_t *)iip); + xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); @@ -3514,13 +3504,11 @@ xfs_iext_remove_indirect( xfs_extnum_t ext_diff; /* extents to remove in current list */ xfs_extnum_t nex1; /* number of extents before idx */ xfs_extnum_t nex2; /* extents after idx + count */ - int nlists; /* entries in indirection array */ int page_idx = idx; /* index in target extent list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); ASSERT(erp != NULL); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; nex1 = page_idx; ext_cnt = count; while (ext_cnt) { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 78550df13cd6..fb2ca2e4cdc9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -134,8 +134,9 @@ typedef struct xfs_icdinode { __uint32_t di_uid; /* owner's user id */ __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ - __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_projid_lo; /* lower part of owner's project id */ + __uint16_t di_projid_hi; /* higher part of owner's project id */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_ictimestamp_t di_atime; /* time last accessed */ xfs_ictimestamp_t di_mtime; /* time last modified */ @@ -212,7 +213,6 @@ typedef struct xfs_icdinode { #ifdef __KERNEL__ struct bhv_desc; -struct cred; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; @@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) } /* + * Project quota id helpers (previously projid was 16bit only + * and using two 16bit values to hold new 32bit projid was choosen + * to retain compatibility with "old" filesystems). + */ +static inline prid_t +xfs_get_projid(struct xfs_inode *ip) +{ + return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo; +} + +static inline void +xfs_set_projid(struct xfs_inode *ip, + prid_t projid) +{ + ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16); + ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); +} + +/* * Manage the i_flush queue embedded in the inode. This completion * queue synchronizes processes attempting to flush the in-core * inode back to disk. @@ -443,8 +462,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) */ int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **); -void xfs_iput(xfs_inode_t *, uint); -void xfs_iput_new(xfs_inode_t *, uint); void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); @@ -452,14 +469,14 @@ void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); -void xfs_ireclaim(xfs_inode_t *); +void xfs_inode_free(struct xfs_inode *ip); /* * xfs_inode.c prototypes. */ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, - int, struct xfs_buf **, boolean_t *, xfs_inode_t **); + xfs_nlink_t, xfs_dev_t, prid_t, int, + struct xfs_buf **, boolean_t *, xfs_inode_t **); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); @@ -473,7 +490,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_iunpin_wait(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); -void xfs_ichgtime(xfs_inode_t *, int); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); @@ -484,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ + ihold(VFS_I(ip)); \ trace_xfs_ihold(ip, _THIS_IP_); \ } while (0) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index cf8249a60004..c7ac020705df 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -22,30 +22,26 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" -#include "xfs_buf_item.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_rw.h" #include "xfs_error.h" #include "xfs_trace.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ +static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_inode_log_item, ili_item); +} + + /* * This returns the number of iovecs needed to log the given inode item. * @@ -55,13 +51,11 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */ */ STATIC uint xfs_inode_item_size( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - uint nvecs; - xfs_inode_t *ip; - - ip = iip->ili_inode; - nvecs = 2; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + uint nvecs = 2; /* * Only log the data/extents/b-tree root if there is something @@ -212,36 +206,23 @@ xfs_inode_item_size( */ STATIC void xfs_inode_item_format( - xfs_inode_log_item_t *iip, - xfs_log_iovec_t *log_vector) + struct xfs_log_item *lip, + struct xfs_log_iovec *vecp) { + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; uint nvecs; - xfs_log_iovec_t *vecp; - xfs_inode_t *ip; size_t data_bytes; xfs_bmbt_rec_t *ext_buffer; - int nrecs; xfs_mount_t *mp; - ip = iip->ili_inode; - vecp = log_vector; - - vecp->i_addr = (xfs_caddr_t)&iip->ili_format; + vecp->i_addr = &iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; /* - * Make sure the linux inode is dirty. We do this before - * clearing i_update_core as the VFS will call back into - * XFS here and set i_update_core, so we need to dirty the - * inode first so that the ordering of i_update_core and - * unlogged modifications still works as described below. - */ - xfs_mark_inode_dirty_sync(ip); - - /* * Clear i_update_core if the timestamps (or any other * non-transactional modification) need flushing/logging * and we're about to log them with the rest of the core. @@ -277,7 +258,7 @@ xfs_inode_item_format( */ xfs_synchronize_times(ip); - vecp->i_addr = (xfs_caddr_t)&ip->i_d; + vecp->i_addr = &ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; @@ -323,18 +304,17 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_d.di_nextents > 0); ASSERT(iip->ili_extents_buf == NULL); - nrecs = ip->i_df.if_bytes / - (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(nrecs > 0); + ASSERT((ip->i_df.if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)) > 0); #ifdef XFS_NATIVE_HOST - if (nrecs == ip->i_d.di_nextents) { + if (ip->i_d.di_nextents == ip->i_df.if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)) { /* * There are no delayed allocation * extents, so just point to the * real extents array. */ - vecp->i_addr = - (char *)(ip->i_df.if_u1.if_extents); + vecp->i_addr = ip->i_df.if_u1.if_extents; vecp->i_len = ip->i_df.if_bytes; vecp->i_type = XLOG_REG_TYPE_IEXT; } else @@ -352,7 +332,7 @@ xfs_inode_item_format( ext_buffer = kmem_alloc(ip->i_df.if_bytes, KM_SLEEP); iip->ili_extents_buf = ext_buffer; - vecp->i_addr = (xfs_caddr_t)ext_buffer; + vecp->i_addr = ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); vecp->i_type = XLOG_REG_TYPE_IEXT; @@ -371,7 +351,7 @@ xfs_inode_item_format( if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { ASSERT(ip->i_df.if_broot_bytes > 0); ASSERT(ip->i_df.if_broot != NULL); - vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; + vecp->i_addr = ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; @@ -389,7 +369,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); - vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; + vecp->i_addr = ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to @@ -437,7 +417,7 @@ xfs_inode_item_format( * Assert that no attribute-related log flags are set. */ if (!XFS_IFORK_Q(ip)) { - ASSERT(nvecs == iip->ili_item.li_desc->lid_size); + ASSERT(nvecs == lip->li_desc->lid_size); iip->ili_format.ilf_size = nvecs; ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); @@ -449,21 +429,21 @@ xfs_inode_item_format( ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { - ASSERT(ip->i_afp->if_bytes > 0); - ASSERT(ip->i_afp->if_u1.if_extents != NULL); - ASSERT(ip->i_d.di_anextents > 0); #ifdef DEBUG - nrecs = ip->i_afp->if_bytes / + int nrecs = ip->i_afp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); -#endif ASSERT(nrecs > 0); ASSERT(nrecs == ip->i_d.di_anextents); + ASSERT(ip->i_afp->if_bytes > 0); + ASSERT(ip->i_afp->if_u1.if_extents != NULL); + ASSERT(ip->i_d.di_anextents > 0); +#endif #ifdef XFS_NATIVE_HOST /* * There are not delayed allocation extents * for attributes, so just point at the array. */ - vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); + vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; #else ASSERT(iip->ili_aextents_buf == NULL); @@ -473,7 +453,7 @@ xfs_inode_item_format( ext_buffer = kmem_alloc(ip->i_afp->if_bytes, KM_SLEEP); iip->ili_aextents_buf = ext_buffer; - vecp->i_addr = (xfs_caddr_t)ext_buffer; + vecp->i_addr = ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif @@ -490,7 +470,7 @@ xfs_inode_item_format( if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { ASSERT(ip->i_afp->if_broot_bytes > 0); ASSERT(ip->i_afp->if_broot != NULL); - vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; + vecp->i_addr = ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; @@ -506,7 +486,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_data != NULL); - vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; + vecp->i_addr = ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to @@ -528,7 +508,7 @@ xfs_inode_item_format( break; } - ASSERT(nvecs == iip->ili_item.li_desc->lid_size); + ASSERT(nvecs == lip->li_desc->lid_size); iip->ili_format.ilf_size = nvecs; } @@ -539,12 +519,14 @@ xfs_inode_item_format( */ STATIC void xfs_inode_item_pin( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); - atomic_inc(&iip->ili_inode->i_pincount); + trace_xfs_inode_pin(ip, _RET_IP_); + atomic_inc(&ip->i_pincount); } @@ -554,12 +536,12 @@ xfs_inode_item_pin( * * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. */ -/* ARGSUSED */ STATIC void xfs_inode_item_unpin( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip, + int remove) { - struct xfs_inode *ip = iip->ili_inode; + struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; trace_xfs_inode_unpin(ip, _RET_IP_); ASSERT(atomic_read(&ip->i_pincount) > 0); @@ -567,15 +549,6 @@ xfs_inode_item_unpin( wake_up(&ip->i_ipin_wait); } -/* ARGSUSED */ -STATIC void -xfs_inode_item_unpin_remove( - xfs_inode_log_item_t *iip, - xfs_trans_t *tp) -{ - xfs_inode_item_unpin(iip); -} - /* * This is called to attempt to lock the inode associated with this * inode log item, in preparation for the push routine which does the actual @@ -591,19 +564,16 @@ xfs_inode_item_unpin_remove( */ STATIC uint xfs_inode_item_trylock( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - register xfs_inode_t *ip; - - ip = iip->ili_inode; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; - if (xfs_ipincount(ip) > 0) { + if (xfs_ipincount(ip) > 0) return XFS_ITEM_PINNED; - } - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) return XFS_ITEM_LOCKED; - } if (!xfs_iflock_nowait(ip)) { /* @@ -629,7 +599,7 @@ xfs_inode_item_trylock( if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { ASSERT(iip->ili_format.ilf_fields != 0); ASSERT(iip->ili_logged == 0); - ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL); + ASSERT(lip->li_flags & XFS_LI_IN_AIL); } #endif return XFS_ITEM_SUCCESS; @@ -643,26 +613,18 @@ xfs_inode_item_trylock( */ STATIC void xfs_inode_item_unlock( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - uint hold; - uint iolocked; - uint lock_flags; - xfs_inode_t *ip; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + unsigned short lock_flags; - ASSERT(iip != NULL); ASSERT(iip->ili_inode->i_itemp != NULL); ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); - ASSERT((!(iip->ili_inode->i_itemp->ili_flags & - XFS_ILI_IOLOCKED_EXCL)) || - xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL)); - ASSERT((!(iip->ili_inode->i_itemp->ili_flags & - XFS_ILI_IOLOCKED_SHARED)) || - xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED)); + /* * Clear the transaction pointer in the inode. */ - ip = iip->ili_inode; ip->i_transp = NULL; /* @@ -686,34 +648,11 @@ xfs_inode_item_unlock( iip->ili_aextents_buf = NULL; } - /* - * Figure out if we should unlock the inode or not. - */ - hold = iip->ili_flags & XFS_ILI_HOLD; - - /* - * Before clearing out the flags, remember whether we - * are holding the inode's IO lock. - */ - iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY; - - /* - * Clear out the fields of the inode log item particular - * to the current transaction. - */ - iip->ili_flags = 0; - - /* - * Unlock the inode if XFS_ILI_HOLD was not set. - */ - if (!hold) { - lock_flags = XFS_ILOCK_EXCL; - if (iolocked & XFS_ILI_IOLOCKED_EXCL) { - lock_flags |= XFS_IOLOCK_EXCL; - } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) { - lock_flags |= XFS_IOLOCK_SHARED; - } - xfs_iput(iip->ili_inode, lock_flags); + lock_flags = iip->ili_lock_flags; + iip->ili_lock_flags = 0; + if (lock_flags) { + xfs_iunlock(iip->ili_inode, lock_flags); + IRELE(iip->ili_inode); } } @@ -725,13 +664,12 @@ xfs_inode_item_unlock( * is the only one that matters. Therefore, simply return the * given lsn. */ -/*ARGSUSED*/ STATIC xfs_lsn_t xfs_inode_item_committed( - xfs_inode_log_item_t *iip, + struct xfs_log_item *lip, xfs_lsn_t lsn) { - return (lsn); + return lsn; } /* @@ -743,13 +681,12 @@ xfs_inode_item_committed( */ STATIC void xfs_inode_item_pushbuf( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - xfs_inode_t *ip; - xfs_mount_t *mp; - xfs_buf_t *bp; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + struct xfs_buf *bp; - ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); /* @@ -757,14 +694,13 @@ xfs_inode_item_pushbuf( * inode was taken off the AIL. So, just get out. */ if (completion_done(&ip->i_flush) || - ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { + !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); return; } - mp = ip->i_mount; - bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, - iip->ili_format.ilf_len, XBF_TRYLOCK); + bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, + iip->ili_format.ilf_len, XBF_TRYLOCK); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!bp) @@ -772,10 +708,8 @@ xfs_inode_item_pushbuf( if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); - return; } - /* * This is called to asynchronously write the inode associated with this * inode log item out to disk. The inode will already have been locked by @@ -783,14 +717,14 @@ xfs_inode_item_pushbuf( */ STATIC void xfs_inode_item_push( - xfs_inode_log_item_t *iip) + struct xfs_log_item *lip) { - xfs_inode_t *ip; - - ip = iip->ili_inode; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); ASSERT(!completion_done(&ip->i_flush)); + /* * Since we were able to lock the inode's flush lock and * we found it on the AIL, the inode must be dirty. This @@ -813,43 +747,34 @@ xfs_inode_item_push( */ (void) xfs_iflush(ip, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); - - return; } /* * XXX rcc - this one really has to do something. Probably needs * to stamp in a new field in the incore inode. */ -/* ARGSUSED */ STATIC void xfs_inode_item_committing( - xfs_inode_log_item_t *iip, + struct xfs_log_item *lip, xfs_lsn_t lsn) { - iip->ili_last_lsn = lsn; - return; + INODE_ITEM(lip)->ili_last_lsn = lsn; } /* * This is the ops vector shared by all buf log items. */ static struct xfs_item_ops xfs_inode_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_inode_item_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) - xfs_inode_item_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_inode_item_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_inode_item_committing + .iop_size = xfs_inode_item_size, + .iop_format = xfs_inode_item_format, + .iop_pin = xfs_inode_item_pin, + .iop_unpin = xfs_inode_item_unpin, + .iop_trylock = xfs_inode_item_trylock, + .iop_unlock = xfs_inode_item_unlock, + .iop_committed = xfs_inode_item_committed, + .iop_push = xfs_inode_item_push, + .iop_pushbuf = xfs_inode_item_pushbuf, + .iop_committing = xfs_inode_item_committing }; @@ -858,10 +783,10 @@ static struct xfs_item_ops xfs_inode_item_ops = { */ void xfs_inode_item_init( - xfs_inode_t *ip, - xfs_mount_t *mp) + struct xfs_inode *ip, + struct xfs_mount *mp) { - xfs_inode_log_item_t *iip; + struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); @@ -899,14 +824,14 @@ xfs_inode_item_destroy( * from the AIL if it has not been re-logged, and unlocking the inode's * flush lock. */ -/*ARGSUSED*/ void xfs_iflush_done( - xfs_buf_t *bp, - xfs_inode_log_item_t *iip) + struct xfs_buf *bp, + struct xfs_log_item *lip) { + struct xfs_inode_log_item *iip = INODE_ITEM(lip); xfs_inode_t *ip = iip->ili_inode; - struct xfs_ail *ailp = iip->ili_item.li_ailp; + struct xfs_ail *ailp = lip->li_ailp; /* * We only want to pull the item from the AIL if it is @@ -917,12 +842,11 @@ xfs_iflush_done( * the lock since it's cheaper, and then we recheck while * holding the lock before removing the inode from the AIL. */ - if (iip->ili_logged && - (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { + if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { spin_lock(&ailp->xa_lock); - if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { + if (lip->li_lsn == iip->ili_flush_lsn) { /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); + xfs_trans_ail_delete(ailp, lip); } else { spin_unlock(&ailp->xa_lock); } @@ -940,8 +864,6 @@ xfs_iflush_done( * Release the inode's flush lock since we're done with it. */ xfs_ifunlock(ip); - - return; } /* @@ -957,10 +879,8 @@ xfs_iflush_abort( xfs_inode_t *ip) { xfs_inode_log_item_t *iip = ip->i_itemp; - xfs_mount_t *mp; iip = ip->i_itemp; - mp = ip->i_mount; if (iip) { struct xfs_ail *ailp = iip->ili_item.li_ailp; if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { @@ -991,10 +911,10 @@ xfs_iflush_abort( void xfs_istale_done( - xfs_buf_t *bp, - xfs_inode_log_item_t *iip) + struct xfs_buf *bp, + struct xfs_log_item *lip) { - xfs_iflush_abort(iip->ili_inode); + xfs_iflush_abort(INODE_ITEM(lip)->ili_inode); } /* @@ -1007,9 +927,8 @@ xfs_inode_item_format_convert( xfs_inode_log_format_t *in_f) { if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { - xfs_inode_log_format_32_t *in_f32; + xfs_inode_log_format_32_t *in_f32 = buf->i_addr; - in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; in_f->ilf_type = in_f32->ilf_type; in_f->ilf_size = in_f32->ilf_size; in_f->ilf_fields = in_f32->ilf_fields; @@ -1025,9 +944,8 @@ xfs_inode_item_format_convert( in_f->ilf_boffset = in_f32->ilf_boffset; return 0; } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ - xfs_inode_log_format_64_t *in_f64; + xfs_inode_log_format_64_t *in_f64 = buf->i_addr; - in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; in_f->ilf_type = in_f64->ilf_type; in_f->ilf_size = in_f64->ilf_size; in_f->ilf_fields = in_f64->ilf_fields; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 9a467958ecdd..d3dee61e6d91 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -103,12 +103,6 @@ typedef struct xfs_inode_log_format_64 { XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ XFS_ILOG_ABROOT) -#define XFS_ILI_HOLD 0x1 -#define XFS_ILI_IOLOCKED_EXCL 0x2 -#define XFS_ILI_IOLOCKED_SHARED 0x4 - -#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED) - static inline int xfs_ilog_fbroot(int w) { return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); @@ -137,7 +131,7 @@ typedef struct xfs_inode_log_item { struct xfs_inode *ili_inode; /* inode ptr */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ - unsigned short ili_flags; /* misc flags */ + unsigned short ili_lock_flags; /* lock flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ struct xfs_bmbt_rec *ili_extents_buf; /* array of logged @@ -161,8 +155,8 @@ static inline int xfs_inode_clean(xfs_inode_t *ip) extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); -extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); -extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); +extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); +extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); extern void xfs_iflush_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, xfs_inode_log_format_t *); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ef14943829da..20576146369f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -23,19 +23,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -123,7 +118,7 @@ xfs_iomap( error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb), bmapi_flags, NULL, 0, imap, - nimaps, NULL, NULL); + nimaps, NULL); if (error) goto out; @@ -138,7 +133,7 @@ xfs_iomap( break; } - if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { + if (flags & BMAPI_DIRECT) { error = xfs_iomap_write_direct(ip, offset, count, flags, imap, nimaps); } else { @@ -247,7 +242,7 @@ xfs_iomap_write_direct( xfs_off_t offset, size_t count, int flags, - xfs_bmbt_irec_t *ret_imap, + xfs_bmbt_irec_t *imap, int *nmaps) { xfs_mount_t *mp = ip->i_mount; @@ -261,7 +256,6 @@ xfs_iomap_write_direct( int quota_flag; int rt; xfs_trans_t *tp; - xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint qblocks, resblks, resrtextents; int committed; @@ -285,10 +279,10 @@ xfs_iomap_write_direct( if (error) goto error_out; } else { - if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK)) + if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) - ret_imap->br_blockcount + - ret_imap->br_startoff); + imap->br_blockcount + + imap->br_startoff); } count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); @@ -334,20 +328,22 @@ xfs_iomap_write_direct( if (error) goto error1; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); bmapi_flag = XFS_BMAPI_WRITE; if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* - * Issue the xfs_bmapi() call to allocate the blocks + * Issue the xfs_bmapi() call to allocate the blocks. + * + * From this point onwards we overwrite the imap pointer that the + * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, &imap, &nimaps, &free_list, NULL); + &firstfsb, 0, imap, &nimaps, &free_list); if (error) goto error0; @@ -369,12 +365,11 @@ xfs_iomap_write_direct( goto error_out; } - if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { - error = xfs_cmn_err_fsblock_zero(ip, &imap); + if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { + error = xfs_cmn_err_fsblock_zero(ip, imap); goto error_out; } - *ret_imap = imap; *nmaps = 1; return 0; @@ -425,7 +420,7 @@ xfs_iomap_eof_want_preallocate( imaps = nimaps; firstblock = NULLFSBLOCK; error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, - &firstblock, 0, imap, &imaps, NULL, NULL); + &firstblock, 0, imap, &imaps, NULL); if (error) return error; for (n = 0; n < imaps; n++) { @@ -500,7 +495,7 @@ retry: (xfs_filblks_t)(last_fsb - offset_fsb), XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, - &nimaps, NULL, NULL); + &nimaps, NULL); if (error && (error != ENOSPC)) return XFS_ERROR(error); @@ -548,7 +543,7 @@ xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, - xfs_bmbt_irec_t *map, + xfs_bmbt_irec_t *imap, int *retmap) { xfs_mount_t *mp = ip->i_mount; @@ -557,7 +552,6 @@ xfs_iomap_write_allocate( xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; - xfs_bmbt_irec_t imap; xfs_trans_t *tp; int nimaps, committed; int error = 0; @@ -573,8 +567,8 @@ xfs_iomap_write_allocate( return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); - count_fsb = map->br_blockcount; - map_start_fsb = map->br_startoff; + count_fsb = imap->br_blockcount; + map_start_fsb = imap->br_startoff; XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); @@ -602,8 +596,7 @@ xfs_iomap_write_allocate( return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_bmap_init(&free_list, &first_block); @@ -654,10 +647,15 @@ xfs_iomap_write_allocate( } } - /* Go get the actual blocks */ + /* + * Go get the actual blocks. + * + * From this point onwards we overwrite the imap + * pointer that the caller gave to us. + */ error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, - &imap, &nimaps, &free_list, NULL); + imap, &nimaps, &free_list); if (error) goto trans_cancel; @@ -676,13 +674,12 @@ xfs_iomap_write_allocate( * See if we were able to allocate an extent that * covers at least part of the callers request */ - if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) - return xfs_cmn_err_fsblock_zero(ip, &imap); + if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) + return xfs_cmn_err_fsblock_zero(ip, imap); - if ((offset_fsb >= imap.br_startoff) && - (offset_fsb < (imap.br_startoff + - imap.br_blockcount))) { - *map = imap; + if ((offset_fsb >= imap->br_startoff) && + (offset_fsb < (imap->br_startoff + + imap->br_blockcount))) { *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; @@ -692,8 +689,8 @@ xfs_iomap_write_allocate( * So far we have not mapped the requested part of the * file, just surrounding data, try again. */ - count_fsb -= imap.br_blockcount; - map_start_fsb = imap.br_startoff + imap.br_blockcount; + count_fsb -= imap->br_blockcount; + map_start_fsb = imap->br_startoff + imap->br_blockcount; } trans_cancel: @@ -766,8 +763,7 @@ xfs_iomap_write_unwritten( } xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * Modify the unwritten extent state of the buffer. @@ -776,7 +772,7 @@ xfs_iomap_write_unwritten( nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list, NULL); + 1, &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 81ac4afd45b3..7748a430f50d 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -18,17 +18,16 @@ #ifndef __XFS_IOMAP_H__ #define __XFS_IOMAP_H__ -typedef enum { - /* base extent manipulation calls */ - BMAPI_READ = (1 << 0), /* read extents */ - BMAPI_WRITE = (1 << 1), /* create extents */ - BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ - /* modifiers */ - BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ - BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ - BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ -} bmapi_flags_t; +/* base extent manipulation calls */ +#define BMAPI_READ (1 << 0) /* read extents */ +#define BMAPI_WRITE (1 << 1) /* create extents */ +#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ + +/* modifiers */ +#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ +#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ +#define BMAPI_MMA (1 << 6) /* allocate for mmap write */ +#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ #define BMAPI_FLAGS \ { BMAPI_READ, "READ" }, \ @@ -36,7 +35,6 @@ typedef enum { { BMAPI_ALLOCATE, "ALLOCATE" }, \ { BMAPI_IGNSTATE, "IGNSTATE" }, \ { BMAPI_DIRECT, "DIRECT" }, \ - { BMAPI_MMAP, "MMAP" }, \ { BMAPI_TRYLOCK, "TRYLOCK" } struct xfs_inode; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2b86f8610512..dc1882adaf54 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -24,20 +24,17 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_btree.h" +#include "xfs_trace.h" STATIC int xfs_internal_inum( @@ -95,7 +92,8 @@ xfs_bulkstat_one_int( * further change. */ buf->bs_nlink = dic->di_nlink; - buf->bs_projid = dic->di_projid; + buf->bs_projid_lo = dic->di_projid_lo; + buf->bs_projid_hi = dic->di_projid_hi; buf->bs_ino = ino; buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; @@ -143,7 +141,8 @@ xfs_bulkstat_one_int( buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; break; } - xfs_iput(ip, XFS_ILOCK_SHARED); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + IRELE(ip); error = formatter(buffer, ubsize, ubused, buf); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 5215abc8023a..cee4ab9f8a9e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -24,8 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_log_priv.h" @@ -35,8 +33,6 @@ #include "xfs_ialloc_btree.h" #include "xfs_log_recover.h" #include "xfs_trans_priv.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_rw.h" @@ -337,7 +333,6 @@ xfs_log_reserve( int retval = 0; ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - ASSERT((flags & XFS_LOG_NOSLEEP) == 0); if (XLOG_FORCED_SHUTDOWN(log)) return XFS_ERROR(EIO); @@ -552,7 +547,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) .magic = XLOG_UNMOUNT_TYPE, }; struct xfs_log_iovec reg = { - .i_addr = (void *)&magic, + .i_addr = &magic, .i_len = sizeof(magic), .i_type = XLOG_REG_TYPE_UNMOUNT, }; @@ -922,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp) l = iclog->ic_log; /* - * If the _XFS_BARRIER_FAILED flag was set by a lower - * layer, it means the underlying device no longer supports - * barrier I/O. Warn loudly and turn off barriers. - */ - if (bp->b_flags & _XFS_BARRIER_FAILED) { - bp->b_flags &= ~_XFS_BARRIER_FAILED; - l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; - xfs_fs_cmn_err(CE_WARN, l->l_mp, - "xlog_iodone: Barriers are no longer supported" - " by device. Disabling barriers\n"); - } - - /* * Race to shutdown the filesystem if we see an error. */ if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, @@ -1047,7 +1029,6 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_in_core_t *iclog, *prev_iclog=NULL; xfs_buf_t *bp; int i; - int iclogsize; int error = ENOMEM; uint log2_size = 0; @@ -1127,7 +1108,6 @@ xlog_alloc_log(xfs_mount_t *mp, * with different amounts of memory. See the definition of * xlog_in_core_t in xfs_log_priv.h for details. */ - iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); @@ -1138,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_prev = prev_iclog; prev_iclog = iclog; - bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + bp = xfs_buf_get_uncached(mp->m_logdev_targp, + log->l_iclog_size, 0); if (!bp) goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) @@ -1316,7 +1297,7 @@ xlog_bdstrat( if (iclog->ic_state & XLOG_STATE_IOERROR) { XFS_BUF_ERROR(bp, EIO); XFS_BUF_STALE(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of @@ -1428,11 +1409,8 @@ xlog_sync(xlog_t *log, XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_LOG_BUFFER; - /* - * Do an ordered write for the log block. - * Its unnecessary to flush the first split block in the log wrap case. - */ - if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER)) + + if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) XFS_BUF_ORDERED(bp); ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); @@ -3025,7 +3003,8 @@ _xfs_log_force( XFS_STATS_INC(xs_log_force); - xlog_cil_push(log, 1); + if (log->l_cilp) + xlog_cil_force(log); spin_lock(&log->l_icloglock); @@ -3177,7 +3156,7 @@ _xfs_log_force_lsn( XFS_STATS_INC(xs_log_force); if (log->l_cilp) { - lsn = xlog_cil_push_lsn(log, lsn); + lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) return 0; } @@ -3734,7 +3713,7 @@ xfs_log_force_umount( * call below. */ if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) - xlog_cil_push(log, 1); + xlog_cil_force(log); /* * We must hold both the GRANT lock and the LOG lock, diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 04c78e642cc8..916eb7db14d9 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -55,14 +55,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) /* * Flags to xfs_log_reserve() * - * XFS_LOG_SLEEP: If space is not available, sleep (default) - * XFS_LOG_NOSLEEP: If space is not available, return error * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are * performed against this type of reservation, the reservation * is not decreased. Long running transactions should use this. */ -#define XFS_LOG_SLEEP 0x0 -#define XFS_LOG_NOSLEEP 0x1 #define XFS_LOG_PERM_RESERV 0x2 /* @@ -104,7 +100,7 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XLOG_REG_TYPE_MAX 19 typedef struct xfs_log_iovec { - xfs_caddr_t i_addr; /* beginning address of region */ + void *i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ uint i_type; /* type of region */ } xfs_log_iovec_t; @@ -201,9 +197,4 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); #endif - - -extern int xlog_debug; /* set to 1 to enable real log */ - - #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index bb17cc044bf3..23d6ceb5e97b 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -26,8 +26,6 @@ #include "xfs_log_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_alloc.h" @@ -70,6 +68,7 @@ xlog_cil_init( ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; + cil->xc_current_sequence = ctx->sequence; cil->xc_log = log; log->l_cilp = cil; @@ -147,102 +146,6 @@ xlog_cil_init_post_recovery( } /* - * Insert the log item into the CIL and calculate the difference in space - * consumed by the item. Add the space to the checkpoint ticket and calculate - * if the change requires additional log metadata. If it does, take that space - * as well. Remove the amount of space we addded to the checkpoint ticket from - * the current transaction ticket so that the accounting works out correctly. - * - * If this is the first time the item is being placed into the CIL in this - * context, pin it so it can't be written to disk until the CIL is flushed to - * the iclog and the iclog written to disk. - */ -static void -xlog_cil_insert( - struct log *log, - struct xlog_ticket *ticket, - struct xfs_log_item *item, - struct xfs_log_vec *lv) -{ - struct xfs_cil *cil = log->l_cilp; - struct xfs_log_vec *old = lv->lv_item->li_lv; - struct xfs_cil_ctx *ctx = cil->xc_ctx; - int len; - int diff_iovecs; - int iclog_space; - - if (old) { - /* existing lv on log item, space used is a delta */ - ASSERT(!list_empty(&item->li_cil)); - ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); - - len = lv->lv_buf_len - old->lv_buf_len; - diff_iovecs = lv->lv_niovecs - old->lv_niovecs; - kmem_free(old->lv_buf); - kmem_free(old); - } else { - /* new lv, must pin the log item */ - ASSERT(!lv->lv_item->li_lv); - ASSERT(list_empty(&item->li_cil)); - - len = lv->lv_buf_len; - diff_iovecs = lv->lv_niovecs; - IOP_PIN(lv->lv_item); - - } - len += diff_iovecs * sizeof(xlog_op_header_t); - - /* attach new log vector to log item */ - lv->lv_item->li_lv = lv; - - spin_lock(&cil->xc_cil_lock); - list_move_tail(&item->li_cil, &cil->xc_cil); - ctx->nvecs += diff_iovecs; - - /* - * If this is the first time the item is being committed to the CIL, - * store the sequence number on the log item so we can tell - * in future commits whether this is the first checkpoint the item is - * being committed into. - */ - if (!item->li_seq) - item->li_seq = ctx->sequence; - - /* - * Now transfer enough transaction reservation to the context ticket - * for the checkpoint. The context ticket is special - the unit - * reservation has to grow as well as the current reservation as we - * steal from tickets so we can correctly determine the space used - * during the transaction commit. - */ - if (ctx->ticket->t_curr_res == 0) { - /* first commit in checkpoint, steal the header reservation */ - ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); - ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; - ticket->t_curr_res -= ctx->ticket->t_unit_res; - } - - /* do we need space for more log record headers? */ - iclog_space = log->l_iclog_size - log->l_iclog_hsize; - if (len > 0 && (ctx->space_used / iclog_space != - (ctx->space_used + len) / iclog_space)) { - int hdrs; - - hdrs = (len + iclog_space - 1) / iclog_space; - /* need to take into account split region headers, too */ - hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); - ctx->ticket->t_unit_res += hdrs; - ctx->ticket->t_curr_res += hdrs; - ticket->t_curr_res -= hdrs; - ASSERT(ticket->t_curr_res >= len); - } - ticket->t_curr_res -= len; - ctx->space_used += len; - - spin_unlock(&cil->xc_cil_lock); -} - -/* * Format log item into a flat buffers * * For delayed logging, we need to hold a formatted buffer containing all the @@ -271,15 +174,10 @@ xlog_cil_insert( static void xlog_cil_format_items( struct log *log, - struct xfs_log_vec *log_vector, - struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn) + struct xfs_log_vec *log_vector) { struct xfs_log_vec *lv; - if (start_lsn) - *start_lsn = log->l_cilp->xc_ctx->sequence; - ASSERT(log_vector); for (lv = log_vector; lv; lv = lv->lv_next) { void *ptr; @@ -292,7 +190,7 @@ xlog_cil_format_items( len += lv->lv_iovecp[index].i_len; lv->lv_buf_len = len; - lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); + lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); ptr = lv->lv_buf; for (index = 0; index < lv->lv_niovecs; index++) { @@ -303,97 +201,153 @@ xlog_cil_format_items( ptr += vec->i_len; } ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); - - xlog_cil_insert(log, ticket, lv->lv_item, lv); } } -static void -xlog_cil_free_logvec( - struct xfs_log_vec *log_vector) +/* + * Prepare the log item for insertion into the CIL. Calculate the difference in + * log space and vectors it will consume, and if it is a new item pin it as + * well. + */ +STATIC void +xfs_cil_prepare_item( + struct log *log, + struct xfs_log_vec *lv, + int *len, + int *diff_iovecs) { - struct xfs_log_vec *lv; + struct xfs_log_vec *old = lv->lv_item->li_lv; + + if (old) { + /* existing lv on log item, space used is a delta */ + ASSERT(!list_empty(&lv->lv_item->li_cil)); + ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); + + *len += lv->lv_buf_len - old->lv_buf_len; + *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; + kmem_free(old->lv_buf); + kmem_free(old); + } else { + /* new lv, must pin the log item */ + ASSERT(!lv->lv_item->li_lv); + ASSERT(list_empty(&lv->lv_item->li_cil)); + + *len += lv->lv_buf_len; + *diff_iovecs += lv->lv_niovecs; + IOP_PIN(lv->lv_item); - for (lv = log_vector; lv; ) { - struct xfs_log_vec *next = lv->lv_next; - kmem_free(lv->lv_buf); - kmem_free(lv); - lv = next; } + + /* attach new log vector to log item */ + lv->lv_item->li_lv = lv; + + /* + * If this is the first time the item is being committed to the + * CIL, store the sequence number on the log item so we can + * tell in future commits whether this is the first checkpoint + * the item is being committed into. + */ + if (!lv->lv_item->li_seq) + lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; } /* - * Commit a transaction with the given vector to the Committed Item List. - * - * To do this, we need to format the item, pin it in memory if required and - * account for the space used by the transaction. Once we have done that we - * need to release the unused reservation for the transaction, attach the - * transaction to the checkpoint context so we carry the busy extents through - * to checkpoint completion, and then unlock all the items in the transaction. - * - * For more specific information about the order of operations in - * xfs_log_commit_cil() please refer to the comments in - * xfs_trans_commit_iclog(). - * - * Called with the context lock already held in read mode to lock out - * background commit, returns without it held once background commits are - * allowed again. + * Insert the log items into the CIL and calculate the difference in space + * consumed by the item. Add the space to the checkpoint ticket and calculate + * if the change requires additional log metadata. If it does, take that space + * as well. Remove the amount of space we addded to the checkpoint ticket from + * the current transaction ticket so that the accounting works out correctly. */ -int -xfs_log_commit_cil( - struct xfs_mount *mp, - struct xfs_trans *tp, +static void +xlog_cil_insert_items( + struct log *log, struct xfs_log_vec *log_vector, - xfs_lsn_t *commit_lsn, - int flags) + struct xlog_ticket *ticket) { - struct log *log = mp->m_log; - int log_flags = 0; - int push = 0; - - if (flags & XFS_TRANS_RELEASE_LOG_RES) - log_flags = XFS_LOG_REL_PERM_RESERV; + struct xfs_cil *cil = log->l_cilp; + struct xfs_cil_ctx *ctx = cil->xc_ctx; + struct xfs_log_vec *lv; + int len = 0; + int diff_iovecs = 0; + int iclog_space; - if (XLOG_FORCED_SHUTDOWN(log)) { - xlog_cil_free_logvec(log_vector); - return XFS_ERROR(EIO); - } + ASSERT(log_vector); - /* lock out background commit */ - down_read(&log->l_cilp->xc_ctx_lock); - xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); + /* + * Do all the accounting aggregation and switching of log vectors + * around in a separate loop to the insertion of items into the CIL. + * Then we can do a separate loop to update the CIL within a single + * lock/unlock pair. This reduces the number of round trips on the CIL + * lock from O(nr_logvectors) to O(1) and greatly reduces the overall + * hold time for the transaction commit. + * + * If this is the first time the item is being placed into the CIL in + * this context, pin it so it can't be written to disk until the CIL is + * flushed to the iclog and the iclog written to disk. + * + * We can do this safely because the context can't checkpoint until we + * are done so it doesn't matter exactly how we update the CIL. + */ + for (lv = log_vector; lv; lv = lv->lv_next) + xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); - /* check we didn't blow the reservation */ - if (tp->t_ticket->t_curr_res < 0) - xlog_print_tic_res(log->l_mp, tp->t_ticket); + /* account for space used by new iovec headers */ + len += diff_iovecs * sizeof(xlog_op_header_t); - /* attach the transaction to the CIL if it has any busy extents */ - if (!list_empty(&tp->t_busy)) { - spin_lock(&log->l_cilp->xc_cil_lock); - list_splice_init(&tp->t_busy, - &log->l_cilp->xc_ctx->busy_extents); - spin_unlock(&log->l_cilp->xc_cil_lock); - } + spin_lock(&cil->xc_cil_lock); - tp->t_commit_lsn = *commit_lsn; - xfs_log_done(mp, tp->t_ticket, NULL, log_flags); - xfs_trans_unreserve_and_mod_sb(tp); + /* move the items to the tail of the CIL */ + for (lv = log_vector; lv; lv = lv->lv_next) + list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); - /* check for background commit before unlock */ - if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) - push = 1; - up_read(&log->l_cilp->xc_ctx_lock); + ctx->nvecs += diff_iovecs; /* - * We need to push CIL every so often so we don't cache more than we - * can fit in the log. The limit really is that a checkpoint can't be - * more than half the log (the current checkpoint is not allowed to - * overwrite the previous checkpoint), but commit latency and memory - * usage limit this to a smaller size in most cases. + * Now transfer enough transaction reservation to the context ticket + * for the checkpoint. The context ticket is special - the unit + * reservation has to grow as well as the current reservation as we + * steal from tickets so we can correctly determine the space used + * during the transaction commit. */ - if (push) - xlog_cil_push(log, 0); - return 0; + if (ctx->ticket->t_curr_res == 0) { + /* first commit in checkpoint, steal the header reservation */ + ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); + ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; + ticket->t_curr_res -= ctx->ticket->t_unit_res; + } + + /* do we need space for more log record headers? */ + iclog_space = log->l_iclog_size - log->l_iclog_hsize; + if (len > 0 && (ctx->space_used / iclog_space != + (ctx->space_used + len) / iclog_space)) { + int hdrs; + + hdrs = (len + iclog_space - 1) / iclog_space; + /* need to take into account split region headers, too */ + hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); + ctx->ticket->t_unit_res += hdrs; + ctx->ticket->t_curr_res += hdrs; + ticket->t_curr_res -= hdrs; + ASSERT(ticket->t_curr_res >= len); + } + ticket->t_curr_res -= len; + ctx->space_used += len; + + spin_unlock(&cil->xc_cil_lock); +} + +static void +xlog_cil_free_logvec( + struct xfs_log_vec *log_vector) +{ + struct xfs_log_vec *lv; + + for (lv = log_vector; lv; ) { + struct xfs_log_vec *next = lv->lv_next; + kmem_free(lv->lv_buf); + kmem_free(lv); + lv = next; + } } /* @@ -429,13 +383,23 @@ xlog_cil_committed( } /* - * Push the Committed Item List to the log. If the push_now flag is not set, - * then it is a background flush and so we can chose to ignore it. + * Push the Committed Item List to the log. If @push_seq flag is zero, then it + * is a background flush and so we can chose to ignore it. Otherwise, if the + * current sequence is the same as @push_seq we need to do a flush. If + * @push_seq is less than the current sequence, then it has already been + * flushed and we don't need to do anything - the caller will wait for it to + * complete if necessary. + * + * @push_seq is a value rather than a flag because that allows us to do an + * unlocked check of the sequence number for a match. Hence we can allows log + * forces to run racily and not issue pushes for the same sequence twice. If we + * get a race between multiple pushes for the same sequence they will block on + * the first one and then abort, hence avoiding needless pushes. */ -int +STATIC int xlog_cil_push( struct log *log, - int push_now) + xfs_lsn_t push_seq) { struct xfs_cil *cil = log->l_cilp; struct xfs_log_vec *lv; @@ -455,12 +419,20 @@ xlog_cil_push( if (!cil) return 0; + ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); + new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); - /* lock out transaction commit, but don't block on background push */ + /* + * Lock out transaction commit, but don't block for background pushes + * unless we are well over the CIL space limit. See the definition of + * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic + * used here. + */ if (!down_write_trylock(&cil->xc_ctx_lock)) { - if (!push_now) + if (!push_seq && + cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) goto out_free_ticket; down_write(&cil->xc_ctx_lock); } @@ -471,7 +443,11 @@ xlog_cil_push( goto out_skip; /* check for spurious background flush */ - if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + goto out_skip; + + /* check for a previously pushed seqeunce */ + if (push_seq && push_seq < cil->xc_ctx->sequence) goto out_skip; /* @@ -517,6 +493,13 @@ xlog_cil_push( cil->xc_ctx = new_ctx; /* + * mirror the new sequence into the cil structure so that we can do + * unlocked checks against the current sequence in log forces without + * risking deferencing a freed context pointer. + */ + cil->xc_current_sequence = new_ctx->sequence; + + /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so @@ -554,7 +537,7 @@ xlog_cil_push( thdr.th_type = XFS_TRANS_CHECKPOINT; thdr.th_tid = tic->t_tid; thdr.th_num_items = num_iovecs; - lhdr.i_addr = (xfs_caddr_t)&thdr; + lhdr.i_addr = &thdr; lhdr.i_len = sizeof(xfs_trans_header_t); lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); @@ -628,6 +611,105 @@ out_abort: } /* + * Commit a transaction with the given vector to the Committed Item List. + * + * To do this, we need to format the item, pin it in memory if required and + * account for the space used by the transaction. Once we have done that we + * need to release the unused reservation for the transaction, attach the + * transaction to the checkpoint context so we carry the busy extents through + * to checkpoint completion, and then unlock all the items in the transaction. + * + * For more specific information about the order of operations in + * xfs_log_commit_cil() please refer to the comments in + * xfs_trans_commit_iclog(). + * + * Called with the context lock already held in read mode to lock out + * background commit, returns without it held once background commits are + * allowed again. + */ +int +xfs_log_commit_cil( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_log_vec *log_vector, + xfs_lsn_t *commit_lsn, + int flags) +{ + struct log *log = mp->m_log; + int log_flags = 0; + int push = 0; + + if (flags & XFS_TRANS_RELEASE_LOG_RES) + log_flags = XFS_LOG_REL_PERM_RESERV; + + if (XLOG_FORCED_SHUTDOWN(log)) { + xlog_cil_free_logvec(log_vector); + return XFS_ERROR(EIO); + } + + /* + * do all the hard work of formatting items (including memory + * allocation) outside the CIL context lock. This prevents stalling CIL + * pushes when we are low on memory and a transaction commit spends a + * lot of time in memory reclaim. + */ + xlog_cil_format_items(log, log_vector); + + /* lock out background commit */ + down_read(&log->l_cilp->xc_ctx_lock); + if (commit_lsn) + *commit_lsn = log->l_cilp->xc_ctx->sequence; + + xlog_cil_insert_items(log, log_vector, tp->t_ticket); + + /* check we didn't blow the reservation */ + if (tp->t_ticket->t_curr_res < 0) + xlog_print_tic_res(log->l_mp, tp->t_ticket); + + /* attach the transaction to the CIL if it has any busy extents */ + if (!list_empty(&tp->t_busy)) { + spin_lock(&log->l_cilp->xc_cil_lock); + list_splice_init(&tp->t_busy, + &log->l_cilp->xc_ctx->busy_extents); + spin_unlock(&log->l_cilp->xc_cil_lock); + } + + tp->t_commit_lsn = *commit_lsn; + xfs_log_done(mp, tp->t_ticket, NULL, log_flags); + xfs_trans_unreserve_and_mod_sb(tp); + + /* + * Once all the items of the transaction have been copied to the CIL, + * the items can be unlocked and freed. + * + * This needs to be done before we drop the CIL context lock because we + * have to update state in the log items and unlock them before they go + * to disk. If we don't, then the CIL checkpoint can race with us and + * we can run checkpoint completion before we've updated and unlocked + * the log items. This affects (at least) processing of stale buffers, + * inodes and EFIs. + */ + xfs_trans_free_items(tp, *commit_lsn, 0); + + /* check for background commit before unlock */ + if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) + push = 1; + + up_read(&log->l_cilp->xc_ctx_lock); + + /* + * We need to push CIL every so often so we don't cache more than we + * can fit in the log. The limit really is that a checkpoint can't be + * more than half the log (the current checkpoint is not allowed to + * overwrite the previous checkpoint), but commit latency and memory + * usage limit this to a smaller size in most cases. + */ + if (push) + xlog_cil_push(log, 0); + return 0; +} + +/* * Conditionally push the CIL based on the sequence passed in. * * We only need to push if we haven't already pushed the sequence @@ -641,39 +723,34 @@ out_abort: * commit lsn is there. It'll be empty, so this is broken for now. */ xfs_lsn_t -xlog_cil_push_lsn( +xlog_cil_force_lsn( struct log *log, - xfs_lsn_t push_seq) + xfs_lsn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; xfs_lsn_t commit_lsn = NULLCOMMITLSN; -restart: - down_write(&cil->xc_ctx_lock); - ASSERT(push_seq <= cil->xc_ctx->sequence); - - /* check to see if we need to force out the current context */ - if (push_seq == cil->xc_ctx->sequence) { - up_write(&cil->xc_ctx_lock); - xlog_cil_push(log, 1); - goto restart; - } + ASSERT(sequence <= cil->xc_current_sequence); + + /* + * check to see if we need to force out the current context. + * xlog_cil_push() handles racing pushes for the same sequence, + * so no need to deal with it here. + */ + if (sequence == cil->xc_current_sequence) + xlog_cil_push(log, sequence); /* * See if we can find a previous sequence still committing. - * We can drop the flush lock as soon as we have the cil lock - * because we are now only comparing contexts protected by - * the cil lock. - * * We need to wait for all previous sequence commits to complete * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ +restart: spin_lock(&cil->xc_cil_lock); - up_write(&cil->xc_ctx_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { - if (ctx->sequence > push_seq) + if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { /* @@ -683,7 +760,7 @@ restart: sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); goto restart; } - if (ctx->sequence != push_seq) + if (ctx->sequence != sequence) continue; /* found it! */ commit_lsn = ctx->commit_lsn; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 8c072618965c..edcdfe01617f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -422,16 +422,17 @@ struct xfs_cil { struct rw_semaphore xc_ctx_lock; struct list_head xc_committing; sv_t xc_commit_wait; + xfs_lsn_t xc_current_sequence; }; /* - * The amount of log space we should the CIL to aggregate is difficult to size. - * Whatever we chose we have to make we can get a reservation for the log space - * effectively, that it is large enough to capture sufficient relogging to - * reduce log buffer IO significantly, but it is not too large for the log or - * induces too much latency when writing out through the iclogs. We track both - * space consumed and the number of vectors in the checkpoint context, so we - * need to decide which to use for limiting. + * The amount of log space we allow the CIL to aggregate is difficult to size. + * Whatever we choose, we have to make sure we can get a reservation for the + * log space effectively, that it is large enough to capture sufficient + * relogging to reduce log buffer IO significantly, but it is not too large for + * the log or induces too much latency when writing out through the iclogs. We + * track both space consumed and the number of vectors in the checkpoint + * context, so we need to decide which to use for limiting. * * Every log buffer we write out during a push needs a header reserved, which * is at least one sector and more for v2 logs. Hence we need a reservation of @@ -458,16 +459,21 @@ struct xfs_cil { * checkpoint transaction ticket is specific to the checkpoint context, rather * than the CIL itself. * - * With dynamic reservations, we can basically make up arbitrary limits for the - * checkpoint size so long as they don't violate any other size rules. Hence - * the initial maximum size for the checkpoint transaction will be set to a - * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit - * right now based on the latency of writing out a large amount of data through - * the circular iclog buffers. + * With dynamic reservations, we can effectively make up arbitrary limits for + * the checkpoint size so long as they don't violate any other size rules. + * Recovery imposes a rule that no transaction exceed half the log, so we are + * limited by that. Furthermore, the log transaction reservation subsystem + * tries to keep 25% of the log free, so we need to keep below that limit or we + * risk running out of free log space to start any new transactions. + * + * In order to keep background CIL push efficient, we will set a lower + * threshold at which background pushing is attempted without blocking current + * transaction commits. A separate, higher bound defines when CIL pushes are + * enforced to ensure we stay within our maximum checkpoint size bounds. + * threshold, yet give us plenty of space for aggregation on large logs. */ - -#define XLOG_CIL_SPACE_LIMIT(log) \ - (min((log->l_logsize >> 2), (8 * 1024 * 1024))) +#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) +#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) /* * The reservation head lsn is not made up of a cycle number and block number. @@ -562,8 +568,16 @@ int xlog_cil_init(struct log *log); void xlog_cil_init_post_recovery(struct log *log); void xlog_cil_destroy(struct log *log); -int xlog_cil_push(struct log *log, int push_now); -xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); +/* + * CIL force routines + */ +xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); + +static inline void +xlog_cil_force(struct log *log) +{ + xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); +} /* * Unmount record type is used as a pseudo transaction type for the ticket. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9ac5cfab27b9..966d3f97458c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,15 +24,11 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -111,7 +107,8 @@ xlog_get_bp( nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); + return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, + BBTOB(nbblks), 0); } STATIC void @@ -171,7 +168,7 @@ xlog_bread_noalign( XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); xfsbdstrat(log->l_mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) xfs_ioerror_alert("xlog_bread", log->l_mp, bp, XFS_BUF_ADDR(bp)); @@ -325,12 +322,13 @@ xlog_recover_iodone( * this during recovery. One strike! */ xfs_ioerror_alert("xlog_recover_iodone", - bp->b_mount, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); + bp->b_target->bt_mount, bp, + XFS_BUF_ADDR(bp)); + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); } - bp->b_mount = NULL; XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } /* @@ -1565,9 +1563,7 @@ xlog_recover_reorder_trans( list_splice_init(&trans->r_itemq, &sort_list); list_for_each_entry_safe(item, n, &sort_list, ri_list) { - xfs_buf_log_format_t *buf_f; - - buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; + xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; switch (ITEM_TYPE(item)) { case XFS_LI_BUF: @@ -1892,9 +1888,8 @@ xlog_recover_do_inode_buffer( * current di_next_unlinked field. Extract its value * and copy it to the buffer copy. */ - logged_nextp = (xfs_agino_t *) - ((char *)(item->ri_buf[item_index].i_addr) + - (next_unlinked_offset - reg_buf_offset)); + logged_nextp = item->ri_buf[item_index].i_addr + + next_unlinked_offset - reg_buf_offset; if (unlikely(*logged_nextp == 0)) { xfs_fs_cmn_err(CE_ALERT, mp, "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", @@ -1973,8 +1968,7 @@ xlog_recover_do_reg_buffer( item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_qm_dqcheck((xfs_disk_dquot_t *) - item->ri_buf[i].i_addr, + error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); if (error) @@ -2187,7 +2181,7 @@ xlog_recover_do_buffer_trans( xlog_recover_item_t *item, int pass) { - xfs_buf_log_format_t *buf_f; + xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; xfs_mount_t *mp; xfs_buf_t *bp; int error; @@ -2197,8 +2191,6 @@ xlog_recover_do_buffer_trans( ushort flags; uint buf_flags; - buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; - if (pass == XLOG_RECOVER_PASS1) { /* * In this pass we're only looking for buf items @@ -2285,8 +2277,7 @@ xlog_recover_do_buffer_trans( XFS_BUF_STALE(bp); error = xfs_bwrite(mp, bp); } else { - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2319,10 +2310,9 @@ xlog_recover_do_inode_trans( } if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { - in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; + in_f = item->ri_buf[0].i_addr; } else { - in_f = (xfs_inode_log_format_t *)kmem_alloc( - sizeof(xfs_inode_log_format_t), KM_SLEEP); + in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); need_free = 1; error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); if (error) @@ -2370,7 +2360,7 @@ xlog_recover_do_inode_trans( error = EFSCORRUPTED; goto error; } - dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); + dicp = item->ri_buf[1].i_addr; if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, @@ -2461,7 +2451,7 @@ xlog_recover_do_inode_trans( } /* The core is in in-core format */ - xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); + xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); /* the rest is in on-disk format */ if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { @@ -2551,8 +2541,7 @@ xlog_recover_do_inode_trans( } write_inode_buffer: - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); error: @@ -2578,7 +2567,7 @@ xlog_recover_do_quotaoff_trans( return (0); } - qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; + qoff_f = item->ri_buf[0].i_addr; ASSERT(qoff_f); /* @@ -2622,9 +2611,8 @@ xlog_recover_do_dquot_trans( if (mp->m_qflags == 0) return (0); - recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - - if (item->ri_buf[1].i_addr == NULL) { + recddq = item->ri_buf[1].i_addr; + if (recddq == NULL) { cmn_err(CE_ALERT, "XFS: NULL dquot in %s.", __func__); return XFS_ERROR(EIO); @@ -2654,7 +2642,7 @@ xlog_recover_do_dquot_trans( * The other possibility, of course, is that the quota subsystem was * removed since the last mount - ENOSYS. */ - dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; + dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); if ((error = xfs_qm_dqcheck(recddq, dq_f->qlf_id, @@ -2690,8 +2678,7 @@ xlog_recover_do_dquot_trans( memcpy(ddq, recddq, item->ri_buf[1].i_len); ASSERT(dq_f->qlf_size == 2); - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); @@ -2721,7 +2708,7 @@ xlog_recover_do_efi_trans( return 0; } - efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; + efi_formatp = item->ri_buf[0].i_addr; mp = log->l_mp; efip = xfs_efi_init(mp, efi_formatp->efi_nextents); @@ -2767,7 +2754,7 @@ xlog_recover_do_efd_trans( return; } - efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; + efd_formatp = item->ri_buf[0].i_addr; ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + @@ -3829,7 +3816,7 @@ xlog_do_recover( XFS_BUF_READ(bp); XFS_BUF_UNASYNC(bp); xfsbdstrat(log->l_mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xlog_do_recover", log->l_mp, bp, XFS_BUF_ADDR(bp)); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 69f62d8b2816..b1498ab5a399 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -25,13 +25,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -55,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, int); -STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, - int64_t, int); STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); - #else #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) -#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) - #endif static const struct { @@ -202,6 +194,8 @@ xfs_uuid_unmount( /* * Reference counting access wrappers to the perag structures. + * Because we never free per-ag structures, the only thing we + * have to protect against changes is the tree structure itself. */ struct xfs_perag * xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) @@ -209,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) struct xfs_perag *pag; int ref = 0; - spin_lock(&mp->m_perag_lock); + rcu_read_lock(); pag = radix_tree_lookup(&mp->m_perag_tree, agno); if (pag) { ASSERT(atomic_read(&pag->pag_ref) >= 0); - /* catch leaks in the positive direction during testing */ - ASSERT(atomic_read(&pag->pag_ref) < 1000); ref = atomic_inc_return(&pag->pag_ref); } - spin_unlock(&mp->m_perag_lock); + rcu_read_unlock(); trace_xfs_perag_get(mp, agno, ref, _RET_IP_); return pag; } +/* + * search from @first to find the next perag with the given tag set. + */ +struct xfs_perag * +xfs_perag_get_tag( + struct xfs_mount *mp, + xfs_agnumber_t first, + int tag) +{ + struct xfs_perag *pag; + int found; + int ref; + + rcu_read_lock(); + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, + (void **)&pag, first, 1, tag); + if (found <= 0) { + rcu_read_unlock(); + return NULL; + } + ref = atomic_inc_return(&pag->pag_ref); + rcu_read_unlock(); + trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); + return pag; +} + void xfs_perag_put(struct xfs_perag *pag) { @@ -232,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag) trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); } +STATIC void +__xfs_free_perag( + struct rcu_head *head) +{ + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + + ASSERT(atomic_read(&pag->pag_ref) == 0); + kmem_free(pag); +} + /* - * Free up the resources associated with a mount structure. Assume that - * the structure was initially zeroed, so we can tell which fields got - * initialized. + * Free up the per-ag resources associated with the mount structure. */ STATIC void xfs_free_perag( @@ -247,10 +273,9 @@ xfs_free_perag( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { spin_lock(&mp->m_perag_lock); pag = radix_tree_delete(&mp->m_perag_tree, agno); - ASSERT(pag); - ASSERT(atomic_read(&pag->pag_ref) == 0); spin_unlock(&mp->m_perag_lock); - kmem_free(pag); + ASSERT(pag); + call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -447,7 +472,10 @@ xfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; rwlock_init(&pag->pag_ici_lock); + mutex_init(&pag->pag_ici_reclaim_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + spin_lock_init(&pag->pag_buf_lock); + pag->pag_buf_tree = RB_ROOT; if (radix_tree_preload(GFP_NOFS)) goto out_unwind; @@ -642,7 +670,6 @@ int xfs_readsb(xfs_mount_t *mp, int flags) { unsigned int sector_size; - unsigned int extra_flags; xfs_buf_t *bp; int error; @@ -655,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags) * access to the superblock. */ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), - extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; +reread: + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + XFS_SB_DADDR, sector_size, 0); + if (!bp) { + xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); + return EIO; } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * Initialize the mount structure from the superblock. * But first do some basic consistency checking. */ xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { xfs_fs_mount_cmn_err(flags, "SB validate failed"); - goto fail; + goto release_buf; } /* @@ -687,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) "device supports only %u byte sectors (not %u)", sector_size, mp->m_sb.sb_sectsize); error = ENOSYS; - goto fail; + goto release_buf; } /* @@ -695,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags) * re-read the superblock so the buffer is correctly sized. */ if (sector_size < mp->m_sb.sb_sectsize) { - XFS_BUF_UNMANAGE(bp); xfs_buf_relse(bp); sector_size = mp->m_sb.sb_sectsize; - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB re-read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; - } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + goto reread; } /* Initialize per-cpu counters */ xfs_icsb_reinit_counters(mp); mp->m_sb_bp = bp; - xfs_buf_relse(bp); - ASSERT(XFS_BUF_VALUSEMA(bp) > 0); + xfs_buf_unlock(bp); return 0; - fail: - if (bp) { - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); - } +release_buf: + xfs_buf_relse(bp); return error; } @@ -994,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp) { xfs_buf_t *bp; xfs_daddr_t d; - int error; d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { - cmn_err(CE_WARN, "XFS: size check 1 failed"); + cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_ddev_targp, - d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) - error = XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + d - XFS_FSS_TO_BB(mp, 1), + BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: last sector read failed"); + return EIO; } + xfs_buf_relse(bp); if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { - cmn_err(CE_WARN, "XFS: size check 3 failed"); + cmn_err(CE_WARN, "XFS: log size mismatch detected"); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_logdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 3 failed"); - if (error == ENOSPC) - error = XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: log device read failed"); + return EIO; } + xfs_buf_relse(bp); } return 0; } @@ -1604,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) XFS_BUF_UNASYNC(sbp); ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); xfsbdstrat(mp, sbp); - error = xfs_iowait(sbp); + error = xfs_buf_iowait(sbp); if (error) xfs_ioerror_alert("xfs_unmountfs_writesb", mp, sbp, XFS_BUF_ADDR(sbp)); @@ -1835,135 +1838,72 @@ xfs_mod_incore_sb_unlocked( */ int xfs_mod_incore_sb( - xfs_mount_t *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) + struct xfs_mount *mp, + xfs_sb_field_t field, + int64_t delta, + int rsvd) { - int status; + int status; - /* check for per-cpu counters */ - switch (field) { #ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - status = xfs_icsb_modify_counters(mp, field, - delta, rsvd); - break; - } - /* FALLTHROUGH */ + ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); #endif - default: - spin_lock(&mp->m_sb_lock); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - spin_unlock(&mp->m_sb_lock); - break; - } + spin_lock(&mp->m_sb_lock); + status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + spin_unlock(&mp->m_sb_lock); return status; } /* - * xfs_mod_incore_sb_batch() is used to change more than one field - * in the in-core superblock structure at a time. This modification - * is protected by a lock internal to this module. The fields and - * changes to those fields are specified in the array of xfs_mod_sb - * structures passed in. + * Change more than one field in the in-core superblock structure at a time. * - * Either all of the specified deltas will be applied or none of - * them will. If any modified field dips below 0, then all modifications - * will be backed out and EINVAL will be returned. + * The fields and changes to those fields are specified in the array of + * xfs_mod_sb structures passed in. Either all of the specified deltas + * will be applied or none of them will. If any modified field dips below 0, + * then all modifications will be backed out and EINVAL will be returned. + * + * Note that this function may not be used for the superblock values that + * are tracked with the in-memory per-cpu counters - a direct call to + * xfs_icsb_modify_counters is required for these. */ int -xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) +xfs_mod_incore_sb_batch( + struct xfs_mount *mp, + xfs_mod_sb_t *msb, + uint nmsb, + int rsvd) { - int status=0; - xfs_mod_sb_t *msbp; + xfs_mod_sb_t *msbp = &msb[0]; + int error = 0; /* - * Loop through the array of mod structures and apply each - * individually. If any fail, then back out all those - * which have already been applied. Do all of this within - * the scope of the m_sb_lock so that all of the changes will - * be atomic. + * Loop through the array of mod structures and apply each individually. + * If any fail, then back out all those which have already been applied. + * Do all of this within the scope of the m_sb_lock so that all of the + * changes will be atomic. */ spin_lock(&mp->m_sb_lock); - msbp = &msb[0]; for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { - /* - * Apply the delta at index n. If it fails, break - * from the loop so we'll fall into the undo loop - * below. - */ - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - break; - } + ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || + msbp->msb_field > XFS_SBS_FDBLOCKS); - if (status != 0) { - break; - } + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + msbp->msb_delta, rsvd); + if (error) + goto unwind; } + spin_unlock(&mp->m_sb_lock); + return 0; - /* - * If we didn't complete the loop above, then back out - * any changes made to the superblock. If you add code - * between the loop above and here, make sure that you - * preserve the value of status. Loop back until - * we step below the beginning of the array. Make sure - * we don't touch anything back there. - */ - if (status != 0) { - msbp--; - while (msbp >= msb) { - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - break; - } - ASSERT(status == 0); - msbp--; - } +unwind: + while (--msbp >= msb) { + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + -msbp->msb_delta, rsvd); + ASSERT(error == 0); } spin_unlock(&mp->m_sb_lock); - return status; + return error; } /* @@ -2001,18 +1941,13 @@ xfs_getsb( */ void xfs_freesb( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp = mp->m_sb_bp; - /* - * Use xfs_getsb() so that the buffer will be locked - * when we call xfs_buf_relse(). - */ - bp = xfs_getsb(mp, 0); - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); + xfs_buf_lock(bp); mp->m_sb_bp = NULL; + xfs_buf_relse(bp); } /* @@ -2499,7 +2434,7 @@ xfs_icsb_balance_counter( spin_unlock(&mp->m_sb_lock); } -STATIC int +int xfs_icsb_modify_counters( xfs_mount_t *mp, xfs_sb_field_t field, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5761087ee8ea..5861b4980740 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations { #include "xfs_sync.h" -struct cred; struct log; struct xfs_mount_args; struct xfs_inode; @@ -66,65 +65,6 @@ struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; - -/* - * Prototypes and functions for the Data Migration subsystem. - */ - -typedef int (*xfs_send_data_t)(int, struct xfs_inode *, - xfs_off_t, size_t, int, int *); -typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); -typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); -typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, - struct xfs_inode *, dm_right_t, - struct xfs_inode *, dm_right_t, - const unsigned char *, const unsigned char *, - mode_t, int, int); -typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, - char *, char *); -typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, - dm_right_t, mode_t, int, int); - -typedef struct xfs_dmops { - xfs_send_data_t xfs_send_data; - xfs_send_mmap_t xfs_send_mmap; - xfs_send_destroy_t xfs_send_destroy; - xfs_send_namesp_t xfs_send_namesp; - xfs_send_mount_t xfs_send_mount; - xfs_send_unmount_t xfs_send_unmount; -} xfs_dmops_t; - -#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \ - (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED) - -#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ - (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) -#define XFS_SEND_MMAP(mp, vma,fl) \ - (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) -#define XFS_SEND_DESTROY(mp, ip,right) \ - (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) -#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ - (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) -#define XFS_SEND_MOUNT(mp,right,path,name) \ - (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) -#define XFS_SEND_PREUNMOUNT(mp) \ -do { \ - if (mp->m_flags & XFS_MOUNT_DMAPI) { \ - (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \ - (mp)->m_rootip, DM_RIGHT_NULL, \ - (mp)->m_rootip, DM_RIGHT_NULL, \ - NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \ - } \ -} while (0) -#define XFS_SEND_UNMOUNT(mp) \ -do { \ - if (mp->m_flags & XFS_MOUNT_DMAPI) { \ - (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \ - DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \ - } \ -} while (0) - - #ifdef HAVE_PERCPU_SB /* @@ -150,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *); extern void xfs_icsb_destroy_counters(struct xfs_mount *); extern void xfs_icsb_sync_counters(struct xfs_mount *, int); extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); +extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, + int64_t, int); #else #define xfs_icsb_init_counters(mp) (0) @@ -157,6 +99,8 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); #define xfs_icsb_reinit_counters(mp) do { } while (0) #define xfs_icsb_sync_counters(mp, flags) do { } while (0) #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) +#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ + xfs_mod_incore_sb(mp, field, delta, rsvd) #endif typedef struct xfs_mount { @@ -241,8 +185,6 @@ typedef struct xfs_mount { uint m_chsize; /* size of next field */ struct xfs_chash *m_chash; /* fs private inode per-cluster * hash table */ - struct xfs_dmops *m_dm_ops; /* vector of DMI ops */ - struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ @@ -269,7 +211,6 @@ typedef struct xfs_mount { must be synchronous except for space allocations */ #define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ -#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for @@ -282,8 +223,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ #define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ #define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ -#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */ - /* osyncisdsync is now default*/ #define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above * 32 bits in size */ #define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */ @@ -296,8 +235,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred * I/O size in stat() */ -#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock - counters */ #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams allocator */ #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ @@ -391,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) * perag get/put wrappers for ref counting */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); +struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, + int tag); void xfs_perag_put(struct xfs_perag *pag); /* @@ -440,11 +379,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); -extern int xfs_dmops_get(struct xfs_mount *); -extern void xfs_dmops_put(struct xfs_mount *); - -extern struct xfs_dmops xfs_dmcore_xfs; - #endif /* __KERNEL__ */ extern void xfs_mod_sb(struct xfs_trans *, __int64_t); diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h deleted file mode 100644 index 2dec79edb510..000000000000 --- a/fs/xfs/xfs_refcache.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_REFCACHE_H__ -#define __XFS_REFCACHE_H__ - -#ifdef HAVE_REFCACHE -/* - * Maximum size (in inodes) for the NFS reference cache - */ -#define XFS_REFCACHE_SIZE_MAX 512 - -struct xfs_inode; -struct xfs_mount; - -extern void xfs_refcache_insert(struct xfs_inode *); -extern void xfs_refcache_purge_ip(struct xfs_inode *); -extern void xfs_refcache_purge_mp(struct xfs_mount *); -extern void xfs_refcache_purge_some(struct xfs_mount *); -extern void xfs_refcache_resize(int); -extern void xfs_refcache_destroy(void); - -extern void xfs_refcache_iunlock(struct xfs_inode *, uint); - -#else - -#define xfs_refcache_insert(ip) do { } while (0) -#define xfs_refcache_purge_ip(ip) do { } while (0) -#define xfs_refcache_purge_mp(mp) do { } while (0) -#define xfs_refcache_purge_some(mp) do { } while (0) -#define xfs_refcache_resize(size) do { } while (0) -#define xfs_refcache_destroy() do { } while (0) - -#define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags) - -#endif - -#endif /* __XFS_REFCACHE_H__ */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index fc1cda23b817..d2af0a8381a6 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -24,12 +24,9 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -116,20 +113,7 @@ xfs_rename( int spaceres; int num_inodes; - xfs_itrace_entry(src_dp); - xfs_itrace_entry(target_dp); - - if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || - DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, - src_dp, DM_RIGHT_NULL, - target_dp, DM_RIGHT_NULL, - src_name->name, target_name->name, - 0, 0, 0); - if (error) - return error; - } - /* Return through std_return after this point. */ + trace_xfs_rename(src_dp, target_dp, src_name, target_name); new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); @@ -184,26 +168,14 @@ xfs_rename( /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock - * them. Note that we need to add a vnode reference to the - * directories since trans_commit & trans_cancel will decrement - * them when they unlock the inodes. Also, we need to be careful - * not to add an inode to the transaction more than once. + * them. */ - IHOLD(src_dp); - xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); - - if (new_parent) { - IHOLD(target_dp); - xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); - } - - IHOLD(src_ip); - xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); - - if (target_ip) { - IHOLD(target_ip); - xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); - } + xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL); + if (new_parent) + xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL); + if (target_ip) + xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL); /* * If we are using project inheritance, we only allow renames @@ -211,7 +183,7 @@ xfs_rename( * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { error = XFS_ERROR(EXDEV); goto error_return; } @@ -239,7 +211,9 @@ xfs_rename( goto error_return; if (error) goto abort_return; - xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); @@ -277,7 +251,9 @@ xfs_rename( &first_block, &free_list, spaceres); if (error) goto abort_return; - xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target @@ -320,7 +296,7 @@ xfs_rename( * inode isn't really being changed, but old unix file systems did * it and some incremental backup programs won't work without it. */ - xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); /* * Adjust the link count on src_dp. This is necessary when @@ -343,7 +319,7 @@ xfs_rename( if (error) goto abort_return; - xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); @@ -369,26 +345,13 @@ xfs_rename( * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - - /* Fall through to std_return with error = 0 or errno from - * xfs_trans_commit */ -std_return: - if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || - DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { - (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, - src_dp, DM_RIGHT_NULL, - target_dp, DM_RIGHT_NULL, - src_name->name, target_name->name, - 0, error, 0); - } - return error; + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); abort_return: cancel_flags |= XFS_TRANS_ABORT; - /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); - goto std_return; + std_return: + return error; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a2d32ce335aa..12a191385310 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -25,17 +25,10 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -46,6 +39,7 @@ #include "xfs_trans_space.h" #include "xfs_utils.h" #include "xfs_trace.h" +#include "xfs_buf.h" /* @@ -129,7 +123,7 @@ xfs_growfs_rt_alloc( cancelflags |= XFS_TRANS_ABORT; error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, - resblks, &map, &nmap, &flist, NULL); + resblks, &map, &nmap, &flist); if (!error && nmap < 1) error = XFS_ERROR(ENOSPC); if (error) @@ -1890,13 +1884,13 @@ xfs_growfs_rt( /* * Read in the last block of the device, make sure it exists. */ - error = xfs_read_buf(mp, mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, nrblocks - 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (error) - return error; - ASSERT(bp); + bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, + XFS_FSB_TO_BB(mp, nrblocks - 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) + return EIO; xfs_buf_relse(bp); + /* * Calculate new parameters. These are the final values to be reached. */ @@ -2222,7 +2216,6 @@ xfs_rtmount_init( { xfs_buf_t *bp; /* buffer for last block of subvolume */ xfs_daddr_t d; /* address of last block of subvolume */ - int error; /* error return value */ xfs_sb_t *sbp; /* filesystem superblock copy in mount */ sbp = &mp->m_sb; @@ -2249,15 +2242,12 @@ xfs_rtmount_init( (unsigned long long) mp->m_sb.sb_rblocks); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_rtdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (error) { - cmn_err(CE_WARN, - "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); - if (error == ENOSPC) - return XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: realtime device size check failed"); + return EIO; } xfs_buf_relse(bp); return 0; diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index e336742a58a4..56861d5daaef 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -24,27 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_itable.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" -#include "xfs_attr.h" -#include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_buf_item.h" #include "xfs_rw.h" -#include "xfs_trace.h" /* * Force a shutdown of the filesystem instantly while keeping diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1b017c657494..1eb2ba586814 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -80,10 +80,12 @@ struct xfs_mount; #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ +#define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ - XFS_SB_VERSION2_ATTR2BIT) + XFS_SB_VERSION2_ATTR2BIT | \ + XFS_SB_VERSION2_PROJID32BIT) #define XFS_SB_VERSION2_OKSASHFBITS \ (0) #define XFS_SB_VERSION2_OKREALBITS \ @@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } +static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) +{ + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 28547dfce037..f6d956b7711e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -24,16 +25,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -47,6 +44,7 @@ #include "xfs_trace.h" kmem_zone_t *xfs_trans_zone; +kmem_zone_t *xfs_log_item_desc_zone; /* @@ -597,8 +595,7 @@ _xfs_trans_alloc( tp->t_magic = XFS_TRANS_MAGIC; tp->t_type = type; tp->t_mountp = mp; - tp->t_items_free = XFS_LIC_NUM_SLOTS; - xfs_lic_init(&(tp->t_items)); + INIT_LIST_HEAD(&tp->t_items); INIT_LIST_HEAD(&tp->t_busy); return tp; } @@ -643,8 +640,7 @@ xfs_trans_dup( ntp->t_magic = XFS_TRANS_MAGIC; ntp->t_type = tp->t_type; ntp->t_mountp = tp->t_mountp; - ntp->t_items_free = XFS_LIC_NUM_SLOTS; - xfs_lic_init(&(ntp->t_items)); + INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -700,7 +696,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (blocks > 0) { - error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, + error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); @@ -771,7 +767,7 @@ undo_log: undo_blocks: if (blocks > 0) { - (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } @@ -1013,7 +1009,7 @@ void xfs_trans_unreserve_and_mod_sb( xfs_trans_t *tp) { - xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ + xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ xfs_mod_sb_t *msbp; xfs_mount_t *mp = tp->t_mountp; /* REFERENCED */ @@ -1021,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb( int rsvd; int64_t blkdelta = 0; int64_t rtxdelta = 0; + int64_t idelta = 0; + int64_t ifreedelta = 0; msbp = msb; rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* calculate free blocks delta */ + /* calculate deltas */ if (tp->t_blk_res > 0) blkdelta = tp->t_blk_res; - if ((tp->t_fdblocks_delta != 0) && (xfs_sb_version_haslazysbcount(&mp->m_sb) || (tp->t_flags & XFS_TRANS_SB_DIRTY))) blkdelta += tp->t_fdblocks_delta; - if (blkdelta != 0) { - msbp->msb_field = XFS_SBS_FDBLOCKS; - msbp->msb_delta = blkdelta; - msbp++; - } - - /* calculate free realtime extents delta */ if (tp->t_rtx_res > 0) rtxdelta = tp->t_rtx_res; - if ((tp->t_frextents_delta != 0) && (tp->t_flags & XFS_TRANS_SB_DIRTY)) rtxdelta += tp->t_frextents_delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb) || + (tp->t_flags & XFS_TRANS_SB_DIRTY)) { + idelta = tp->t_icount_delta; + ifreedelta = tp->t_ifree_delta; + } + + /* apply the per-cpu counters */ + if (blkdelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + blkdelta, rsvd); + if (error) + goto out; + } + + if (idelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, + idelta, rsvd); + if (error) + goto out_undo_fdblocks; + } + + if (ifreedelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, + ifreedelta, rsvd); + if (error) + goto out_undo_icount; + } + + /* apply remaining deltas */ if (rtxdelta != 0) { msbp->msb_field = XFS_SBS_FREXTENTS; msbp->msb_delta = rtxdelta; msbp++; } - /* apply remaining deltas */ - - if (xfs_sb_version_haslazysbcount(&mp->m_sb) || - (tp->t_flags & XFS_TRANS_SB_DIRTY)) { - if (tp->t_icount_delta != 0) { - msbp->msb_field = XFS_SBS_ICOUNT; - msbp->msb_delta = tp->t_icount_delta; - msbp++; - } - if (tp->t_ifree_delta != 0) { - msbp->msb_field = XFS_SBS_IFREE; - msbp->msb_delta = tp->t_ifree_delta; - msbp++; - } - } - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { if (tp->t_dblocks_delta != 0) { msbp->msb_field = XFS_SBS_DBLOCKS; @@ -1119,7 +1121,125 @@ xfs_trans_unreserve_and_mod_sb( if (msbp > msb) { error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, (uint)(msbp - msb), rsvd); - ASSERT(error == 0); + if (error) + goto out_undo_ifreecount; + } + + return; + +out_undo_ifreecount: + if (ifreedelta) + xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); +out_undo_icount: + if (idelta) + xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); +out_undo_fdblocks: + if (blkdelta) + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); +out: + ASSERT(error = 0); + return; +} + +/* + * Add the given log item to the transaction's list of log items. + * + * The log item will now point to its new descriptor with its li_desc field. + */ +void +xfs_trans_add_item( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_log_item_desc *lidp; + + ASSERT(lip->li_mountp = tp->t_mountp); + ASSERT(lip->li_ailp = tp->t_mountp->m_ail); + + lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); + + lidp->lid_item = lip; + lidp->lid_flags = 0; + lidp->lid_size = 0; + list_add_tail(&lidp->lid_trans, &tp->t_items); + + lip->li_desc = lidp; +} + +STATIC void +xfs_trans_free_item_desc( + struct xfs_log_item_desc *lidp) +{ + list_del_init(&lidp->lid_trans); + kmem_zone_free(xfs_log_item_desc_zone, lidp); +} + +/* + * Unlink and free the given descriptor. + */ +void +xfs_trans_del_item( + struct xfs_log_item *lip) +{ + xfs_trans_free_item_desc(lip->li_desc); + lip->li_desc = NULL; +} + +/* + * Unlock all of the items of a transaction and free all the descriptors + * of that transaction. + */ +void +xfs_trans_free_items( + struct xfs_trans *tp, + xfs_lsn_t commit_lsn, + int flags) +{ + struct xfs_log_item_desc *lidp, *next; + + list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { + struct xfs_log_item *lip = lidp->lid_item; + + lip->li_desc = NULL; + + if (commit_lsn != NULLCOMMITLSN) + IOP_COMMITTING(lip, commit_lsn); + if (flags & XFS_TRANS_ABORT) + lip->li_flags |= XFS_LI_ABORTED; + IOP_UNLOCK(lip); + + xfs_trans_free_item_desc(lidp); + } +} + +/* + * Unlock the items associated with a transaction. + * + * Items which were not logged should be freed. Those which were logged must + * still be tracked so they can be unpinned when the transaction commits. + */ +STATIC void +xfs_trans_unlock_items( + struct xfs_trans *tp, + xfs_lsn_t commit_lsn) +{ + struct xfs_log_item_desc *lidp, *next; + + list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { + struct xfs_log_item *lip = lidp->lid_item; + + lip->li_desc = NULL; + + if (commit_lsn != NULLCOMMITLSN) + IOP_COMMITTING(lip, commit_lsn); + IOP_UNLOCK(lip); + + /* + * Free the descriptor if the item is not dirty + * within this transaction. + */ + if (!(lidp->lid_flags & XFS_LID_DIRTY)) + xfs_trans_free_item_desc(lidp); } } @@ -1134,30 +1254,27 @@ xfs_trans_count_vecs( struct xfs_trans *tp) { int nvecs; - xfs_log_item_desc_t *lidp; + struct xfs_log_item_desc *lidp; nvecs = 1; - lidp = xfs_trans_first_item(tp); - ASSERT(lidp != NULL); /* In the non-debug case we need to start bailing out if we * didn't find a log_item here, return zero and let trans_commit * deal with it. */ - if (lidp == NULL) + if (list_empty(&tp->t_items)) { + ASSERT(0); return 0; + } - while (lidp != NULL) { + list_for_each_entry(lidp, &tp->t_items, lid_trans) { /* * Skip items which aren't dirty in this transaction. */ - if (!(lidp->lid_flags & XFS_LID_DIRTY)) { - lidp = xfs_trans_next_item(tp, lidp); + if (!(lidp->lid_flags & XFS_LID_DIRTY)) continue; - } lidp->lid_size = IOP_SIZE(lidp->lid_item); nvecs += lidp->lid_size; - lidp = xfs_trans_next_item(tp, lidp); } return nvecs; @@ -1177,7 +1294,7 @@ xfs_trans_fill_vecs( struct xfs_trans *tp, struct xfs_log_iovec *log_vector) { - xfs_log_item_desc_t *lidp; + struct xfs_log_item_desc *lidp; struct xfs_log_iovec *vecp; uint nitems; @@ -1188,14 +1305,11 @@ xfs_trans_fill_vecs( vecp = log_vector + 1; nitems = 0; - lidp = xfs_trans_first_item(tp); - ASSERT(lidp); - while (lidp) { + ASSERT(!list_empty(&tp->t_items)); + list_for_each_entry(lidp, &tp->t_items, lid_trans) { /* Skip items which aren't dirty in this transaction. */ - if (!(lidp->lid_flags & XFS_LID_DIRTY)) { - lidp = xfs_trans_next_item(tp, lidp); + if (!(lidp->lid_flags & XFS_LID_DIRTY)) continue; - } /* * The item may be marked dirty but not log anything. This can @@ -1206,7 +1320,6 @@ xfs_trans_fill_vecs( IOP_FORMAT(lidp->lid_item, vecp); vecp += lidp->lid_size; IOP_PIN(lidp->lid_item); - lidp = xfs_trans_next_item(tp, lidp); } /* @@ -1284,7 +1397,7 @@ xfs_trans_item_committed( * log item flags, if anyone else stales the buffer we do not want to * pay any attention to it. */ - IOP_UNPIN(lip); + IOP_UNPIN(lip, 0); } /* @@ -1298,27 +1411,15 @@ xfs_trans_item_committed( */ STATIC void xfs_trans_committed( - struct xfs_trans *tp, + void *arg, int abortflag) { - xfs_log_item_desc_t *lidp; - xfs_log_item_chunk_t *licp; - xfs_log_item_chunk_t *next_licp; - - /* Call the transaction's completion callback if there is one. */ - if (tp->t_callback != NULL) - tp->t_callback(tp, tp->t_callarg); + struct xfs_trans *tp = arg; + struct xfs_log_item_desc *lidp, *next; - for (lidp = xfs_trans_first_item(tp); - lidp != NULL; - lidp = xfs_trans_next_item(tp, lidp)) { + list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); - } - - /* free the item chunks, ignoring the embedded chunk */ - for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) { - next_licp = licp->lic_next; - kmem_free(licp); + xfs_trans_free_item_desc(lidp); } xfs_trans_free(tp); @@ -1333,16 +1434,14 @@ xfs_trans_uncommit( struct xfs_trans *tp, uint flags) { - xfs_log_item_desc_t *lidp; + struct xfs_log_item_desc *lidp; - for (lidp = xfs_trans_first_item(tp); - lidp != NULL; - lidp = xfs_trans_next_item(tp, lidp)) { + list_for_each_entry(lidp, &tp->t_items, lid_trans) { /* * Unpin all but those that aren't dirty. */ if (lidp->lid_flags & XFS_LID_DIRTY) - IOP_UNPIN_REMOVE(lidp->lid_item, tp); + IOP_UNPIN(lidp->lid_item, 1); } xfs_trans_unreserve_and_mod_sb(tp); @@ -1445,7 +1544,7 @@ xfs_trans_commit_iclog( * running in simulation mode (the log is explicitly turned * off). */ - tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; + tp->t_logcb.cb_func = xfs_trans_committed; tp->t_logcb.cb_arg = tp; /* @@ -1508,33 +1607,28 @@ STATIC struct xfs_log_vec * xfs_trans_alloc_log_vecs( xfs_trans_t *tp) { - xfs_log_item_desc_t *lidp; + struct xfs_log_item_desc *lidp; struct xfs_log_vec *lv = NULL; struct xfs_log_vec *ret_lv = NULL; - lidp = xfs_trans_first_item(tp); /* Bail out if we didn't find a log item. */ - if (!lidp) { + if (list_empty(&tp->t_items)) { ASSERT(0); return NULL; } - while (lidp != NULL) { + list_for_each_entry(lidp, &tp->t_items, lid_trans) { struct xfs_log_vec *new_lv; /* Skip items which aren't dirty in this transaction. */ - if (!(lidp->lid_flags & XFS_LID_DIRTY)) { - lidp = xfs_trans_next_item(tp, lidp); + if (!(lidp->lid_flags & XFS_LID_DIRTY)) continue; - } /* Skip items that do not have any vectors for writing */ lidp->lid_size = IOP_SIZE(lidp->lid_item); - if (!lidp->lid_size) { - lidp = xfs_trans_next_item(tp, lidp); + if (!lidp->lid_size) continue; - } new_lv = kmem_zalloc(sizeof(*new_lv) + lidp->lid_size * sizeof(struct xfs_log_iovec), @@ -1549,7 +1643,6 @@ xfs_trans_alloc_log_vecs( else lv->lv_next = new_lv; lv = new_lv; - lidp = xfs_trans_next_item(tp, lidp); } return ret_lv; @@ -1579,9 +1672,6 @@ xfs_trans_commit_cil( return error; current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - - /* xfs_trans_free_items() unlocks them first */ - xfs_trans_free_items(tp, *commit_lsn, 0); xfs_trans_free(tp); return 0; } @@ -1708,12 +1798,6 @@ xfs_trans_cancel( int flags) { int log_flags; -#ifdef DEBUG - xfs_log_item_chunk_t *licp; - xfs_log_item_desc_t *lidp; - xfs_log_item_t *lip; - int i; -#endif xfs_mount_t *mp = tp->t_mountp; /* @@ -1732,21 +1816,11 @@ xfs_trans_cancel( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG - if (!(flags & XFS_TRANS_ABORT)) { - licp = &(tp->t_items); - while (licp != NULL) { - lidp = licp->lic_descs; - for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (xfs_lic_isfree(licp, i)) { - continue; - } - - lip = lidp->lid_item; - if (!XFS_FORCED_SHUTDOWN(mp)) - ASSERT(!(lip->li_type == XFS_LI_EFD)); - } - licp = licp->lic_next; - } + if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) { + struct xfs_log_item_desc *lidp; + + list_for_each_entry(lidp, &tp->t_items, lid_trans) + ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD)); } #endif xfs_trans_unreserve_and_mod_sb(tp); @@ -1834,7 +1908,6 @@ xfs_trans_roll( if (error) return error; - xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(trans, dp); + xfs_trans_ijoin(trans, dp); return 0; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index e639e8e9a2a9..246286b77a86 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -161,105 +161,14 @@ typedef struct xfs_trans_header { * the amount of space needed to log the item it describes * once we get to commit processing (see xfs_trans_commit()). */ -typedef struct xfs_log_item_desc { +struct xfs_log_item_desc { struct xfs_log_item *lid_item; - ushort lid_size; - unsigned char lid_flags; - unsigned char lid_index; -} xfs_log_item_desc_t; + ushort lid_size; + unsigned char lid_flags; + struct list_head lid_trans; +}; #define XFS_LID_DIRTY 0x1 -#define XFS_LID_PINNED 0x2 - -/* - * This structure is used to maintain a chunk list of log_item_desc - * structures. The free field is a bitmask indicating which descriptors - * in this chunk's array are free. The unused field is the first value - * not used since this chunk was allocated. - */ -#define XFS_LIC_NUM_SLOTS 15 -typedef struct xfs_log_item_chunk { - struct xfs_log_item_chunk *lic_next; - ushort lic_free; - ushort lic_unused; - xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS]; -} xfs_log_item_chunk_t; - -#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1) -#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1) - - -/* - * Initialize the given chunk. Set the chunk's free descriptor mask - * to indicate that all descriptors are free. The caller gets to set - * lic_unused to the right value (0 matches all free). The - * lic_descs.lid_index values are set up as each desc is allocated. - */ -static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) -{ - cp->lic_free = XFS_LIC_FREEMASK; -} - -static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) -{ - cp->lic_descs[slot].lid_index = (unsigned char)(slot); -} - -static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) -{ - return cp->lic_free & XFS_LIC_FREEMASK; -} - -static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) -{ - cp->lic_free = XFS_LIC_FREEMASK; -} - -static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) -{ - return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); -} - -static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) -{ - return (cp->lic_free & (1 << slot)); -} - -static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) -{ - cp->lic_free &= ~(1 << slot); -} - -static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) -{ - cp->lic_free |= 1 << slot; -} - -static inline xfs_log_item_desc_t * -xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) -{ - return &(cp->lic_descs[slot]); -} - -static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) -{ - return (uint)dp->lid_index; -} - -/* - * Calculate the address of a chunk given a descriptor pointer: - * dp - dp->lid_index give the address of the start of the lic_descs array. - * From this we subtract the offset of the lic_descs field in a chunk. - * All of this yields the address of the chunk, which is - * cast to a chunk pointer. - */ -static inline xfs_log_item_chunk_t * -xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) -{ - return (xfs_log_item_chunk_t*) \ - (((xfs_caddr_t)((dp) - (dp)->lid_index)) - \ - (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); -} #define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ /* @@ -275,8 +184,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) /* * Values for call flags parameter. */ -#define XFS_TRANS_NOSLEEP 0x1 -#define XFS_TRANS_WAIT 0x2 #define XFS_TRANS_RELEASE_LOG_RES 0x4 #define XFS_TRANS_ABORT 0x8 @@ -438,8 +345,7 @@ typedef struct xfs_item_ops { uint (*iop_size)(xfs_log_item_t *); void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); void (*iop_pin)(xfs_log_item_t *); - void (*iop_unpin)(xfs_log_item_t *); - void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); + void (*iop_unpin)(xfs_log_item_t *, int remove); uint (*iop_trylock)(xfs_log_item_t *); void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); @@ -451,8 +357,7 @@ typedef struct xfs_item_ops { #define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) -#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) -#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) +#define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) @@ -494,8 +399,6 @@ typedef struct xfs_trans { * transaction. */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ - xfs_trans_callback_t t_callback; /* transaction callback */ - void *t_callarg; /* callback arg */ unsigned int t_flags; /* misc flags */ int64_t t_icount_delta; /* superblock icount change */ int64_t t_ifree_delta; /* superblock ifree change */ @@ -516,8 +419,7 @@ typedef struct xfs_trans { int64_t t_rblocks_delta;/* superblock rblocks change */ int64_t t_rextents_delta;/* superblocks rextents chg */ int64_t t_rextslog_delta;/* superblocks rextslog chg */ - unsigned int t_items_free; /* log item descs free */ - xfs_log_item_chunk_t t_items; /* first log item desc chunk */ + struct list_head t_items; /* log item descriptors */ xfs_trans_header_t t_header; /* header for in-log trans */ struct list_head t_busy; /* list of busy extents */ unsigned long t_pflags; /* saved process flags state */ @@ -569,8 +471,9 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, xfs_ino_t , uint, uint, struct xfs_inode **); -void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); -void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); +void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); +void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); +void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); @@ -595,6 +498,7 @@ int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); extern kmem_zone_t *xfs_trans_zone; +extern kmem_zone_t *xfs_log_item_desc_zone; #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index e799824f7245..dc9069568ff7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -24,7 +24,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 63d81a22f4fd..c47918c302a5 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -24,14 +24,10 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_buf_item.h" @@ -51,36 +47,17 @@ xfs_trans_buf_item_match( xfs_daddr_t blkno, int len) { - xfs_log_item_chunk_t *licp; - xfs_log_item_desc_t *lidp; - xfs_buf_log_item_t *blip; - int i; + struct xfs_log_item_desc *lidp; + struct xfs_buf_log_item *blip; len = BBTOB(len); - for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { - if (xfs_lic_are_all_free(licp)) { - ASSERT(licp == &tp->t_items); - ASSERT(licp->lic_next == NULL); - return NULL; - } - - for (i = 0; i < licp->lic_unused; i++) { - /* - * Skip unoccupied slots. - */ - if (xfs_lic_isfree(licp, i)) - continue; - - lidp = xfs_lic_slot(licp, i); - blip = (xfs_buf_log_item_t *)lidp->lid_item; - if (blip->bli_item.li_type != XFS_LI_BUF) - continue; - - if (XFS_BUF_TARGET(blip->bli_buf) == target && - XFS_BUF_ADDR(blip->bli_buf) == blkno && - XFS_BUF_COUNT(blip->bli_buf) == len) - return blip->bli_buf; - } + list_for_each_entry(lidp, &tp->t_items, lid_trans) { + blip = (struct xfs_buf_log_item *)lidp->lid_item; + if (blip->bli_item.li_type == XFS_LI_BUF && + XFS_BUF_TARGET(blip->bli_buf) == target && + XFS_BUF_ADDR(blip->bli_buf) == blkno && + XFS_BUF_COUNT(blip->bli_buf) == len) + return blip->bli_buf; } return NULL; @@ -127,7 +104,7 @@ _xfs_trans_bjoin( /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); + xfs_trans_add_item(tp, &bip->bli_item); /* * Initialize b_fsprivate2 so we can find it with incore_match() @@ -359,7 +336,7 @@ xfs_trans_read_buf( ASSERT(!XFS_BUF_ISASYNC(bp)); XFS_BUF_READ(bp); xfsbdstrat(tp->t_mountp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); @@ -483,7 +460,6 @@ xfs_trans_brelse(xfs_trans_t *tp, { xfs_buf_log_item_t *bip; xfs_log_item_t *lip; - xfs_log_item_desc_t *lidp; /* * Default to a normal brelse() call if the tp is NULL. @@ -514,13 +490,6 @@ xfs_trans_brelse(xfs_trans_t *tp, ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - /* - * Find the item descriptor pointing to this buffer's - * log item. It must be there. - */ - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); - ASSERT(lidp != NULL); - trace_xfs_trans_brelse(bip); /* @@ -536,7 +505,7 @@ xfs_trans_brelse(xfs_trans_t *tp, * If the buffer is dirty within this transaction, we can't * release it until we commit. */ - if (lidp->lid_flags & XFS_LID_DIRTY) + if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY) return; /* @@ -553,7 +522,7 @@ xfs_trans_brelse(xfs_trans_t *tp, /* * Free up the log item descriptor tracking the released item. */ - xfs_trans_free_item(tp, lidp); + xfs_trans_del_item(&bip->bli_item); /* * Clear the hold flag in the buf log item if it is set. @@ -665,7 +634,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, uint last) { xfs_buf_log_item_t *bip; - xfs_log_item_desc_t *lidp; ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); @@ -690,7 +658,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); - bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; + bip->bli_item.li_cb = xfs_buf_iodone; trace_xfs_trans_log_buf(bip); @@ -707,11 +675,8 @@ xfs_trans_log_buf(xfs_trans_t *tp, bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; } - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; bip->bli_flags |= XFS_BLI_LOGGED; xfs_buf_item_log(bip, first, last); } @@ -740,7 +705,6 @@ xfs_trans_binval( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_log_item_desc_t *lidp; xfs_buf_log_item_t *bip; ASSERT(XFS_BUF_ISBUSY(bp)); @@ -748,8 +712,6 @@ xfs_trans_binval( ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); - ASSERT(lidp != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_binval(bip); @@ -764,7 +726,7 @@ xfs_trans_binval( ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(lidp->lid_flags & XFS_LID_DIRTY); + ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); ASSERT(tp->t_flags & XFS_TRANS_DIRTY); return; } @@ -797,7 +759,7 @@ xfs_trans_binval( bip->bli_format.blf_flags |= XFS_BLF_CANCEL; memset((char *)(bip->bli_format.blf_data_map), 0, (bip->bli_format.blf_map_size * sizeof(uint))); - lidp->lid_flags |= XFS_LID_DIRTY; + bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY; } @@ -853,12 +815,9 @@ xfs_trans_stale_inode_buf( ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_STALE_INODE; - bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) - xfs_buf_iodone; + bip->bli_item.li_cb = xfs_buf_iodone; } - - /* * Mark the buffer as being one which contains newly allocated * inodes. We need to make sure that even if this buffer is diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 27cce2a9c7e9..f783d5e9fa70 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -23,7 +23,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" @@ -49,9 +48,8 @@ xfs_trans_get_efi(xfs_trans_t *tp, /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efip); - - return (efip); + xfs_trans_add_item(tp, &efip->efi_item); + return efip; } /* @@ -65,15 +63,11 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, xfs_fsblock_t start_block, xfs_extlen_t ext_len) { - xfs_log_item_desc_t *lidp; uint next_extent; xfs_extent_t *extp; - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efip); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; next_extent = efip->efi_next_extent; ASSERT(next_extent < efip->efi_format.efi_nextents); @@ -106,9 +100,8 @@ xfs_trans_get_efd(xfs_trans_t *tp, /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efdp); - - return (efdp); + xfs_trans_add_item(tp, &efdp->efd_item); + return efdp; } /* @@ -122,15 +115,11 @@ xfs_trans_log_efd_extent(xfs_trans_t *tp, xfs_fsblock_t start_block, xfs_extlen_t ext_len) { - xfs_log_item_desc_t *lidp; uint next_extent; xfs_extent_t *extp; - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efdp); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY; next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 2559dfec946b..ccb34532768b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -24,20 +24,16 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" +#include "xfs_trace.h" #ifdef XFS_TRANS_DEBUG STATIC void @@ -47,7 +43,6 @@ xfs_trans_inode_broot_debug( #define xfs_trans_inode_broot_debug(ip) #endif - /* * Get an inode and join it to the transaction. */ @@ -63,76 +58,94 @@ xfs_trans_iget( int error; error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); - if (!error && tp) - xfs_trans_ijoin(tp, *ipp, lock_flags); + if (!error && tp) { + xfs_trans_ijoin(tp, *ipp); + (*ipp)->i_itemp->ili_lock_flags = lock_flags; + } return error; } /* - * Add the locked inode to the transaction. - * The inode must be locked, and it cannot be associated with any - * transaction. The caller must specify the locks already held - * on the inode. + * Add a locked inode to the transaction. + * + * The inode must be locked, and it cannot be associated with any transaction. */ void xfs_trans_ijoin( - xfs_trans_t *tp, - xfs_inode_t *ip, - uint lock_flags) + struct xfs_trans *tp, + struct xfs_inode *ip) { xfs_inode_log_item_t *iip; ASSERT(ip->i_transp == NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(lock_flags & XFS_ILOCK_EXCL); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, ip->i_mount); iip = ip->i_itemp; - ASSERT(iip->ili_flags == 0); + ASSERT(iip->ili_lock_flags == 0); /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(iip)); + xfs_trans_add_item(tp, &iip->ili_item); xfs_trans_inode_broot_debug(ip); /* - * If the IO lock is already held, mark that in the inode log item. - */ - if (lock_flags & XFS_IOLOCK_EXCL) { - iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL; - } else if (lock_flags & XFS_IOLOCK_SHARED) { - iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED; - } - - /* * Initialize i_transp so we can find it with xfs_inode_incore() * in xfs_trans_iget() above. */ ip->i_transp = tp; } - +/* + * Add a locked inode to the transaction. + * + * + * Grabs a reference to the inode which will be dropped when the transaction + * is commited. The inode will also be unlocked at that point. The inode + * must be locked, and it cannot be associated with any transaction. + */ +void +xfs_trans_ijoin_ref( + struct xfs_trans *tp, + struct xfs_inode *ip, + uint lock_flags) +{ + xfs_trans_ijoin(tp, ip); + IHOLD(ip); + ip->i_itemp->ili_lock_flags = lock_flags; +} /* - * Mark the inode as not needing to be unlocked when the inode item's - * IOP_UNLOCK() routine is called. The inode must already be locked - * and associated with the given transaction. + * Transactional inode timestamp update. Requires the inode to be locked and + * joined to the transaction supplied. Relies on the transaction subsystem to + * track dirty state and update/writeback the inode accordingly. */ -/*ARGSUSED*/ void -xfs_trans_ihold( - xfs_trans_t *tp, - xfs_inode_t *ip) +xfs_trans_ichgtime( + struct xfs_trans *tp, + struct xfs_inode *ip, + int flags) { - ASSERT(ip->i_transp == tp); - ASSERT(ip->i_itemp != NULL); + struct inode *inode = VFS_I(ip); + timespec_t tv; + + ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); - ip->i_itemp->ili_flags |= XFS_ILI_HOLD; -} + tv = current_fs_time(inode->i_sb); + if ((flags & XFS_ICHGTIME_MOD) && + !timespec_equal(&inode->i_mtime, &tv)) { + inode->i_mtime = tv; + } + if ((flags & XFS_ICHGTIME_CHG) && + !timespec_equal(&inode->i_ctime, &tv)) { + inode->i_ctime = tv; + } +} /* * This is called to mark the fields indicated in fieldmask as needing @@ -149,17 +162,12 @@ xfs_trans_log_inode( xfs_inode_t *ip, uint flags) { - xfs_log_item_desc_t *lidp; - ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; /* * Always OR in the bits from the ili_last_fields field. diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c deleted file mode 100644 index f11d37d06dcc..000000000000 --- a/fs/xfs/xfs_trans_item.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -/* XXX: from here down needed until struct xfs_trans has its own ailp */ -#include "xfs_bit.h" -#include "xfs_buf_item.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" - -STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, - int, int, xfs_lsn_t); - -/* - * This is called to add the given log item to the transaction's - * list of log items. It must find a free log item descriptor - * or allocate a new one and add the item to that descriptor. - * The function returns a pointer to item descriptor used to point - * to the new item. The log item will now point to its new descriptor - * with its li_desc field. - */ -xfs_log_item_desc_t * -xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) -{ - xfs_log_item_desc_t *lidp; - xfs_log_item_chunk_t *licp; - int i=0; - - /* - * If there are no free descriptors, allocate a new chunk - * of them and put it at the front of the chunk list. - */ - if (tp->t_items_free == 0) { - licp = (xfs_log_item_chunk_t*) - kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP); - ASSERT(licp != NULL); - /* - * Initialize the chunk, and then - * claim the first slot in the newly allocated chunk. - */ - xfs_lic_init(licp); - xfs_lic_claim(licp, 0); - licp->lic_unused = 1; - xfs_lic_init_slot(licp, 0); - lidp = xfs_lic_slot(licp, 0); - - /* - * Link in the new chunk and update the free count. - */ - licp->lic_next = tp->t_items.lic_next; - tp->t_items.lic_next = licp; - tp->t_items_free = XFS_LIC_NUM_SLOTS - 1; - - /* - * Initialize the descriptor and the generic portion - * of the log item. - * - * Point the new slot at this item and return it. - * Also point the log item at its currently active - * descriptor and set the item's mount pointer. - */ - lidp->lid_item = lip; - lidp->lid_flags = 0; - lidp->lid_size = 0; - lip->li_desc = lidp; - lip->li_mountp = tp->t_mountp; - lip->li_ailp = tp->t_mountp->m_ail; - return lidp; - } - - /* - * Find the free descriptor. It is somewhere in the chunklist - * of descriptors. - */ - licp = &tp->t_items; - while (licp != NULL) { - if (xfs_lic_vacancy(licp)) { - if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { - i = licp->lic_unused; - ASSERT(xfs_lic_isfree(licp, i)); - break; - } - for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { - if (xfs_lic_isfree(licp, i)) - break; - } - ASSERT(i <= XFS_LIC_MAX_SLOT); - break; - } - licp = licp->lic_next; - } - ASSERT(licp != NULL); - /* - * If we find a free descriptor, claim it, - * initialize it, and return it. - */ - xfs_lic_claim(licp, i); - if (licp->lic_unused <= i) { - licp->lic_unused = i + 1; - xfs_lic_init_slot(licp, i); - } - lidp = xfs_lic_slot(licp, i); - tp->t_items_free--; - lidp->lid_item = lip; - lidp->lid_flags = 0; - lidp->lid_size = 0; - lip->li_desc = lidp; - lip->li_mountp = tp->t_mountp; - lip->li_ailp = tp->t_mountp->m_ail; - return lidp; -} - -/* - * Free the given descriptor. - * - * This requires setting the bit in the chunk's free mask corresponding - * to the given slot. - */ -void -xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) -{ - uint slot; - xfs_log_item_chunk_t *licp; - xfs_log_item_chunk_t **licpp; - - slot = xfs_lic_desc_to_slot(lidp); - licp = xfs_lic_desc_to_chunk(lidp); - xfs_lic_relse(licp, slot); - lidp->lid_item->li_desc = NULL; - tp->t_items_free++; - - /* - * If there are no more used items in the chunk and this is not - * the chunk embedded in the transaction structure, then free - * the chunk. First pull it from the chunk list and then - * free it back to the heap. We didn't bother with a doubly - * linked list here because the lists should be very short - * and this is not a performance path. It's better to save - * the memory of the extra pointer. - * - * Also decrement the transaction structure's count of free items - * by the number in a chunk since we are freeing an empty chunk. - */ - if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) { - licpp = &(tp->t_items.lic_next); - while (*licpp != licp) { - ASSERT(*licpp != NULL); - licpp = &((*licpp)->lic_next); - } - *licpp = licp->lic_next; - kmem_free(licp); - tp->t_items_free -= XFS_LIC_NUM_SLOTS; - } -} - -/* - * This is called to find the descriptor corresponding to the given - * log item. It returns a pointer to the descriptor. - * The log item MUST have a corresponding descriptor in the given - * transaction. This routine does not return NULL, it panics. - * - * The descriptor pointer is kept in the log item's li_desc field. - * Just return it. - */ -/*ARGSUSED*/ -xfs_log_item_desc_t * -xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip) -{ - ASSERT(lip->li_desc != NULL); - - return lip->li_desc; -} - - -/* - * Return a pointer to the first descriptor in the chunk list. - * This does not return NULL if there are none, it panics. - * - * The first descriptor must be in either the first or second chunk. - * This is because the only chunk allowed to be empty is the first. - * All others are freed when they become empty. - * - * At some point this and xfs_trans_next_item() should be optimized - * to quickly look at the mask to determine if there is anything to - * look at. - */ -xfs_log_item_desc_t * -xfs_trans_first_item(xfs_trans_t *tp) -{ - xfs_log_item_chunk_t *licp; - int i; - - licp = &tp->t_items; - /* - * If it's not in the first chunk, skip to the second. - */ - if (xfs_lic_are_all_free(licp)) { - licp = licp->lic_next; - } - - /* - * Return the first non-free descriptor in the chunk. - */ - ASSERT(!xfs_lic_are_all_free(licp)); - for (i = 0; i < licp->lic_unused; i++) { - if (xfs_lic_isfree(licp, i)) { - continue; - } - - return xfs_lic_slot(licp, i); - } - cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); - return NULL; -} - - -/* - * Given a descriptor, return the next descriptor in the chunk list. - * This returns NULL if there are no more used descriptors in the list. - * - * We do this by first locating the chunk in which the descriptor resides, - * and then scanning forward in the chunk and the list for the next - * used descriptor. - */ -/*ARGSUSED*/ -xfs_log_item_desc_t * -xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) -{ - xfs_log_item_chunk_t *licp; - int i; - - licp = xfs_lic_desc_to_chunk(lidp); - - /* - * First search the rest of the chunk. The for loop keeps us - * from referencing things beyond the end of the chunk. - */ - for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) { - if (xfs_lic_isfree(licp, i)) { - continue; - } - - return xfs_lic_slot(licp, i); - } - - /* - * Now search the next chunk. It must be there, because the - * next chunk would have been freed if it were empty. - * If there is no next chunk, return NULL. - */ - if (licp->lic_next == NULL) { - return NULL; - } - - licp = licp->lic_next; - ASSERT(!xfs_lic_are_all_free(licp)); - for (i = 0; i < licp->lic_unused; i++) { - if (xfs_lic_isfree(licp, i)) { - continue; - } - - return xfs_lic_slot(licp, i); - } - ASSERT(0); - /* NOTREACHED */ - return NULL; /* keep gcc quite */ -} - -/* - * This is called to unlock all of the items of a transaction and to free - * all the descriptors of that transaction. - * - * It walks the list of descriptors and unlocks each item. It frees - * each chunk except that embedded in the transaction as it goes along. - */ -void -xfs_trans_free_items( - xfs_trans_t *tp, - xfs_lsn_t commit_lsn, - int flags) -{ - xfs_log_item_chunk_t *licp; - xfs_log_item_chunk_t *next_licp; - int abort; - - abort = flags & XFS_TRANS_ABORT; - licp = &tp->t_items; - /* - * Special case the embedded chunk so we don't free it below. - */ - if (!xfs_lic_are_all_free(licp)) { - (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); - xfs_lic_all_free(licp); - licp->lic_unused = 0; - } - licp = licp->lic_next; - - /* - * Unlock each item in each chunk and free the chunks. - */ - while (licp != NULL) { - ASSERT(!xfs_lic_are_all_free(licp)); - (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); - next_licp = licp->lic_next; - kmem_free(licp); - licp = next_licp; - } - - /* - * Reset the transaction structure's free item count. - */ - tp->t_items_free = XFS_LIC_NUM_SLOTS; - tp->t_items.lic_next = NULL; -} - - - -/* - * This is called to unlock the items associated with a transaction. - * Items which were not logged should be freed. - * Those which were logged must still be tracked so they can be unpinned - * when the transaction commits. - */ -void -xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) -{ - xfs_log_item_chunk_t *licp; - xfs_log_item_chunk_t *next_licp; - xfs_log_item_chunk_t **licpp; - int freed; - - freed = 0; - licp = &tp->t_items; - - /* - * Special case the embedded chunk so we don't free. - */ - if (!xfs_lic_are_all_free(licp)) { - freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); - } - licpp = &(tp->t_items.lic_next); - licp = licp->lic_next; - - /* - * Unlock each item in each chunk, free non-dirty descriptors, - * and free empty chunks. - */ - while (licp != NULL) { - ASSERT(!xfs_lic_are_all_free(licp)); - freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); - next_licp = licp->lic_next; - if (xfs_lic_are_all_free(licp)) { - *licpp = next_licp; - kmem_free(licp); - freed -= XFS_LIC_NUM_SLOTS; - } else { - licpp = &(licp->lic_next); - } - ASSERT(*licpp == next_licp); - licp = next_licp; - } - - /* - * Fix the free descriptor count in the transaction. - */ - tp->t_items_free += freed; -} - -/* - * Unlock each item pointed to by a descriptor in the given chunk. - * Stamp the commit lsn into each item if necessary. - * Free descriptors pointing to items which are not dirty if freeing_chunk - * is zero. If freeing_chunk is non-zero, then we need to unlock all - * items in the chunk. - * - * Return the number of descriptors freed. - */ -STATIC int -xfs_trans_unlock_chunk( - xfs_log_item_chunk_t *licp, - int freeing_chunk, - int abort, - xfs_lsn_t commit_lsn) -{ - xfs_log_item_desc_t *lidp; - xfs_log_item_t *lip; - int i; - int freed; - - freed = 0; - lidp = licp->lic_descs; - for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (xfs_lic_isfree(licp, i)) { - continue; - } - lip = lidp->lid_item; - lip->li_desc = NULL; - - if (commit_lsn != NULLCOMMITLSN) - IOP_COMMITTING(lip, commit_lsn); - if (abort) - lip->li_flags |= XFS_LI_ABORTED; - IOP_UNLOCK(lip); - - /* - * Free the descriptor if the item is not dirty - * within this transaction and the caller is not - * going to just free the entire thing regardless. - */ - if (!(freeing_chunk) && - (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { - xfs_lic_relse(licp, i); - freed++; - } - } - - return freed; -} diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index c6e4f2c8de6e..62da86c90de5 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -23,23 +23,10 @@ struct xfs_log_item_desc; struct xfs_mount; struct xfs_trans; -/* - * From xfs_trans_item.c - */ -struct xfs_log_item_desc *xfs_trans_add_item(struct xfs_trans *, - struct xfs_log_item *); -void xfs_trans_free_item(struct xfs_trans *, - struct xfs_log_item_desc *); -struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *, - struct xfs_log_item *); -struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); -struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, - struct xfs_log_item_desc *); - -void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn); +void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); +void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, int flags); - void xfs_trans_item_committed(struct xfs_log_item *lip, xfs_lsn_t commit_lsn, int aborted); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 320775295e32..26d1867d8156 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ - typedef __uint32_t xlog_tid_t; /* transaction ID type */ /* diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 4d88616bde91..8b32d1a4c5a1 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -25,18 +25,14 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_rw.h" #include "xfs_itable.h" #include "xfs_utils.h" @@ -60,7 +56,6 @@ xfs_dir_ialloc( mode_t mode, xfs_nlink_t nlink, xfs_dev_t rdev, - cred_t *credp, prid_t prid, /* project id */ int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be @@ -97,7 +92,7 @@ xfs_dir_ialloc( * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &call_again, &ip); /* @@ -201,7 +196,7 @@ xfs_dir_ialloc( * other allocations in this allocation group, * this call should always succeed. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &call_again, &ip); /* @@ -239,7 +234,7 @@ xfs_droplink( { int error; - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; @@ -303,7 +298,7 @@ xfs_bumplink( { if (ip->i_d.di_nlink >= XFS_MAXLINK) return XFS_ERROR(EMLINK); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; @@ -324,86 +319,3 @@ xfs_bumplink( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return 0; } - -/* - * Try to truncate the given file to 0 length. Currently called - * only out of xfs_remove when it has to truncate a file to free - * up space for the remove to proceed. - */ -int -xfs_truncate_file( - xfs_mount_t *mp, - xfs_inode_t *ip) -{ - xfs_trans_t *tp; - int error; - -#ifdef QUOTADEBUG - /* - * This is called to truncate the quotainodes too. - */ - if (XFS_IS_UQUOTA_ON(mp)) { - if (ip->i_ino != mp->m_sb.sb_uquotino) - ASSERT(ip->i_udquot); - } - if (XFS_IS_OQUOTA_ON(mp)) { - if (ip->i_ino != mp->m_sb.sb_gquotino) - ASSERT(ip->i_gdquot); - } -#endif - /* - * Make the call to xfs_itruncate_start before starting the - * transaction, because we cannot make the call while we're - * in a transaction. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); - if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; - } - - /* - * Follow the normal truncate locking protocol. Since we - * hold the inode in the transaction, we know that its number - * of references will stay constant. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ihold(tp, ip); - /* - * Signal a sync xaction. The only case where that isn't - * the case is if we're truncating an already unlinked file - * on a wsync fs. In that case, we know the blocks can't - * reappear in the file because the links to file are - * permanently toast. Currently, we're always going to - * want a sync transaction because this code is being - * called from places where nlink is guaranteed to be 1 - * but I'm leaving the tests in to protect against future - * changes -- rcc. - */ - error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0, - XFS_DATA_FORK, - ((ip->i_d.di_nlink != 0 || - !(mp->m_flags & XFS_MOUNT_WSYNC)) - ? 1 : 0)); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); - } else { - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - } - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - return error; -} diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index ef321225d269..456fca314933 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -18,10 +18,8 @@ #ifndef __XFS_UTILS_H__ #define __XFS_UTILS_H__ -extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, - xfs_dev_t, cred_t *, prid_t, int, - xfs_inode_t **, int *); + xfs_dev_t, prid_t, int, xfs_inode_t **, int *); extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c1646838898f..8e4a63c4151a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -26,19 +26,14 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_itable.h" -#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" @@ -73,7 +68,7 @@ xfs_setattr( struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int need_iolock = 1; - xfs_itrace_entry(ip); + trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); @@ -119,7 +114,7 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, + code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), qflags, &udqp, &gdqp); if (code) return code; @@ -143,16 +138,6 @@ xfs_setattr( goto error_return; } } else { - if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && - !(flags & XFS_ATTR_DMI)) { - int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; - code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, - iattr->ia_size, 0, dmflags, NULL); - if (code) { - lock_flags = 0; - goto error_return; - } - } if (need_iolock) lock_flags |= XFS_IOLOCK_EXCL; } @@ -199,8 +184,11 @@ xfs_setattr( ip->i_size == 0 && ip->i_d.di_nextents == 0) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; - if (mask & ATTR_CTIME) - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + if (mask & ATTR_CTIME) { + inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + xfs_mark_inode_dirty_sync(ip); + } code = 0; goto error_return; } @@ -236,8 +224,11 @@ xfs_setattr( * transaction to modify the i_size. */ code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (code) + goto error_return; } xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; /* * We are going to log the inode size change in this @@ -251,40 +242,38 @@ xfs_setattr( * really care about here and prevents waiting for other data * not within the range we care about here. */ - if (!code && - ip->i_size != ip->i_d.di_size && + if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, XBF_ASYNC, FI_NONE); + if (code) + goto error_return; } /* wait for all I/O to complete */ xfs_ioend_wait(ip); - if (!code) - code = xfs_itruncate_data(ip, iattr->ia_size); - if (code) { - ASSERT(tp == NULL); - lock_flags &= ~XFS_ILOCK_EXCL; - ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock); + code = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (code) goto error_return; - } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - if ((code = xfs_trans_reserve(tp, 0, - XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return code; - } + code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (code) + goto error_return; + + truncate_setsize(inode, iattr->ia_size); + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * Only change the c/mtime if we are changing the size @@ -334,8 +323,7 @@ xfs_setattr( xfs_iflags_set(ip, XFS_ITRUNCATED); } } else if (tp) { - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); } /* @@ -470,17 +458,10 @@ xfs_setattr( return XFS_ERROR(code); } - if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && - !(flags & XFS_ATTR_DMI)) { - (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, NULL, NULL, - 0, 0, AT_DELAY_FLAG(flags)); - } return 0; abort_return: commit_flags |= XFS_TRANS_ABORT; - /* FALLTHROUGH */ error_return: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -516,7 +497,7 @@ xfs_readlink_bmap( int error = 0; error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, - mval, &nmaps, NULL, NULL); + mval, &nmaps, NULL); if (error) goto out; @@ -557,7 +538,7 @@ xfs_readlink( int pathlen; int error = 0; - xfs_itrace_entry(ip); + trace_xfs_readlink(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -613,14 +594,14 @@ xfs_free_eofblocks( */ end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - map_len = last_fsb - end_fsb; - if (map_len <= 0) + if (last_fsb <= end_fsb) return 0; + map_len = last_fsb - end_fsb; nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL, NULL); + NULL, 0, &imap, &nimaps, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && @@ -675,10 +656,7 @@ xfs_free_eofblocks( } xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, - XFS_IOLOCK_EXCL | - XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); error = xfs_itruncate_finish(&tp, ip, ip->i_size, @@ -750,8 +728,7 @@ xfs_inactive_symlink_rmt( xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); size = (int)ip->i_d.di_size; ip->i_d.di_size = 0; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Find the block(s) so we can inval and unmap them. @@ -761,7 +738,7 @@ xfs_inactive_symlink_rmt( nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list, NULL))) + &free_list))) goto error0; /* * Invalidate the block(s). @@ -776,7 +753,7 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, NULL, &done))) + &first_block, &free_list, &done))) goto error1; ASSERT(done); /* @@ -795,8 +772,7 @@ xfs_inactive_symlink_rmt( * Mark it dirty so it will be logged and moved forward in the log as * part of every commit. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Get a new, empty transaction to return to our caller. @@ -929,8 +905,7 @@ xfs_inactive_attrs( goto error_cancel; xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_idestroy_fork(ip, XFS_ATTR_FORK); ASSERT(ip->i_d.di_anextents == 0); @@ -1035,8 +1010,6 @@ xfs_inactive( int error; int truncate; - xfs_itrace_entry(ip); - /* * If the inode is already free, then there can be nothing * to clean up here. @@ -1060,9 +1033,6 @@ xfs_inactive( mp = ip->i_mount; - if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) - XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL); - error = 0; /* If this is a read-only mount, don't do this (would generate I/O) */ @@ -1120,8 +1090,7 @@ xfs_inactive( } xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * normally, we have to run xfs_itruncate_finish sync. @@ -1154,8 +1123,7 @@ xfs_inactive( return VN_INACTIVE_CACHE; } - xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); } else { error = xfs_trans_reserve(tp, 0, XFS_IFREE_LOG_RES(mp), @@ -1168,8 +1136,7 @@ xfs_inactive( } xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); } /* @@ -1257,7 +1224,7 @@ xfs_lookup( int error; uint lock_mode; - xfs_itrace_entry(dp); + trace_xfs_lookup(dp, name); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); @@ -1289,8 +1256,7 @@ xfs_create( struct xfs_name *name, mode_t mode, xfs_dev_t rdev, - xfs_inode_t **ipp, - cred_t *credp) + xfs_inode_t **ipp) { int is_dir = S_ISDIR(mode); struct xfs_mount *mp = dp->i_mount; @@ -1302,32 +1268,22 @@ xfs_create( boolean_t unlock_dp_on_error = B_FALSE; uint cancel_flags; int committed; - xfs_prid_t prid; + prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; uint resblks; uint log_res; uint log_count; - xfs_itrace_entry(dp); + trace_xfs_create(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, - dp, DM_RIGHT_NULL, NULL, - DM_RIGHT_NULL, name->name, NULL, - mode, 0, 0); - - if (error) - return error; - } - if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = dp->i_d.di_projid; + prid = xfs_get_projid(dp); else - prid = dfltprid; + prid = XFS_PROJID_DEFAULT; /* * Make sure that we have allocated dquot(s) on disk. @@ -1406,7 +1362,7 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, resblks > 0, &ip, &committed); if (error) { if (error == ENOSPC) @@ -1427,8 +1383,7 @@ xfs_create( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - IHOLD(dp); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL); unlock_dp_on_error = B_FALSE; error = xfs_dir_createname(tp, dp, name, ip->i_ino, @@ -1438,7 +1393,7 @@ xfs_create( ASSERT(error != ENOSPC); goto out_trans_abort; } - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); if (is_dir) { @@ -1487,16 +1442,7 @@ xfs_create( xfs_qm_dqrele(gdqp); *ipp = ip; - - /* Fallthrough to std_return with error = 0 */ - std_return: - if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { - XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL, - ip, DM_RIGHT_NULL, name->name, NULL, mode, - error, 0); - } - - return error; + return 0; out_bmap_cancel: xfs_bmap_cancel(&free_list); @@ -1510,8 +1456,8 @@ xfs_create( if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - - goto std_return; + std_return: + return error; out_abort_rele: /* @@ -1726,20 +1672,11 @@ xfs_remove( uint resblks; uint log_count; - xfs_itrace_entry(dp); - xfs_itrace_entry(ip); + trace_xfs_remove(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, name->name, NULL, - ip->i_d.di_mode, 0, 0); - if (error) - return error; - } - error = xfs_qm_dqattach(dp, 0); if (error) goto std_return; @@ -1782,15 +1719,8 @@ xfs_remove( xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); - /* - * At this point, we've gotten both the directory and the entry - * inodes locked. - */ - IHOLD(ip); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - - IHOLD(dp); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); /* * If we're removing a directory perform some additional validation. @@ -1814,7 +1744,7 @@ xfs_remove( ASSERT(error != ENOENT); goto out_bmap_cancel; } - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (is_dir) { /* @@ -1877,21 +1807,15 @@ xfs_remove( if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); - std_return: - if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { - XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, name->name, NULL, - ip->i_d.di_mode, error, 0); - } - - return error; + return 0; out_bmap_cancel: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); - goto std_return; + std_return: + return error; } int @@ -1909,25 +1833,13 @@ xfs_link( int committed; int resblks; - xfs_itrace_entry(tdp); - xfs_itrace_entry(sip); + trace_xfs_link(tdp, target_name); ASSERT(!S_ISDIR(sip->i_d.di_mode)); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, - tdp, DM_RIGHT_NULL, - sip, DM_RIGHT_NULL, - target_name->name, NULL, 0, 0, 0); - if (error) - return error; - } - - /* Return through std_return after this point. */ - error = xfs_qm_dqattach(sip, 0); if (error) goto std_return; @@ -1953,15 +1865,8 @@ xfs_link( xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); - /* - * Increment vnode ref counts since xfs_trans_commit & - * xfs_trans_cancel will both unlock the inodes and - * decrement the associated ref counts. - */ - IHOLD(sip); - IHOLD(tdp); - xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL); /* * If the source has too many links, we can't make any more to it. @@ -1977,7 +1882,7 @@ xfs_link( * the tree quota mechanism could be circumvented. */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (tdp->i_d.di_projid != sip->i_d.di_projid))) { + (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { error = XFS_ERROR(EXDEV); goto error_return; } @@ -1992,7 +1897,7 @@ xfs_link( &first_block, &free_list, resblks); if (error) goto abort_return; - xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); @@ -2014,27 +1919,14 @@ xfs_link( goto abort_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - goto std_return; - - /* Fall through to std_return with error = 0. */ -std_return: - if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { - (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, - tdp, DM_RIGHT_NULL, - sip, DM_RIGHT_NULL, - target_name->name, NULL, 0, error, 0); - } - return error; + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); abort_return: cancel_flags |= XFS_TRANS_ABORT; - /* FALLTHROUGH */ - error_return: xfs_trans_cancel(tp, cancel_flags); - goto std_return; + std_return: + return error; } int @@ -2043,8 +1935,7 @@ xfs_symlink( struct xfs_name *link_name, const char *target_path, mode_t mode, - xfs_inode_t **ipp, - cred_t *credp) + xfs_inode_t **ipp) { xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp; @@ -2065,7 +1956,7 @@ xfs_symlink( int byte_cnt; int n; xfs_buf_t *bp; - xfs_prid_t prid; + prid_t prid; struct xfs_dquot *udqp, *gdqp; uint resblks; @@ -2074,7 +1965,7 @@ xfs_symlink( ip = NULL; tp = NULL; - xfs_itrace_entry(dp); + trace_xfs_symlink(dp, link_name); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -2086,22 +1977,11 @@ xfs_symlink( if (pathlen >= MAXPATHLEN) /* total string too long */ return XFS_ERROR(ENAMETOOLONG); - if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, - DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - link_name->name, - (unsigned char *)target_path, 0, 0, 0); - if (error) - return error; - } - - /* Return through std_return after this point. */ - udqp = gdqp = NULL; if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = dp->i_d.di_projid; + prid = xfs_get_projid(dp); else - prid = (xfs_prid_t)dfltprid; + prid = XFS_PROJID_DEFAULT; /* * Make sure that we have allocated dquot(s) on disk. @@ -2167,8 +2047,8 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), - 1, 0, credp, prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, + prid, resblks > 0, &ip, NULL); if (error) { if (error == ENOSPC) goto error_return; @@ -2180,8 +2060,7 @@ xfs_symlink( * transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - IHOLD(dp); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL); unlock_dp_on_error = B_FALSE; /* @@ -2215,7 +2094,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list, NULL); + &free_list); if (error) { goto error1; } @@ -2251,7 +2130,7 @@ xfs_symlink( &first_block, &free_list, resblks); if (error) goto error1; - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* @@ -2278,21 +2157,8 @@ xfs_symlink( xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); - /* Fall through to std_return with error = 0 or errno from - * xfs_trans_commit */ -std_return: - if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { - (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, - dp, DM_RIGHT_NULL, - error ? NULL : ip, - DM_RIGHT_NULL, link_name->name, - (unsigned char *)target_path, - 0, error, 0); - } - - if (!error) - *ipp = ip; - return error; + *ipp = ip; + return 0; error2: IRELE(ip); @@ -2306,8 +2172,8 @@ std_return: if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - - goto std_return; + std_return: + return error; } int @@ -2333,13 +2199,12 @@ xfs_set_dmattrs( return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); ip->i_d.di_dmevmask = evmask; ip->i_d.di_dmstate = state; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - IHOLD(ip); error = xfs_trans_commit(tp, 0); return error; @@ -2390,7 +2255,7 @@ xfs_alloc_file_space( int committed; int error; - xfs_itrace_entry(ip); + trace_xfs_alloc_file_space(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -2408,29 +2273,13 @@ xfs_alloc_file_space( count = len; imapp = &imaps[0]; nimaps = 1; - bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); + bmapi_flag = XFS_BMAPI_WRITE | alloc_type; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); - /* Generate a DMAPI event if needed. */ - if (alloc_type != 0 && offset < ip->i_size && - (attr_flags & XFS_ATTR_DMI) == 0 && - DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { - xfs_off_t end_dmi_offset; - - end_dmi_offset = offset+len; - if (end_dmi_offset > ip->i_size) - end_dmi_offset = ip->i_size; - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset, - end_dmi_offset - offset, 0, NULL); - if (error) - return error; - } - /* * Allocate file space until done or until there is an error */ -retry: while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; @@ -2451,15 +2300,22 @@ retry: e = allocatesize_fsb; } + /* + * The transaction reservation is limited to a 32-bit block + * count, hence we need to limit the number of blocks we are + * trying to reserve to avoid an overflow. We can't allocate + * more than @nimaps extents, and an extent is limited on disk + * to MAXEXTLEN (21 bits), so use that to enforce the limit. + */ + resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); if (unlikely(rt)) { - resrtextents = qblocks = (uint)(e - s); + resrtextents = qblocks = resblks; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; - resblks = qblocks = \ - XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); quota_flag = XFS_QMOPT_RES_REGBLKS; } @@ -2488,8 +2344,7 @@ retry: if (error) goto error1; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * Issue the xfs_bmapi() call to allocate the blocks @@ -2498,7 +2353,7 @@ retry: error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, - &free_list, NULL); + &free_list); if (error) { goto error0; } @@ -2527,17 +2382,6 @@ retry: startoffset_fsb += allocated_fsb; allocatesize_fsb -= allocated_fsb; } -dmapi_enospc_check: - if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 && - DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, - ip, DM_RIGHT_NULL, - ip, DM_RIGHT_NULL, - NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ - if (error == 0) - goto retry; /* Maybe DMAPI app. has made space */ - /* else fall through with error from XFS_SEND_DATA */ - } return error; @@ -2548,7 +2392,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); - goto dmapi_enospc_check; + return error; } /* @@ -2588,9 +2432,9 @@ xfs_zero_remaining_bytes( if (endoff > ip->i_size) endoff = ip->i_size; - bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp); + bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp, + mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); if (!bp) return XFS_ERROR(ENOMEM); @@ -2598,7 +2442,7 @@ xfs_zero_remaining_bytes( offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, - NULL, 0, &imap, &nimap, NULL, NULL); + NULL, 0, &imap, &nimap, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -2616,7 +2460,7 @@ xfs_zero_remaining_bytes( XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); xfsbdstrat(mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", mp, bp, XFS_BUF_ADDR(bp)); @@ -2629,7 +2473,7 @@ xfs_zero_remaining_bytes( XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); xfsbdstrat(mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", mp, bp, XFS_BUF_ADDR(bp)); @@ -2661,7 +2505,6 @@ xfs_free_file_space( { int committed; int done; - xfs_off_t end_dmi_offset; xfs_fileoff_t endoffset_fsb; int error; xfs_fsblock_t firstfsb; @@ -2680,7 +2523,7 @@ xfs_free_file_space( mp = ip->i_mount; - xfs_itrace_entry(ip); + trace_xfs_free_file_space(ip); error = xfs_qm_dqattach(ip, 0); if (error) @@ -2691,19 +2534,7 @@ xfs_free_file_space( return error; rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); - end_dmi_offset = offset + len; - endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); - - if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 && - DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { - if (end_dmi_offset > ip->i_size) - end_dmi_offset = ip->i_size; - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, - offset, end_dmi_offset - offset, - AT_DELAY_FLAG(attr_flags), NULL); - if (error) - return error; - } + endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); if (attr_flags & XFS_ATTR_NOLOCK) need_iolock = 0; @@ -2731,7 +2562,7 @@ xfs_free_file_space( if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { nimap = 1; error = xfs_bmapi(NULL, ip, startoffset_fsb, - 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); + 1, 0, NULL, 0, &imap, &nimap, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -2746,7 +2577,7 @@ xfs_free_file_space( } nimap = 1; error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, - 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); + 1, 0, NULL, 0, &imap, &nimap, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -2814,8 +2645,7 @@ xfs_free_file_space( if (error) goto error1; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * issue the bunmapi() call to free the blocks @@ -2823,7 +2653,7 @@ xfs_free_file_space( xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, NULL, &done); + 0, 2, &firstfsb, &free_list, &done); if (error) { goto error0; } @@ -2882,8 +2712,7 @@ xfs_change_file_space( xfs_off_t llen; xfs_trans_t *tp; struct iattr iattr; - - xfs_itrace_entry(ip); + int prealloc_type; if (!S_ISREG(ip->i_d.di_mode)) return XFS_ERROR(EINVAL); @@ -2926,12 +2755,17 @@ xfs_change_file_space( * size to be changed. */ setprealloc = clrprealloc = 0; + prealloc_type = XFS_BMAPI_PREALLOC; switch (cmd) { + case XFS_IOC_ZERO_RANGE: + prealloc_type |= XFS_BMAPI_CONVERT; + xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); + /* FALLTHRU */ case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: error = xfs_alloc_file_space(ip, startoffset, bf->l_len, - 1, attr_flags); + prealloc_type, attr_flags); if (error) return error; setprealloc = 1; @@ -2985,8 +2819,7 @@ xfs_change_file_space( xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); if ((attr_flags & XFS_ATTR_DMI) == 0) { ip->i_d.di_mode &= ~S_ISUID; @@ -3001,7 +2834,7 @@ xfs_change_file_space( if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } if (setprealloc) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index d8dfa8d0dadd..f6702927eee4 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -2,7 +2,6 @@ #define _XFS_VNODEOPS_H 1 struct attrlist_cursor_kern; -struct cred; struct file; struct iattr; struct inode; @@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); + xfs_dev_t rdev, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, @@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, - const char *target_path, mode_t mode, struct xfs_inode **ipp, - cred_t *credp); + const char *target_path, mode_t mode, struct xfs_inode **ipp); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); |