diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-11 14:16:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-11 14:16:53 -0700 |
commit | 3123bca71993c2346a458875488863772c1d5dc4 (patch) | |
tree | a1e082130a3d7a4ba1faaea60e699939cf821ab6 | |
parent | 582076ab16779208e7eb6ce712a9c0a6cc5bafe4 (diff) | |
parent | e4fbaee29272533a242f117d18712e2974520d2c (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull second set of btrfs updates from Chris Mason:
"The most important changes here are from Josef, fixing a btrfs
regression in 3.14 that can cause corruptions in the extent allocation
tree when snapshots are in use.
Josef also fixed some deadlocks in send/recv and other assorted races
when balance is running"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (23 commits)
Btrfs: fix compile warnings on on avr32 platform
btrfs: allow mounting btrfs subvolumes with different ro/rw options
btrfs: export global block reserve size as space_info
btrfs: fix crash in remount(thread_pool=) case
Btrfs: abort the transaction when we don't find our extent ref
Btrfs: fix EINVAL checks in btrfs_clone
Btrfs: fix unlock in __start_delalloc_inodes()
Btrfs: scrub raid56 stripes in the right way
Btrfs: don't compress for a small write
Btrfs: more efficient io tree navigation on wait_extent_bit
Btrfs: send, build path string only once in send_hole
btrfs: filter invalid arg for btrfs resize
Btrfs: send, fix data corruption due to incorrect hole detection
Btrfs: kmalloc() doesn't return an ERR_PTR
Btrfs: fix snapshot vs nocow writting
btrfs: Change the expanding write sequence to fix snapshot related bug.
btrfs: make device scan less noisy
btrfs: fix lockdep warning with reclaim lock inversion
Btrfs: hold the commit_root_sem when getting the commit root during send
Btrfs: remove transaction from send
...
-rw-r--r-- | fs/btrfs/async-thread.c | 2 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 33 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 94 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 13 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 23 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 35 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 8 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/file.c | 5 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 14 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 36 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 35 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 21 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 108 | ||||
-rw-r--r-- | fs/btrfs/send.c | 117 | ||||
-rw-r--r-- | fs/btrfs/super.c | 22 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 48 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 3 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 35 |
19 files changed, 374 insertions, 279 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index ecb5832c0967..5a201d81049c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -323,6 +323,8 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) { + if (!wq) + return; wq->normal->max_active = max; if (wq->high) wq->high->max_active = max; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index aad7201ad11b..10db21fa0926 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -330,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; } - root_level = btrfs_old_root_level(root, time_seq); + if (path->search_commit_root) + root_level = btrfs_header_level(root->commit_root); + else + root_level = btrfs_old_root_level(root, time_seq); if (root_level + 1 == level) { srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -1099,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, * * returns 0 on success, < 0 on error. */ -int btrfs_find_all_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots) +static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) { struct ulist *tmp; struct ulist_node *node = NULL; @@ -1137,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return 0; } +int btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) +{ + int ret; + + if (!trans) + down_read(&fs_info->commit_root_sem); + ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots); + if (!trans) + up_read(&fs_info->commit_root_sem); + return ret; +} + /* * this makes the path point to (inum INODE_ITEM ioff) */ @@ -1516,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); + } else { + down_read(&fs_info->commit_root_sem); } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, @@ -1526,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { - ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, - tree_mod_seq_elem.seq, &roots); + ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val, + tree_mod_seq_elem.seq, &roots); if (ret) break; ULIST_ITER_INIT(&root_uiter); @@ -1549,6 +1568,8 @@ out: if (!search_commit_root) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans, fs_info->extent_root); + } else { + up_read(&fs_info->commit_root_sem); } return ret; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 88d1b1eedc9c..1bcfcdb23cf4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2769,9 +2769,13 @@ again: * the commit roots are read only * so we always do read locks */ + if (p->need_commit_sem) + down_read(&root->fs_info->commit_root_sem); b = root->commit_root; extent_buffer_get(b); level = btrfs_header_level(b); + if (p->need_commit_sem) + up_read(&root->fs_info->commit_root_sem); if (!p->skip_locking) btrfs_tree_read_lock(b); } else { @@ -5360,7 +5364,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, { int ret; int cmp; - struct btrfs_trans_handle *trans = NULL; struct btrfs_path *left_path = NULL; struct btrfs_path *right_path = NULL; struct btrfs_key left_key; @@ -5378,9 +5381,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, u64 right_blockptr; u64 left_gen; u64 right_gen; - u64 left_start_ctransid; - u64 right_start_ctransid; - u64 ctransid; left_path = btrfs_alloc_path(); if (!left_path) { @@ -5404,21 +5404,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, right_path->search_commit_root = 1; right_path->skip_locking = 1; - spin_lock(&left_root->root_item_lock); - left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); - spin_unlock(&left_root->root_item_lock); - - spin_lock(&right_root->root_item_lock); - right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); - spin_unlock(&right_root->root_item_lock); - - trans = btrfs_join_transaction(left_root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out; - } - /* * Strategy: Go to the first items of both trees. Then do * @@ -5455,6 +5440,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, * the right if possible or go up and right. */ + down_read(&left_root->fs_info->commit_root_sem); left_level = btrfs_header_level(left_root->commit_root); left_root_level = left_level; left_path->nodes[left_level] = left_root->commit_root; @@ -5464,6 +5450,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, right_root_level = right_level; right_path->nodes[right_level] = right_root->commit_root; extent_buffer_get(right_path->nodes[right_level]); + up_read(&left_root->fs_info->commit_root_sem); if (left_level == 0) btrfs_item_key_to_cpu(left_path->nodes[left_level], @@ -5482,67 +5469,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, advance_left = advance_right = 0; while (1) { - /* - * We need to make sure the transaction does not get committed - * while we do anything on commit roots. This means, we need to - * join and leave transactions for every item that we process. - */ - if (trans && btrfs_should_end_transaction(trans, left_root)) { - btrfs_release_path(left_path); - btrfs_release_path(right_path); - - ret = btrfs_end_transaction(trans, left_root); - trans = NULL; - if (ret < 0) - goto out; - } - /* now rejoin the transaction */ - if (!trans) { - trans = btrfs_join_transaction(left_root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out; - } - - spin_lock(&left_root->root_item_lock); - ctransid = btrfs_root_ctransid(&left_root->root_item); - spin_unlock(&left_root->root_item_lock); - if (ctransid != left_start_ctransid) - left_start_ctransid = 0; - - spin_lock(&right_root->root_item_lock); - ctransid = btrfs_root_ctransid(&right_root->root_item); - spin_unlock(&right_root->root_item_lock); - if (ctransid != right_start_ctransid) - right_start_ctransid = 0; - - if (!left_start_ctransid || !right_start_ctransid) { - WARN(1, KERN_WARNING - "BTRFS: btrfs_compare_tree detected " - "a change in one of the trees while " - "iterating. This is probably a " - "bug.\n"); - ret = -EIO; - goto out; - } - - /* - * the commit root may have changed, so start again - * where we stopped - */ - left_path->lowest_level = left_level; - right_path->lowest_level = right_level; - ret = btrfs_search_slot(NULL, left_root, - &left_key, left_path, 0, 0); - if (ret < 0) - goto out; - ret = btrfs_search_slot(NULL, right_root, - &right_key, right_path, 0, 0); - if (ret < 0) - goto out; - } - if (advance_left && !left_end_reached) { ret = tree_advance(left_root, left_path, &left_level, left_root_level, @@ -5672,14 +5598,6 @@ out: btrfs_free_path(left_path); btrfs_free_path(right_path); kfree(tmp_buf); - - if (trans) { - if (!ret) - ret = btrfs_end_transaction(trans, left_root); - else - btrfs_end_transaction(trans, left_root); - } - return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bc96c03dd259..4c48df572bd6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -609,6 +609,7 @@ struct btrfs_path { unsigned int skip_locking:1; unsigned int leave_spinning:1; unsigned int search_commit_root:1; + unsigned int need_commit_sem:1; }; /* @@ -986,7 +987,8 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) -#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE +#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ + BTRFS_SPACE_INFO_GLOBAL_RSV) enum btrfs_raid_types { BTRFS_RAID_RAID10, @@ -1018,6 +1020,12 @@ enum btrfs_raid_types { */ #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) +/* + * A fake block group type that is used to communicate global block reserve + * size to userspace via the SPACE_INFO ioctl. + */ +#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) + #define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ BTRFS_AVAIL_ALLOC_BIT_SINGLE) @@ -1440,7 +1448,7 @@ struct btrfs_fs_info { */ struct mutex ordered_extent_flush_mutex; - struct rw_semaphore extent_commit_sem; + struct rw_semaphore commit_root_sem; struct rw_semaphore cleanup_work_sem; @@ -1711,7 +1719,6 @@ struct btrfs_root { struct btrfs_block_rsv *block_rsv; /* free ino cache stuff */ - struct mutex fs_commit_mutex; struct btrfs_free_space_ctl *free_ino_ctl; enum btrfs_caching_type cached; spinlock_t cache_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bd0f752b797b..029d46c2e170 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, { struct extent_state *cached_state = NULL; int ret; + bool need_lock = (current->journal_info == + (void *)BTRFS_SEND_TRANS_STUB); if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 0; @@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, if (atomic) return -EAGAIN; + if (need_lock) { + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + } + lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 0, &cached_state); if (extent_buffer_uptodate(eb) && @@ -347,10 +354,21 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, "found %llu\n", eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; - clear_extent_buffer_uptodate(eb); + + /* + * Things reading via commit roots that don't have normal protection, + * like send, can have a really old block in cache that may point at a + * block that has been free'd and re-allocated. So don't clear uptodate + * if we find an eb that is under IO (dirty/writeback) because we could + * end up reading in the stale data and then writing it back out and + * making everybody very sad. + */ + if (!extent_buffer_under_io(eb)) + clear_extent_buffer_uptodate(eb); out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, &cached_state, GFP_NOFS); + btrfs_tree_read_unlock_blocking(eb); return ret; } @@ -1546,7 +1564,6 @@ int btrfs_init_fs_root(struct btrfs_root *root) root->subv_writers = writers; btrfs_init_free_ino_ctl(root); - mutex_init(&root->fs_commit_mutex); spin_lock_init(&root->cache_lock); init_waitqueue_head(&root->cache_wait); @@ -2324,7 +2341,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); - init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->commit_root_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); sema_init(&fs_info->uuid_tree_rescan_sem, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6b6a6e3e735..1306487c82cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work) again: mutex_lock(&caching_ctl->mutex); /* need to make sure the commit_root doesn't disappear */ - down_read(&fs_info->extent_commit_sem); + down_read(&fs_info->commit_root_sem); next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); @@ -443,10 +443,10 @@ next: break; if (need_resched() || - rwsem_is_contended(&fs_info->extent_commit_sem)) { + rwsem_is_contended(&fs_info->commit_root_sem)) { caching_ctl->progress = last; btrfs_release_path(path); - up_read(&fs_info->extent_commit_sem); + up_read(&fs_info->commit_root_sem); mutex_unlock(&caching_ctl->mutex); cond_resched(); goto again; @@ -513,7 +513,7 @@ next: err: btrfs_free_path(path); - up_read(&fs_info->extent_commit_sem); + up_read(&fs_info->commit_root_sem); free_excluded_extents(extent_root, block_group); @@ -633,10 +633,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, return 0; } - down_write(&fs_info->extent_commit_sem); + down_write(&fs_info->commit_root_sem); atomic_inc(&caching_ctl->count); list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); - up_write(&fs_info->extent_commit_sem); + up_write(&fs_info->commit_root_sem); btrfs_get_block_group(cache); @@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_unlock(&locked_ref->lock); spin_lock(&delayed_refs->lock); spin_lock(&locked_ref->lock); - if (rb_first(&locked_ref->ref_root)) { + if (rb_first(&locked_ref->ref_root) || + locked_ref->extent_op) { spin_unlock(&locked_ref->lock); spin_unlock(&delayed_refs->lock); continue; @@ -5470,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct btrfs_space_info *space_info; - down_write(&fs_info->extent_commit_sem); + down_write(&fs_info->commit_root_sem); list_for_each_entry_safe(caching_ctl, next, &fs_info->caching_block_groups, list) { @@ -5489,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, else fs_info->pinned_extents = &fs_info->freed_extents[0]; - up_write(&fs_info->extent_commit_sem); + up_write(&fs_info->commit_root_sem); list_for_each_entry_rcu(space_info, &fs_info->space_info, list) percpu_counter_set(&space_info->total_bytes_pinned, 0); @@ -5744,6 +5745,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", bytenr, parent, root_objectid, owner_objectid, owner_offset); + btrfs_abort_transaction(trans, extent_root, ret); + goto out; } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -8255,14 +8258,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) struct btrfs_caching_control *caching_ctl; struct rb_node *n; - down_write(&info->extent_commit_sem); + down_write(&info->commit_root_sem); while (!list_empty(&info->caching_block_groups)) { caching_ctl = list_entry(info->caching_block_groups.next, struct btrfs_caching_control, list); list_del(&caching_ctl->list); put_caching_control(caching_ctl); } - up_write(&info->extent_commit_sem); + up_write(&info->commit_root_sem); spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { @@ -8336,9 +8339,15 @@ static void __link_block_group(struct btrfs_space_info *space_info, struct btrfs_block_group_cache *cache) { int index = get_block_group_index(cache); + bool first = false; down_write(&space_info->groups_sem); - if (list_empty(&space_info->block_groups[index])) { + if (list_empty(&space_info->block_groups[index])) + first = true; + list_add_tail(&cache->list, &space_info->block_groups[index]); + up_write(&space_info->groups_sem); + + if (first) { struct kobject *kobj = &space_info->block_group_kobjs[index]; int ret; @@ -8350,8 +8359,6 @@ static void __link_block_group(struct btrfs_space_info *space_info, kobject_put(&space_info->kobj); } } - list_add_tail(&cache->list, &space_info->block_groups[index]); - up_write(&space_info->groups_sem); } static struct btrfs_block_group_cache * diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ae69a00387e7..3955e475ceec 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -749,6 +749,7 @@ again: * our range starts */ node = tree_search(tree, start); +process_node: if (!node) break; @@ -769,7 +770,10 @@ again: if (start > end) break; - cond_resched_lock(&tree->lock); + if (!cond_resched_lock(&tree->lock)) { + node = rb_next(node); + goto process_node; + } } out: spin_unlock(&tree->lock); @@ -4306,7 +4310,7 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } -static int extent_buffer_under_io(struct extent_buffer *eb) +int extent_buffer_under_io(struct extent_buffer *eb) { return (atomic_read(&eb->io_pages) || test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 58b27e5ab521..c488b45237bf 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_buffer *eb); +int extent_buffer_under_io(struct extent_buffer *eb); int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **map, unsigned long *map_start, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c660527af838..c5998477fe60 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1727,6 +1727,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, struct btrfs_root *root = BTRFS_I(inode)->root; loff_t *ppos = &iocb->ki_pos; u64 start_pos; + u64 end_pos; ssize_t num_written = 0; ssize_t err = 0; size_t count, ocount; @@ -1781,7 +1782,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { - err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); + /* Expand hole size to cover write data, preventing empty gap */ + end_pos = round_up(pos + iov->iov_len, root->sectorsize); + err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ab485e57b6fe..cc8ca193d830 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -55,7 +55,7 @@ static int caching_kthread(void *data) key.type = BTRFS_INODE_ITEM_KEY; again: /* need to make sure the commit_root doesn't disappear */ - mutex_lock(&root->fs_commit_mutex); + down_read(&fs_info->commit_root_sem); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -88,7 +88,7 @@ again: btrfs_item_key_to_cpu(leaf, &key, 0); btrfs_release_path(path); root->cache_progress = last; - mutex_unlock(&root->fs_commit_mutex); + up_read(&fs_info->commit_root_sem); schedule_timeout(1); goto again; } else @@ -127,7 +127,7 @@ next: btrfs_unpin_free_ino(root); out: wake_up(&root->cache_wait); - mutex_unlock(&root->fs_commit_mutex); + up_read(&fs_info->commit_root_sem); btrfs_free_path(path); @@ -223,11 +223,11 @@ again: * or the caching work is done. */ - mutex_lock(&root->fs_commit_mutex); + down_write(&root->fs_info->commit_root_sem); spin_lock(&root->cache_lock); if (root->cached == BTRFS_CACHE_FINISHED) { spin_unlock(&root->cache_lock); - mutex_unlock(&root->fs_commit_mutex); + up_write(&root->fs_info->commit_root_sem); goto again; } spin_unlock(&root->cache_lock); @@ -240,7 +240,7 @@ again: else __btrfs_add_free_space(pinned, objectid, 1); - mutex_unlock(&root->fs_commit_mutex); + up_write(&root->fs_info->commit_root_sem); } } @@ -250,7 +250,7 @@ again: * and others will just be dropped, because the commit root we were * searching has changed. * - * Must be called with root->fs_commit_mutex held + * Must be called with root->fs_info->commit_root_sem held */ void btrfs_unpin_free_ino(struct btrfs_root *root) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06e9a4152b14..5f805bc944fa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -394,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode, (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) btrfs_add_inode_defrag(NULL, inode); + /* + * skip compression for a small file range(<=blocksize) that + * isn't an inline extent, since it dosen't save disk space at all. + */ + if ((end - start + 1) <= blocksize && + (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + goto cleanup_and_bail_uncompressed; + actual_end = min_t(u64, isize, end + 1); again: will_compress = 0; @@ -1271,6 +1279,15 @@ next_slot: disk_bytenr += cur_offset - found_key.offset; num_bytes = min(end + 1, extent_end) - cur_offset; /* + * if there are pending snapshots for this root, + * we fall into common COW way. + */ + if (!nolock) { + err = btrfs_start_nocow_write(root); + if (!err) + goto out_check; + } + /* * force cow if csum exists in the range. * this ensure that csum for a given extent are * either valid or do not exist. @@ -1289,6 +1306,8 @@ next_slot: out_check: if (extent_end <= start) { path->slots[0]++; + if (!nolock && nocow) + btrfs_end_nocow_write(root); goto next_slot; } if (!nocow) { @@ -1306,8 +1325,11 @@ out_check: ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written, 1); - if (ret) + if (ret) { + if (!nolock && nocow) + btrfs_end_nocow_write(root); goto error; + } cow_start = (u64)-1; } @@ -1354,8 +1376,11 @@ out_check: BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, cur_offset, num_bytes); - if (ret) + if (ret) { + if (!nolock && nocow) + btrfs_end_nocow_write(root); goto error; + } } extent_clear_unlock_delalloc(inode, cur_offset, @@ -1363,6 +1388,8 @@ out_check: locked_page, EXTENT_LOCKED | EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_SET_PRIVATE2); + if (!nolock && nocow) + btrfs_end_nocow_write(root); cur_offset = extent_end; if (cur_offset > end) break; @@ -8476,19 +8503,20 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, else iput(inode); ret = -ENOMEM; - break; + goto out; } list_add_tail(&work->list, &works); btrfs_queue_work(root->fs_info->flush_workers, &work->work); ret++; if (nr != -1 && ret >= nr) - break; + goto out; cond_resched(); spin_lock(&root->delalloc_lock); } spin_unlock(&root->delalloc_lock); +out: list_for_each_entry_safe(work, next, &works, list) { list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0401397b5c92..e79ff6b90cb7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1472,6 +1472,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, struct btrfs_trans_handle *trans; struct btrfs_device *device = NULL; char *sizestr; + char *retptr; char *devstr = NULL; int ret = 0; int mod = 0; @@ -1539,8 +1540,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, mod = 1; sizestr++; } - new_size = memparse(sizestr, NULL); - if (new_size == 0) { + new_size = memparse(sizestr, &retptr); + if (*retptr != '\0' || new_size == 0) { ret = -EINVAL; goto out_free; } @@ -3140,8 +3141,9 @@ process_slot: new_key.offset + datal, 1); if (ret) { - btrfs_abort_transaction(trans, root, - ret); + if (ret != -EINVAL) + btrfs_abort_transaction(trans, + root, ret); btrfs_end_transaction(trans, root); goto out; } @@ -3538,6 +3540,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } + /* + * Global block reserve, exported as a space_info + */ + slot_count++; + /* space_slots == 0 means they are asking for a count */ if (space_args.space_slots == 0) { space_args.total_spaces = slot_count; @@ -3596,6 +3603,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } + /* + * Add global block reserve + */ + if (slot_count) { + struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; + + spin_lock(&block_rsv->lock); + space.total_bytes = block_rsv->size; + space.used_bytes = block_rsv->size - block_rsv->reserved; + spin_unlock(&block_rsv->lock); + space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; + memcpy(dest, &space, sizeof(space)); + space_args.total_spaces++; + } + user_dest = (struct btrfs_ioctl_space_info __user *) (arg + sizeof(struct btrfs_ioctl_space_args)); @@ -4531,9 +4553,8 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file, } args64 = kmalloc(sizeof(*args64), GFP_NOFS); - if (IS_ERR(args64)) { - ret = PTR_ERR(args64); - args64 = NULL; + if (!args64) { + ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index def428a25b2a..7f92ab1daa87 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2317,7 +2317,6 @@ void free_reloc_roots(struct list_head *list) static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { - struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_root *reloc_root; u64 last_snap; @@ -2375,26 +2374,6 @@ again: list_add_tail(&reloc_root->root_list, &reloc_roots); goto out; - } else if (!ret) { - /* - * recover the last snapshot tranid to avoid - * the space balance break NOCOW. - */ - root = read_fs_root(rc->extent_root->fs_info, - objectid); - if (IS_ERR(root)) - continue; - - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - - /* Check if the fs/file tree was snapshoted or not. */ - if (btrfs_root_last_snapshot(&root->root_item) == - otransid - 1) - btrfs_set_root_last_snapshot(&root->root_item, - last_snap); - - btrfs_end_transaction(trans, root); } } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 93e6d7172844..0be77993378e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2235,6 +2235,47 @@ behind_scrub_pages: return 0; } +/* + * Given a physical address, this will calculate it's + * logical offset. if this is a parity stripe, it will return + * the most left data stripe's logical offset. + * + * return 0 if it is a data stripe, 1 means parity stripe. + */ +static int get_raid56_logic_offset(u64 physical, int num, + struct map_lookup *map, u64 *offset) +{ + int i; + int j = 0; + u64 stripe_nr; + u64 last_offset; + int stripe_index; + int rot; + + last_offset = (physical - map->stripes[num].physical) * + nr_data_stripes(map); + *offset = last_offset; + for (i = 0; i < nr_data_stripes(map); i++) { + *offset = last_offset + i * map->stripe_len; + + stripe_nr = *offset; + do_div(stripe_nr, map->stripe_len); + do_div(stripe_nr, nr_data_stripes(map)); + + /* Work out the disk rotation on this stripe-set */ + rot = do_div(stripe_nr, map->num_stripes); + /* calculate which stripe this data locates */ + rot += i; + stripe_index = rot % map->num_stripes; + if (stripe_index == num) + return 0; + if (stripe_index < num) + j++; + } + *offset = last_offset + j * map->stripe_len; + return 1; +} + static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, struct map_lookup *map, struct btrfs_device *scrub_dev, @@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, u64 physical; u64 logical; u64 logic_end; + u64 physical_end; u64 generation; int mirror_num; struct reada_control *reada1; @@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, u64 extent_len; struct btrfs_device *extent_dev; int extent_mirror_num; - int stop_loop; - - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) { - if (num >= nr_data_stripes(map)) { - return 0; - } - } + int stop_loop = 0; nstripes = length; + physical = map->stripes[num].physical; offset = 0; do_div(nstripes, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { @@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { increment = map->stripe_len; mirror_num = num % map->num_stripes + 1; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + get_raid56_logic_offset(physical, num, map, &offset); + increment = map->stripe_len * nr_data_stripes(map); + mirror_num = 1; } else { increment = map->stripe_len; mirror_num = 1; @@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, * to not hold off transaction commits */ logical = base + offset; - + physical_end = physical + nstripes * map->stripe_len; + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + get_raid56_logic_offset(physical_end, num, + map, &logic_end); + logic_end += base; + } else { + logic_end = logical + increment * nstripes; + } wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); scrub_blocked_if_needed(fs_info); @@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key_start.objectid = logical; key_start.type = BTRFS_EXTENT_ITEM_KEY; key_start.offset = (u64)0; - key_end.objectid = base + offset + nstripes * increment; + key_end.objectid = logic_end; key_end.type = BTRFS_METADATA_ITEM_KEY; key_end.offset = (u64)-1; reada1 = btrfs_reada_add(root, &key_start, &key_end); @@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key_start.offset = logical; key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key_end.type = BTRFS_EXTENT_CSUM_KEY; - key_end.offset = base + offset + nstripes * increment; + key_end.offset = logic_end; reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); if (!IS_ERR(reada1)) @@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, /* * now find all extents for each stripe and scrub them */ - logical = base + offset; - physical = map->stripes[num].physical; - logic_end = logical + increment * nstripes; ret = 0; - while (logical < logic_end) { + while (physical < physical_end) { + /* for raid56, we skip parity stripe */ + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + ret = get_raid56_logic_offset(physical, num, + map, &logical); + logical += base; + if (ret) + goto skip; + } /* * canceled? */ @@ -2504,15 +2559,29 @@ again: scrub_free_csums(sctx); if (extent_logical + extent_len < key.objectid + bytes) { - logical += increment; - physical += map->stripe_len; - + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + /* + * loop until we find next data stripe + * or we have finished all stripes. + */ + do { + physical += map->stripe_len; + ret = get_raid56_logic_offset( + physical, num, + map, &logical); + logical += base; + } while (physical < physical_end && ret); + } else { + physical += map->stripe_len; + logical += increment; + } if (logical < key.objectid + bytes) { cond_resched(); goto again; } - if (logical >= logic_end) { + if (physical >= physical_end) { stop_loop = 1; break; } @@ -2521,6 +2590,7 @@ next: path->slots[0]++; } btrfs_release_path(path); +skip: logical += increment; physical += map->stripe_len; spin_lock(&sctx->stat_lock); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9b6da9d55f9a..1ac3ca98c429 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -493,6 +493,7 @@ static struct btrfs_path *alloc_path_for_send(void) return NULL; path->search_commit_root = 1; path->skip_locking = 1; + path->need_commit_sem = 1; return path; } @@ -771,29 +772,22 @@ out: /* * Helper function to retrieve some fields from an inode item. */ -static int get_inode_info(struct btrfs_root *root, - u64 ino, u64 *size, u64 *gen, - u64 *mode, u64 *uid, u64 *gid, - u64 *rdev) +static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path, + u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid, + u64 *gid, u64 *rdev) { int ret; struct btrfs_inode_item *ii; struct btrfs_key key; - struct btrfs_path *path; - - path = alloc_path_for_send(); - if (!path) - return -ENOMEM; key.objectid = ino; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; if (ret) { - ret = -ENOENT; - goto out; + if (ret > 0) + ret = -ENOENT; + return ret; } ii = btrfs_item_ptr(path->nodes[0], path->slots[0], @@ -811,7 +805,22 @@ static int get_inode_info(struct btrfs_root *root, if (rdev) *rdev = btrfs_inode_rdev(path->nodes[0], ii); -out: + return ret; +} + +static int get_inode_info(struct btrfs_root *root, + u64 ino, u64 *size, u64 *gen, + u64 *mode, u64 *uid, u64 *gid, + u64 *rdev) +{ + struct btrfs_path *path; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid, + rdev); btrfs_free_path(path); return ret; } @@ -1085,6 +1094,7 @@ out: struct backref_ctx { struct send_ctx *sctx; + struct btrfs_path *path; /* number of total found references */ u64 found; @@ -1155,8 +1165,9 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) * There are inodes that have extents that lie behind its i_size. Don't * accept clones from these extents. */ - ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, - NULL); + ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, + NULL, NULL, NULL); + btrfs_release_path(bctx->path); if (ret < 0) return ret; @@ -1235,12 +1246,17 @@ static int find_extent_clone(struct send_ctx *sctx, if (!tmp_path) return -ENOMEM; + /* We only use this path under the commit sem */ + tmp_path->need_commit_sem = 0; + backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); if (!backref_ctx) { ret = -ENOMEM; goto out; } + backref_ctx->path = tmp_path; + if (data_offset >= ino_size) { /* * There may be extents that lie behind the file's size. @@ -1268,8 +1284,10 @@ static int find_extent_clone(struct send_ctx *sctx, } logical = disk_byte + btrfs_file_extent_offset(eb, fi); + down_read(&sctx->send_root->fs_info->commit_root_sem); ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, &found_key, &flags); + up_read(&sctx->send_root->fs_info->commit_root_sem); btrfs_release_path(tmp_path); if (ret < 0) @@ -4418,6 +4436,9 @@ static int send_hole(struct send_ctx *sctx, u64 end) p = fs_path_alloc(); if (!p) return -ENOMEM; + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto tlv_put_failure; memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); while (offset < end) { len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); @@ -4425,9 +4446,6 @@ static int send_hole(struct send_ctx *sctx, u64 end) ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) break; - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - break; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); @@ -4968,7 +4986,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (S_ISREG(sctx->cur_inode_mode)) { if (need_send_hole(sctx)) { - if (sctx->cur_inode_last_extent == (u64)-1) { + if (sctx->cur_inode_last_extent == (u64)-1 || + sctx->cur_inode_last_extent < + sctx->cur_inode_size) { ret = get_last_extent(sctx, (u64)-1); if (ret) goto out; @@ -5367,57 +5387,21 @@ out: static int full_send_tree(struct send_ctx *sctx) { int ret; - struct btrfs_trans_handle *trans = NULL; struct btrfs_root *send_root = sctx->send_root; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; struct extent_buffer *eb; int slot; - u64 start_ctransid; - u64 ctransid; path = alloc_path_for_send(); if (!path) return -ENOMEM; - spin_lock(&send_root->root_item_lock); - start_ctransid = btrfs_root_ctransid(&send_root->root_item); - spin_unlock(&send_root->root_item_lock); - key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; -join_trans: - /* - * We need to make sure the transaction does not get committed - * while we do anything on commit roots. Join a transaction to prevent - * this. - */ - trans = btrfs_join_transaction(send_root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out; - } - - /* - * Make sure the tree has not changed after re-joining. We detect this - * by comparing start_ctransid and ctransid. They should always match. - */ - spin_lock(&send_root->root_item_lock); - ctransid = btrfs_root_ctransid(&send_root->root_item); - spin_unlock(&send_root->root_item_lock); - - if (ctransid != start_ctransid) { - WARN(1, KERN_WARNING "BTRFS: the root that you're trying to " - "send was modified in between. This is " - "probably a bug.\n"); - ret = -EIO; - goto out; - } - ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); if (ret < 0) goto out; @@ -5425,19 +5409,6 @@ join_trans: goto out_finish; while (1) { - /* - * When someone want to commit while we iterate, end the - * joined transaction and rejoin. - */ - if (btrfs_should_end_transaction(trans, send_root)) { - ret = btrfs_end_transaction(trans, send_root); - trans = NULL; - if (ret < 0) - goto out; - btrfs_release_path(path); - goto join_trans; - } - eb = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(eb, &found_key, slot); @@ -5465,12 +5436,6 @@ out_finish: out: btrfs_free_path(path); - if (trans) { - if (!ret) - ret = btrfs_end_transaction(trans, send_root); - else - btrfs_end_transaction(trans, send_root); - } return ret; } @@ -5718,7 +5683,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) NULL); sort_clone_roots = 1; + current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; ret = send_subvol(sctx); + current->journal_info = NULL; if (ret < 0) goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9dbf42395153..5011aadacab8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,6 +66,8 @@ static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; +static int btrfs_remount(struct super_block *sb, int *flags, char *data); + static const char *btrfs_decode_error(int errno) { char *errstr = "unknown"; @@ -1185,6 +1187,26 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); kfree(newargs); + + if (PTR_RET(mnt) == -EBUSY) { + if (flags & MS_RDONLY) { + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name, + newargs); + } else { + int r; + mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, + newargs); + if (IS_ERR(mnt)) + return ERR_CAST(mnt); + + r = btrfs_remount(mnt->mnt_sb, &flags, NULL); + if (r < 0) { + /* FIXME: release vfsmount mnt ??*/ + return ERR_PTR(r); + } + } + } + if (IS_ERR(mnt)) return ERR_CAST(mnt); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a04707f740d6..7579f6d0b854 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -75,10 +75,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) } } -static noinline void switch_commit_root(struct btrfs_root *root) +static noinline void switch_commit_roots(struct btrfs_transaction *trans, + struct btrfs_fs_info *fs_info) { - free_extent_buffer(root->commit_root); - root->commit_root = btrfs_root_node(root); + struct btrfs_root *root, *tmp; + + down_write(&fs_info->commit_root_sem); + list_for_each_entry_safe(root, tmp, &trans->switch_commits, + dirty_list) { + list_del_init(&root->dirty_list); + free_extent_buffer(root->commit_root); + root->commit_root = btrfs_root_node(root); + if (is_fstree(root->objectid)) + btrfs_unpin_free_ino(root); + } + up_write(&fs_info->commit_root_sem); } static inline void extwriter_counter_inc(struct btrfs_transaction *trans, @@ -208,6 +219,7 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->ordered_operations); INIT_LIST_HEAD(&cur_trans->pending_chunks); + INIT_LIST_HEAD(&cur_trans->switch_commits); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -375,7 +387,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); - if (current->journal_info) { + if (current->journal_info && + current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) { WARN_ON(type & TRANS_EXTWRITERS); h = current->journal_info; h->use_count++; @@ -919,9 +932,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return ret; } - if (root != root->fs_info->extent_root) - switch_commit_root(root); - return 0; } @@ -977,15 +987,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); + if (root != fs_info->extent_root) + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); ret = update_cowonly_root(trans, root); if (ret) return ret; } - down_write(&fs_info->extent_commit_sem); - switch_commit_root(fs_info->extent_root); - up_write(&fs_info->extent_commit_sem); - + list_add_tail(&fs_info->extent_root->dirty_list, + &trans->transaction->switch_commits); btrfs_after_dev_replace_commit(fs_info); return 0; @@ -1042,11 +1053,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, smp_wmb(); if (root->commit_root != root->node) { - mutex_lock(&root->fs_commit_mutex); - switch_commit_root(root); - btrfs_unpin_free_ino(root); - mutex_unlock(&root->fs_commit_mutex); - + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); btrfs_set_root_node(&root->root_item, root->node); } @@ -1857,11 +1865,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_root_node(&root->fs_info->tree_root->root_item, root->fs_info->tree_root->node); - switch_commit_root(root->fs_info->tree_root); + list_add_tail(&root->fs_info->tree_root->dirty_list, + &cur_trans->switch_commits); btrfs_set_root_node(&root->fs_info->chunk_root->root_item, root->fs_info->chunk_root->node); - switch_commit_root(root->fs_info->chunk_root); + list_add_tail(&root->fs_info->chunk_root->dirty_list, + &cur_trans->switch_commits); + + switch_commit_roots(cur_trans, root->fs_info); assert_qgroups_uptodate(trans); update_super_roots(root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 6ac037e9f9f0..b57b924e8e03 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,6 +57,7 @@ struct btrfs_transaction { struct list_head pending_snapshots; struct list_head ordered_operations; struct list_head pending_chunks; + struct list_head switch_commits; struct btrfs_delayed_ref_root delayed_refs; int aborted; }; @@ -78,6 +79,8 @@ struct btrfs_transaction { #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ __TRANS_ATTACH) +#define BTRFS_SEND_TRANS_STUB 1 + struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d241130a32fd..49d7fab73360 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -448,6 +448,14 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } +/* + * Add new device to list of registered devices + * + * Returns: + * 1 - first time device is seen + * 0 - device already known + * < 0 - error + */ static noinline int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) @@ -455,6 +463,7 @@ static noinline int device_list_add(const char *path, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; struct rcu_string *name; + int ret = 0; u64 found_transid = btrfs_super_generation(disk_super); fs_devices = find_fsid(disk_super->fsid); @@ -495,6 +504,7 @@ static noinline int device_list_add(const char *path, fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); + ret = 1; device->fs_devices = fs_devices; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); @@ -513,7 +523,8 @@ static noinline int device_list_add(const char *path, fs_devices->latest_trans = found_transid; } *fs_devices_ret = fs_devices; - return 0; + + return ret; } static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) @@ -910,17 +921,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, transid = btrfs_super_generation(disk_super); total_devices = btrfs_super_num_devices(disk_super); - if (disk_super->label[0]) { - if (disk_super->label[BTRFS_LABEL_SIZE - 1]) - disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; - printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); - } else { - printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); - } - - printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); - ret = device_list_add(path, disk_super, devid, fs_devices_ret); + if (ret > 0) { + if (disk_super->label[0]) { + if (disk_super->label[BTRFS_LABEL_SIZE - 1]) + disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; + printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); + } else { + printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); + } + + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); + ret = 0; + } if (!ret && fs_devices_ret) (*fs_devices_ret)->total_devices = total_devices; |