summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/delayed-inode.c32
-rw-r--r--fs/btrfs/delayed-inode.h5
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c4
-rw-r--r--fs/btrfs/inode.c1
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c114
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/timerfd.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c75
-rw-r--r--fs/xfs/xfs_log.c11
18 files changed, 223 insertions, 288 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..300628795fdb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
+ /*
+ * the reloc mutex goes with the trans lock, it is taken
+ * during commit to protect us from the relocation code
+ */
+ struct mutex reloc_mutex;
+
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
u32 type;
u64 highest_objectid;
+
+ /* btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So in_trans_setup
+ * is used to tell us when more checks are required
+ */
+ unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
@@ -1181,7 +1195,6 @@ struct btrfs_root {
struct btrfs_key defrag_max;
int defrag_running;
char *name;
- int in_sysfs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..f1cbd028f7b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->data_len = data_len;
item->ins_or_del = 0;
item->bytes_reserved = 0;
- item->block_rsv = NULL;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
}
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret) {
+ if (!ret)
item->bytes_reserved = num_bytes;
- item->block_rsv = dst_rsv;
- }
return ret;
}
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
+ struct btrfs_block_rsv *rsv;
+
if (!item->bytes_reserved)
return;
- btrfs_block_rsv_release(root, item->block_rsv,
+ rsv = &root->fs_info->global_block_rsv;
+ btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret = 0;
path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
}
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
+ struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
if (IS_ERR(trans))
goto free_path;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
nr = trans->blocks_used;
+ trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
free_path:
@@ -1222,6 +1237,13 @@ again:
return 0;
}
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+ struct btrfs_delayed_root *delayed_root;
+ delayed_root = btrfs_get_delayed_root(root);
+ WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..d1a6a2915c66 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
- struct btrfs_block_rsv *block_rsv;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c6898653..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
- root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- if (!root->free_ino_ctl)
- goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
- if (!root->free_ino_pinned)
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
goto fail;
+ }
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- set_anon_super(&root->anon_super, NULL);
+ ret = set_anon_super(&root->anon_super, NULL);
+ if (ret)
+ goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2ded51..1f61bf5b4960 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
- /* nothing to shrink - nothing to reclaim */
- if (root->fs_info->delalloc_bytes == 0)
- return 0;
-
max_reclaim = min(reserved, to_reclaim);
while (loops < 1024) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 751ddf8fc58a..0a9b10c5b0a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
+ btrfs_free_path(path);
return 0;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d112d1f6..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
+ spin_lock(&root->fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
+ spin_unlock(&root->fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
- return 0;
+ goto out;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
+
+out:
return 0;
}
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
- spin_lock(&root->fs_info->trans_lock);
+ mutex_lock(&root->fs_info->reloc_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
- spin_lock(&root->fs_info->trans_lock);
+
+ /*
+ * this serializes us with btrfs_record_root_in_transaction,
+ * we have to make sure nobody is in the middle of
+ * adding their roots to the list while we are
+ * doing this splice
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
while (!list_empty(&reloc_roots)) {
found = 1;
@@ -3590,17 +3600,19 @@ next:
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = rc;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = NULL;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
#include "disk-io.h"
#include "transaction.h"
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_root *, char *);
- ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
- show, store)
-
-ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
-ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
- &btrfs_root_attr_blocks_used.attr,
- &btrfs_root_attr_block_limit.attr,
- NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_fs_info *, char *);
- ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
- show, store)
-
-SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
-SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
-SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
- &btrfs_super_attr_blocks_used.attr,
- &btrfs_super_attr_total_blocks.attr,
- &btrfs_super_attr_blocksize.attr,
- NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
-
- return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
- return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
- .show = btrfs_super_attr_show,
- .store = btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
- .show = btrfs_root_attr_show,
- .store = btrfs_root_attr_store,
-};
-
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b9fe98..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
+ /*
+ * see below for in_trans_setup usage rules
+ * we have the reloc mutex held now, so there
+ * is only one writer in this function
+ */
+ root->in_trans_setup = 1;
+
+ /* make sure readers find in_trans_setup before
+ * they find our root->last_trans update
+ */
+ smp_wmb();
+
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
- root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ root->last_trans = trans->transid;
+
+ /* this is pretty tricky. We don't want to
+ * take the relocation lock in btrfs_record_root_in_trans
+ * unless we're really doing the first setup for this root in
+ * this transaction.
+ *
+ * Normally we'd use root->last_trans as a flag to decide
+ * if we want to take the expensive mutex.
+ *
+ * But, we have to set root->last_trans before we
+ * init the relocation root, otherwise, we trip over warnings
+ * in ctree.c. The solution used here is to flag ourselves
+ * with root->in_trans_setup. When this is 1, we're still
+ * fixing up the reloc trees and everyone must wait.
+ *
+ * When this is zero, they can trust root->last_trans and fly
+ * through btrfs_record_root_in_trans without having to take the
+ * lock. smp_wmb() makes sure that all the writes above are
+ * done before we pop in the zero below
+ */
btrfs_init_reloc_root(trans, root);
+ smp_wmb();
+ root->in_trans_setup = 0;
}
return 0;
}
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (!root->ref_cows)
+ return 0;
+
+ /*
+ * see record_root_in_trans for comments about in_trans_setup usage
+ * and barriers
+ */
+ smp_rmb();
+ if (root->last_trans == trans->transid &&
+ !root->in_trans_setup)
+ return 0;
+
+ mutex_lock(&root->fs_info->reloc_mutex);
+ record_root_in_trans(trans, root);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
+ return 0;
+}
+
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
- btrfs_record_root_in_trans(trans, parent_root);
+ record_root_in_trans(trans, parent_root);
/*
* insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- btrfs_record_root_in_trans(trans, root);
+ /*
+ * pull in the delayed directory update
+ * and the delayed inode item
+ * otherwise we corrupt the FS during
+ * snapshot
+ */
+ ret = btrfs_run_delayed_items(trans, root);
+ BUG_ON(ret);
+
+ record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
int ret;
list_for_each_entry(pending, head, list) {
- /*
- * We must deal with the delayed items before creating
- * snapshots, or we will create a snapthot with inconsistent
- * information.
- */
- ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
- BUG_ON(ret);
-
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
@@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
- ret = create_pending_snapshots(trans, root->fs_info);
- BUG_ON(ret);
+ /*
+ * Ok now we need to make sure to block out any other joins while we
+ * commit the transaction. We could have started a join before setting
+ * no_join so make sure to wait for num_writers to == 1 again.
+ */
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+
+ /*
+ * the reloc mutex makes sure that we stop
+ * the balancing code from coming in and moving
+ * extents around in the middle of the commit
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
+ ret = create_pending_snapshots(trans, root->fs_info);
+ BUG_ON(ret);
+
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
+ /*
+ * make sure none of the code above managed to slip in a
+ * delayed item
+ */
+ btrfs_assert_delayed_root_empty(root);
+
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- BUG_ON(!wc.replay_dest);
+ BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/exec.c b/fs/exec.c
index 97e0d52d72fd..6075a1e727ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info)
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
struct file *rp, *wp;
struct fdtable *fdt;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3db5ba4568fc..b3cc8586984e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -974,7 +974,7 @@ out_no_inode:
out_no_read:
printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
- goto out_freesbi;
+ goto out_freebh;
out_bad_zone_size:
printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
sbi->s_log_zone_size);
@@ -989,6 +989,7 @@ out_unknown_format:
out_freebh:
brelse(bh);
+ brelse(pri_bh);
out_freesbi:
kfree(opt.iocharset);
kfree(sbi);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbdda5e8..dffeb3795af1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
/*
* Called when the clock was set to cancel the timers in the cancel
- * list.
+ * list. This will wake up processes waiting on these timers. The
+ * wake-up requires ctx->ticks to be non zero, therefore we increment
+ * it before calling wake_up_locked().
*/
void timerfd_clock_was_set(void)
{
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
spin_lock_irqsave(&ctx->wqh.lock, flags);
if (ctx->moffs.tv64 != moffs.tv64) {
ctx->moffs.tv64 = KTIME_MAX;
+ ctx->ticks++;
wake_up_locked(&ctx->wqh);
}
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba1ff85..7f782af286bf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
trace_xfs_file_fsync(ip);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ioend_wait(ip);
+ if (mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an RT and/or log subvolume we need to make sure
+ * to flush the write cache the device used for file data
+ * first. This is to ensure newly written file data make
+ * it to disk before logging the new inode size in case of
+ * an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
+ }
+
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+ XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
* force the log.
*/
if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(ip->i_mount,
+ error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
- if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If the log write didn't issue an ordered tag we need
- * to flush the disk cache for the data device now.
- */
- if (!log_flushed)
- xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
-
- /*
- * If this inode is on the RT dev we need to flush that
- * cache as well.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
- }
+ /*
+ * If we only have a single device, and the log force about was
+ * a no-op we might have to flush the data device cache here.
+ * This can only happen for fdatasync/O_DSYNC if we were overwriting
+ * an already allocated file and thus do not have any metadata to
+ * commit.
+ */
+ if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+ mp->m_logdev_targp == mp->m_ddev_targp &&
+ !XFS_IS_REALTIME_INODE(ip) &&
+ !log_flushed)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
return -error;
}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784525a8..d44d92cd12b1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -182,7 +182,7 @@ xfs_vn_mknod(
if (IS_POSIXACL(dir)) {
default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(default_acl))
- return -PTR_ERR(default_acl);
+ return PTR_ERR(default_acl);
if (!default_acl)
mode &= ~current_umask();
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce804dc..a1a881e68a9a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
- xfs_mount_t *mp)
-{
- xfs_buf_t *sbp = xfs_getsb(mp, 0);
- int error;
-
- XFS_BUF_UNDONE(sbp);
- XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
- XFS_BUF_WRITE(sbp);
- XFS_BUF_UNASYNC(sbp);
- XFS_BUF_ORDERED(sbp);
-
- xfsbdstrat(mp, sbp);
- error = xfs_buf_iowait(sbp);
-
- /*
- * Clear all the flags we set and possible error state in the
- * buffer. We only did the write to try out whether barriers
- * worked and shouldn't leave any traces in the superblock
- * buffer.
- */
- XFS_BUF_DONE(sbp);
- XFS_BUF_ERROR(sbp, 0);
- XFS_BUF_UNORDERED(sbp);
-
- xfs_buf_relse(sbp);
- return error;
-}
-
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
- int error;
-
- if (mp->m_logdev_targp != mp->m_ddev_targp) {
- xfs_notice(mp,
- "Disabling barriers, not supported with external log device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
- xfs_notice(mp,
- "Disabling barriers, underlying device is readonly");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- error = xfs_barrier_test(mp);
- if (error) {
- xfs_notice(mp,
- "Disabling barriers, trial barrier write failed");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-}
-
void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
switch (token) {
case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
-
- /*
- * Test if barriers are actually working if we can,
- * else delay this check until the filesystem is
- * marked writeable.
- */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- xfs_mountfs_check_barriers(mp);
break;
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
/* ro -> rw */
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
/*
* If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 211930246f20..41d5b8f2bf92 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
XFS_BUF_ASYNC(bp);
bp->b_flags |= XBF_LOG_BUFFER;
- if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+ if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an external log device, flush the data device
+ * before flushing the log to make sure all meta data
+ * written back from the AIL actually made it to disk
+ * before writing out the new log tail LSN in the log buffer.
+ */
+ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
XFS_BUF_ORDERED(bp);
+ }
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);