11 files changed, 159 insertions, 51 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d47ce8307854..3458b5725540 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1284,6 +1284,8 @@ struct btrfs_root {
 #define BTRFS_INODE_DIRSYNC		(1 << 10)
 #define BTRFS_INODE_COMPRESS		(1 << 11)
 
+#define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)
+
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
  * one for u8:
@@ -2359,6 +2361,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
 			struct extent_buffer *node);
+void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
+
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7a7315bd031..8f1d44ba332f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1275,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 out:
-	if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
+	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
 		root->ref_cows = 1;
+		btrfs_check_and_init_root_item(&root->root_item);
+	}
 
 	return root;
 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 656bc0a892b1..e621ea54a3fd 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -906,7 +906,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 	unsigned long last_index;
 	size_t num_written = 0;
 	int nrptrs;
-	int ret;
+	int ret = 0;
 
 	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
 		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427d8a9d..f561c953205b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
 #include "free-space-cache.h"
 #include "transaction.h"
 #include "disk-io.h"
+#include "extent_io.h"
 
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
 		return ERR_PTR(-ENOENT);
 	}
 
+	inode->i_mapping->flags &= ~__GFP_FS;
+
 	spin_lock(&block_group->lock);
 	if (!root->fs_info->closing) {
 		block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
+	u64 used = btrfs_block_group_used(&block_group->item);
 	u32 cur_crc = ~(u32)0;
 	pgoff_t index = 0;
 	unsigned long first_page_offset;
@@ -467,6 +471,17 @@ next:
 		index++;
 	}
 
+	spin_lock(&block_group->tree_lock);
+	if (block_group->free_space != (block_group->key.offset - used -
+					block_group->bytes_super)) {
+		spin_unlock(&block_group->tree_lock);
+		printk(KERN_ERR "block group %llu has an wrong amount of free "
+		       "space\n", block_group->key.objectid);
+		ret = 0;
+		goto free_cache;
+	}
+	spin_unlock(&block_group->tree_lock);
+
 	ret = 1;
 out:
 	kfree(checksums);
@@ -495,8 +510,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct list_head *pos, *n;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_free_cluster *cluster = NULL;
+	struct extent_io_tree *unpin = NULL;
 	struct list_head bitmap_list;
 	struct btrfs_key key;
+	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
 	pgoff_t index = 0, last_index = 0;
@@ -505,6 +523,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
+	bool next_page = false;
 
 	root = root->fs_info->tree_root;
 
@@ -551,6 +570,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 */
 	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 
+	/* Get the cluster for this block_group if it exists */
+	if (!list_empty(&block_group->cluster_list))
+		cluster = list_entry(block_group->cluster_list.next,
+				     struct btrfs_free_cluster,
+				     block_group_list);
+
+	/*
+	 * We shouldn't have switched the pinned extents yet so this is the
+	 * right one
+	 */
+	unpin = root->fs_info->pinned_extents;
+
 	/*
 	 * Lock all pages first so we can lock the extent safely.
 	 *
@@ -580,6 +611,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state, GFP_NOFS);
 
+	/*
+	 * When searching for pinned extents, we need to start at our start
+	 * offset.
+	 */
+	start = block_group->key.objectid;
+
 	/* Write out the extent entries */
 	do {
 		struct btrfs_free_space_entry *entry;
@@ -587,6 +624,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		unsigned long offset = 0;
 		unsigned long start_offset = 0;
 
+		next_page = false;
+
 		if (index == 0) {
 			start_offset = first_page_offset;
 			offset = start_offset;
@@ -598,7 +637,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		entry = addr + start_offset;
 
 		memset(addr, 0, PAGE_CACHE_SIZE);
-		while (1) {
+		while (node && !next_page) {
 			struct btrfs_free_space *e;
 
 			e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -614,12 +653,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 				entry->type = BTRFS_FREE_SPACE_EXTENT;
 			}
 			node = rb_next(node);
-			if (!node)
-				break;
+			if (!node && cluster) {
+				node = rb_first(&cluster->root);
+				cluster = NULL;
+			}
 			offset += sizeof(struct btrfs_free_space_entry);
 			if (offset + sizeof(struct btrfs_free_space_entry) >=
 			    PAGE_CACHE_SIZE)
+				next_page = true;
+			entry++;
+		}
+
+		/*
+		 * We want to add any pinned extents to our free space cache
+		 * so we don't leak the space
+		 */
+		while (!next_page && (start < block_group->key.objectid +
+				      block_group->key.offset)) {
+			ret = find_first_extent_bit(unpin, start, &start, &end,
+						    EXTENT_DIRTY);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+
+			/* This pinned extent is out of our range */
+			if (start >= block_group->key.objectid +
+			    block_group->key.offset)
 				break;
+
+			len = block_group->key.objectid +
+				block_group->key.offset - start;
+			len = min(len, end + 1 - start);
+
+			entries++;
+			entry->offset = cpu_to_le64(start);
+			entry->bytes = cpu_to_le64(len);
+			entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+			start = end + 1;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				next_page = true;
 			entry++;
 		}
 		*crc = ~(u32)0;
@@ -650,7 +726,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		page_cache_release(page);
 
 		index++;
-	} while (node);
+	} while (node || next_page);
 
 	/* Write out the bitmaps */
 	list_for_each_safe(pos, n, &bitmap_list) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93c28a1d6bdc..65413394daef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -112,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode,
 				u64 start, size_t size, size_t compressed_size,
+				int compress_type,
 				struct page **compressed_pages)
 {
 	struct btrfs_key key;
@@ -126,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	size_t cur_size = size;
 	size_t datasize;
 	unsigned long offset;
-	int compress_type = BTRFS_COMPRESS_NONE;
 
-	if (compressed_size && compressed_pages) {
-		compress_type = root->fs_info->compress_type;
+	if (compressed_size && compressed_pages)
 		cur_size = compressed_size;
-	}
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -221,7 +219,7 @@ fail:
 static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode, u64 start, u64 end,
-				 size_t compressed_size,
+				 size_t compressed_size, int compress_type,
 				 struct page **compressed_pages)
 {
 	u64 isize = i_size_read(inode);
@@ -254,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 		inline_len = min_t(u64, isize, actual_end);
 	ret = insert_inline_extent(trans, root, inode, start,
 				   inline_len, compressed_size,
-				   compressed_pages);
+				   compress_type, compressed_pages);
 	BUG_ON(ret);
 	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -433,12 +431,13 @@ again:
 			 * to make an uncompressed inline extent.
 			 */
 			ret = cow_file_range_inline(trans, root, inode,
-						    start, end, 0, NULL);
+						    start, end, 0, 0, NULL);
 		} else {
 			/* try making a compressed inline extent */
 			ret = cow_file_range_inline(trans, root, inode,
 						    start, end,
-						    total_compressed, pages);
+						    total_compressed,
+						    compress_type, pages);
 		}
 		if (ret == 0) {
 			/*
@@ -792,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
 	if (start == 0) {
 		/* lets try to make an inline extent */
 		ret = cow_file_range_inline(trans, root, inode,
-					    start, end, 0, NULL);
+					    start, end, 0, 0, NULL);
 		if (ret == 0) {
 			extent_clear_unlock_delalloc(inode,
 				     &BTRFS_I(inode)->io_tree,
@@ -2222,8 +2221,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 			insert = 1;
 #endif
 		insert = 1;
-	} else {
-		WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
 	}
 
 	if (!BTRFS_I(inode)->orphan_meta_reserved) {
@@ -2537,8 +2534,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
 	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-	if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
-		inode->i_mapping->flags &= ~__GFP_FS;
 
 	/*
 	 * try to precache a NULL acl entry for files that don't have
@@ -6960,8 +6955,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * should cover the worst case number of items we'll modify.
 	 */
 	trans = btrfs_start_transaction(root, 20);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                goto out_notrans;
+        }
 
 	btrfs_set_trans_block_group(trans, new_dir);
 
@@ -7061,7 +7058,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 out_fail:
 	btrfs_end_transaction_throttle(trans, root);
-
+out_notrans:
 	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&root->fs_info->subvol_sem);
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7c07fe26b7cf..cfc264fefdb0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -373,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root,
 	inode_item->nbytes = cpu_to_le64(root->leafsize);
 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
 
+	root_item.flags = 0;
+	root_item.byte_limit = 0;
+	inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
+
 	btrfs_set_root_bytenr(&root_item, leaf->start);
 	btrfs_set_root_generation(&root_item, trans->transid);
 	btrfs_set_root_level(&root_item, 0);
@@ -2436,8 +2440,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
 		return PTR_ERR(trans);
 	transid = trans->transid;
 	ret = btrfs_commit_transaction_async(trans, root, 0);
-	if (ret)
+	if (ret) {
+		btrfs_end_transaction(trans, root);
 		return ret;
+	}
 
 	if (argp)
 		if (copy_to_user(argp, &transid, sizeof(transid)))
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 29b2d7c930eb..6928bff62daa 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -473,3 +473,21 @@ again:
 	btrfs_free_path(path);
 	return 0;
 }
+
+/*
+ * Old btrfs forgets to init root_item->flags and root_item->byte_limit
+ * for subvolumes. To work around this problem, we steal a bit from
+ * root_item->inode_item->flags, and use it to indicate if those fields
+ * have been properly initialized.
+ */
+void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
+{
+	u64 inode_flags = le64_to_cpu(root_item->inode.flags);
+
+	if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
+		inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
+		root_item->inode.flags = cpu_to_le64(inode_flags);
+		root_item->flags = 0;
+		root_item->byte_limit = 0;
+	}
+}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2edfc039f098..58e7de9cc90c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -644,6 +644,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
 	struct btrfs_fs_info *info = root->fs_info;
+	char *compress_type;
 
 	if (btrfs_test_opt(root, DEGRADED))
 		seq_puts(seq, ",degraded");
@@ -662,8 +663,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (info->thread_pool_size !=  min_t(unsigned long,
 					     num_online_cpus() + 2, 8))
 		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
-	if (btrfs_test_opt(root, COMPRESS))
-		seq_puts(seq, ",compress");
+	if (btrfs_test_opt(root, COMPRESS)) {
+		if (info->compress_type == BTRFS_COMPRESS_ZLIB)
+			compress_type = "zlib";
+		else
+			compress_type = "lzo";
+		if (btrfs_test_opt(root, FORCE_COMPRESS))
+			seq_printf(seq, ",compress-force=%s", compress_type);
+		else
+			seq_printf(seq, ",compress=%s", compress_type);
+	}
 	if (btrfs_test_opt(root, NOSSD))
 		seq_puts(seq, ",nossd");
 	if (btrfs_test_opt(root, SSD_SPREAD))
@@ -678,6 +687,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",discard");
 	if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
 		seq_puts(seq, ",noacl");
+	if (btrfs_test_opt(root, SPACE_CACHE))
+		seq_puts(seq, ",space_cache");
+	if (btrfs_test_opt(root, CLEAR_CACHE))
+		seq_puts(seq, ",clear_cache");
+	if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+		seq_puts(seq, ",user_subvol_rm_allowed");
 	return 0;
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ce48eb59d615..5b158da7e0bb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -197,6 +197,7 @@ again:
 
 	ret = join_transaction(root);
 	if (ret < 0) {
+		kmem_cache_free(btrfs_trans_handle_cachep, h);
 		if (type != TRANS_JOIN_NOLOCK)
 			mutex_unlock(&root->fs_info->trans_mutex);
 		return ERR_PTR(ret);
@@ -975,6 +976,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+	btrfs_check_and_init_root_item(new_root_item);
 
 	root_flags = btrfs_root_flags(new_root_item);
 	if (pending->readonly)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a91b69a6a291..0348d0c8f65e 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -198,6 +198,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 	idr_for_each(&group->inotify_data.idr, idr_callback, group);
 	idr_remove_all(&group->inotify_data.idr);
 	idr_destroy(&group->inotify_data.idr);
+	atomic_dec(&group->inotify_data.user->inotify_devs);
 	free_uid(group->inotify_data.user);
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bd46e7c8a0ef..8445fbc8985c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on)
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
-	struct user_struct *user = group->inotify_data.user;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
@@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_put_group(group);
 
-	atomic_dec(&user->inotify_devs);
-
 	return 0;
 }
 
@@ -697,7 +694,7 @@ retry:
 	return ret;
 }
 
-static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
+static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 {
 	struct fsnotify_group *group;
 
@@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
 	group->inotify_data.last_wd = 0;
-	group->inotify_data.user = user;
 	group->inotify_data.fa = NULL;
+	group->inotify_data.user = get_current_user();
+
+	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
+	    inotify_max_user_instances) {
+		fsnotify_put_group(group);
+		return ERR_PTR(-EMFILE);
+	}
 
 	return group;
 }
@@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
 	struct fsnotify_group *group;
-	struct user_struct *user;
 	int ret;
 
 	/* Check the IN_* constants for consistency.  */
@@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
-	user = get_current_user();
-	if (unlikely(atomic_read(&user->inotify_devs) >=
-			inotify_max_user_instances)) {
-		ret = -EMFILE;
-		goto out_free_uid;
-	}
-
 	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
-	group = inotify_new_group(user, inotify_max_queued_events);
-	if (IS_ERR(group)) {
-		ret = PTR_ERR(group);
-		goto out_free_uid;
-	}
-
-	atomic_inc(&user->inotify_devs);
+	group = inotify_new_group(inotify_max_queued_events);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
 				  O_RDONLY | flags);
-	if (ret >= 0)
-		return ret;
+	if (ret < 0)
+		fsnotify_put_group(group);
 
-	fsnotify_put_group(group);
-	atomic_dec(&user->inotify_devs);
-out_free_uid:
-	free_uid(user);
 	return ret;
 }