95 files changed, 2361 insertions, 1711 deletions
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index e0f59263a96d..c6bad51d8ec7 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -43,7 +43,10 @@ struct befs_sb_info {
 	u32 ag_shift;
 	u32 num_ags;
 
-	/* jornal log entry */
+	/* State of the superblock */
+	u32 flags;
+
+	/* Journal log entry */
 	befs_block_run log_blocks;
 	befs_off_t log_start;
 	befs_off_t log_end;
@@ -79,7 +82,7 @@ enum befs_err {
 	BEFS_BT_END,
 	BEFS_BT_EMPTY,
 	BEFS_BT_MATCH,
-	BEFS_BT_PARMATCH,
+	BEFS_BT_OVERFLOW,
 	BEFS_BT_NOT_FOUND
 };
 
@@ -140,18 +143,6 @@ befs_iaddrs_per_block(struct super_block *sb)
 	return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr);
 }
 
-static inline int
-befs_iaddr_is_empty(const befs_inode_addr *iaddr)
-{
-	return (!iaddr->allocation_group) && (!iaddr->start) && (!iaddr->len);
-}
-
-static inline size_t
-befs_brun_size(struct super_block *sb, befs_block_run run)
-{
-	return BEFS_SB(sb)->block_size * run.len;
-}
-
 #include "endian.h"
 
 #endif				/* _LINUX_BEFS_H */
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 307645f9e284..7e135ea73fdd 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -85,7 +85,7 @@ struct befs_btree_node {
 };
 
 /* local constants */
-static const befs_off_t befs_bt_inval = 0xffffffffffffffffULL;
+static const befs_off_t BEFS_BT_INVAL = 0xffffffffffffffffULL;
 
 /* local functions */
 static int befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds,
@@ -156,8 +156,6 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
 	sup->max_depth = fs32_to_cpu(sb, od_sup->max_depth);
 	sup->data_type = fs32_to_cpu(sb, od_sup->data_type);
 	sup->root_node_ptr = fs64_to_cpu(sb, od_sup->root_node_ptr);
-	sup->free_node_ptr = fs64_to_cpu(sb, od_sup->free_node_ptr);
-	sup->max_size = fs64_to_cpu(sb, od_sup->max_size);
 
 	brelse(bh);
 	if (sup->magic != BEFS_BTREE_MAGIC) {
@@ -183,8 +181,8 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
  * Calls befs_read_datastream to read in the indicated btree node and
  * makes sure its header fields are in cpu byteorder, byteswapping if
  * necessary.
- * Note: node->bh must be NULL when this function called first
- * time. Don't forget brelse(node->bh) after last call.
+ * Note: node->bh must be NULL when this function is called the first time.
+ * Don't forget brelse(node->bh) after last call.
  *
  * On success, returns BEFS_OK and *@node contains the btree node that
  * starts at @node_off, with the node->head fields in cpu byte order.
@@ -244,7 +242,7 @@ befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds,
  *   Read the superblock and rootnode of the b+tree.
  *   Drill down through the interior nodes using befs_find_key().
  *   Once at the correct leaf node, use befs_find_key() again to get the
- *   actuall value stored with the key.
+ *   actual value stored with the key.
  */
 int
 befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
@@ -283,9 +281,9 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
 
 	while (!befs_leafnode(this_node)) {
 		res = befs_find_key(sb, this_node, key, &node_off);
-		if (res == BEFS_BT_NOT_FOUND)
+		/* if no key set, try the overflow node */
+		if (res == BEFS_BT_OVERFLOW)
 			node_off = this_node->head.overflow;
-		/* if no match, go to overflow node */
 		if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
 			befs_error(sb, "befs_btree_find() failed to read "
 				   "node at %llu", node_off);
@@ -293,15 +291,15 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
 		}
 	}
 
-	/* at the correct leaf node now */
-
+	/* at a leaf node now, check if it is correct */
 	res = befs_find_key(sb, this_node, key, value);
 
 	brelse(this_node->bh);
 	kfree(this_node);
 
 	if (res != BEFS_BT_MATCH) {
-		befs_debug(sb, "<--- %s Key %s not found", __func__, key);
+		befs_error(sb, "<--- %s Key %s not found", __func__, key);
+		befs_debug(sb, "<--- %s ERROR", __func__);
 		*value = 0;
 		return BEFS_BT_NOT_FOUND;
 	}
@@ -324,16 +322,12 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
  * @findkey: Keystring to search for
  * @value: If key is found, the value stored with the key is put here
  *
- * finds exact match if one exists, and returns BEFS_BT_MATCH
- * If no exact match, finds first key in node that is greater
- * (alphabetically) than the search key and returns BEFS_BT_PARMATCH
- * (for partial match, I guess). Can you think of something better to
- * call it?
- *
- * If no key was a match or greater than the search key, return
- * BEFS_BT_NOT_FOUND.
+ * Finds exact match if one exists, and returns BEFS_BT_MATCH.
+ * If there is no match and node's value array is too small for key, return
+ * BEFS_BT_OVERFLOW.
+ * If no match and node should countain this key, return BEFS_BT_NOT_FOUND.
  *
- * Use binary search instead of a linear.
+ * Uses binary search instead of a linear.
  */
 static int
 befs_find_key(struct super_block *sb, struct befs_btree_node *node,
@@ -348,18 +342,16 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
 
 	befs_debug(sb, "---> %s %s", __func__, findkey);
 
-	*value = 0;
-
 	findkey_len = strlen(findkey);
 
-	/* if node can not contain key, just skeep this node */
+	/* if node can not contain key, just skip this node */
 	last = node->head.all_key_count - 1;
 	thiskey = befs_bt_get_key(sb, node, last, &keylen);
 
 	eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len);
 	if (eq < 0) {
-		befs_debug(sb, "<--- %s %s not found", __func__, findkey);
-		return BEFS_BT_NOT_FOUND;
+		befs_debug(sb, "<--- node can't contain %s", findkey);
+		return BEFS_BT_OVERFLOW;
 	}
 
 	valarray = befs_bt_valarray(node);
@@ -387,12 +379,15 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
 		else
 			first = mid + 1;
 	}
+
+	/* return an existing value so caller can arrive to a leaf node */
 	if (eq < 0)
 		*value = fs64_to_cpu(sb, valarray[mid + 1]);
 	else
 		*value = fs64_to_cpu(sb, valarray[mid]);
-	befs_debug(sb, "<--- %s found %s at %d", __func__, thiskey, mid);
-	return BEFS_BT_PARMATCH;
+	befs_error(sb, "<--- %s %s not found", __func__, findkey);
+	befs_debug(sb, "<--- %s ERROR", __func__);
+	return BEFS_BT_NOT_FOUND;
 }
 
 /**
@@ -405,7 +400,7 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
  * @keysize: Length of the returned key
  * @value: Value stored with the returned key
  *
- * Heres how it works: Key_no is the index of the key/value pair to 
+ * Here's how it works: Key_no is the index of the key/value pair to
  * return in keybuf/value.
  * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is 
  * the number of characters in the key (just a convenience).
@@ -422,7 +417,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
 {
 	struct befs_btree_node *this_node;
 	befs_btree_super bt_super;
-	befs_off_t node_off = 0;
+	befs_off_t node_off;
 	int cur_key;
 	fs64 *valarray;
 	char *keystart;
@@ -467,7 +462,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
 	while (key_sum + this_node->head.all_key_count <= key_no) {
 
 		/* no more nodes to look in: key_no is too large */
-		if (this_node->head.right == befs_bt_inval) {
+		if (this_node->head.right == BEFS_BT_INVAL) {
 			*keysize = 0;
 			*value = 0;
 			befs_debug(sb,
@@ -541,7 +536,6 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
  * @node_off: Pointer to offset of current node within datastream. Modified
  * 		by the function.
  *
- *
  * Helper function for btree traverse. Moves the current position to the 
  * start of the first leaf node.
  *
@@ -608,7 +602,7 @@ static int
 befs_leafnode(struct befs_btree_node *node)
 {
 	/* all interior nodes (and only interior nodes) have an overflow node */
-	if (node->head.overflow == befs_bt_inval)
+	if (node->head.overflow == BEFS_BT_INVAL)
 		return 1;
 	else
 		return 0;
@@ -715,7 +709,7 @@ befs_bt_get_key(struct super_block *sb, struct befs_btree_node *node,
  *
  * Returns 0 if @key1 and @key2 are equal.
  * Returns >0 if @key1 is greater.
- * Returns <0 if @key2 is greater..
+ * Returns <0 if @key2 is greater.
  */
 static int
 befs_compare_strings(const void *key1, int keylen1,
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index af1bc19b7c85..b4c7ba013c0d 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -22,22 +22,22 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 };
 
 static int befs_find_brun_direct(struct super_block *sb,
 				 const befs_data_stream *data,
-				 befs_blocknr_t blockno, befs_block_run * run);
+				 befs_blocknr_t blockno, befs_block_run *run);
 
 static int befs_find_brun_indirect(struct super_block *sb,
 				   const befs_data_stream *data,
 				   befs_blocknr_t blockno,
-				   befs_block_run * run);
+				   befs_block_run *run);
 
 static int befs_find_brun_dblindirect(struct super_block *sb,
 				      const befs_data_stream *data,
 				      befs_blocknr_t blockno,
-				      befs_block_run * run);
+				      befs_block_run *run);
 
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
  * @sb: Filesystem superblock
- * @ds: datastrem to find data with
+ * @ds: datastream to find data with
  * @pos: start of data
  * @off: offset of data in buffer_head->b_data
  *
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
  */
 struct buffer_head *
 befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
-		     befs_off_t pos, uint * off)
+		     befs_off_t pos, uint *off)
 {
 	struct buffer_head *bh;
 	befs_block_run run;
@@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
 	return bh;
 }
 
-/*
+/**
+ * befs_fblock2brun - give back block run for fblock
+ * @sb: the superblock
+ * @data: datastream to read from
+ * @fblock: the blocknumber with the file position to find
+ * @run: The found run is passed back through this pointer
+ *
  * Takes a file position and gives back a brun who's starting block
  * is block number fblock of the file.
  * 
@@ -88,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
  */
 int
 befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
-		 befs_blocknr_t fblock, befs_block_run * run)
+		 befs_blocknr_t fblock, befs_block_run *run)
 {
 	int err;
 	befs_off_t pos = fblock << BEFS_SB(sb)->block_shift;
@@ -115,7 +121,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
 /**
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
- * @ds: Datastrem to read from
+ * @ds: Datastream to read from
  * @buff: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
@@ -128,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds,
 	befs_off_t bytes_read = 0;	/* bytes readed */
 	u16 plen;
 	struct buffer_head *bh;
+
 	befs_debug(sb, "---> %s length: %llu", __func__, len);
 
 	while (bytes_read < len) {
@@ -183,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
 		metablocks += ds->indirect.len;
 
 	/*
-	   Double indir block, plus all the indirect blocks it mapps
-	   In the double-indirect range, all block runs of data are
-	   BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
-	   how many data block runs are in the double-indirect region,
-	   and from that we know how many indirect blocks it takes to
-	   map them. We assume that the indirect blocks are also
-	   BEFS_DBLINDIR_BRUN_LEN blocks long.
+	 * Double indir block, plus all the indirect blocks it maps.
+	 * In the double-indirect range, all block runs of data are
+	 * BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
+	 * how many data block runs are in the double-indirect region,
+	 * and from that we know how many indirect blocks it takes to
+	 * map them. We assume that the indirect blocks are also
+	 * BEFS_DBLINDIR_BRUN_LEN blocks long.
 	 */
 	if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) {
 		uint dbl_bytes;
@@ -212,58 +219,50 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
 	return blocks;
 }
 
-/*
-	Finds the block run that starts at file block number blockno
-	in the file represented by the datastream data, if that 
-	blockno is in the direct region of the datastream.
-	
-	sb: the superblock
-	data: the datastream
-	blockno: the blocknumber to find
-	run: The found run is passed back through this pointer
-	
-	Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-	otherwise.
-	
-	Algorithm:
-	Linear search. Checks each element of array[] to see if it
-	contains the blockno-th filesystem block. This is necessary
-	because the block runs map variable amounts of data. Simply
-	keeps a count of the number of blocks searched so far (sum),
-	incrementing this by the length of each block run as we come
-	across it. Adds sum to *count before returning (this is so
-	you can search multiple arrays that are logicaly one array,
-	as in the indirect region code).
-	
-	When/if blockno is found, if blockno is inside of a block 
-	run as stored on disk, we offset the start and length members
-	of the block run, so that blockno is the start and len is
-	still valid (the run ends in the same place).
-	
-	2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_direct - find a direct block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the direct region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * Linear search. Checks each element of array[] to see if it
+ * contains the blockno-th filesystem block. This is necessary
+ * because the block runs map variable amounts of data. Simply
+ * keeps a count of the number of blocks searched so far (sum),
+ * incrementing this by the length of each block run as we come
+ * across it. Adds sum to *count before returning (this is so
+ * you can search multiple arrays that are logicaly one array,
+ * as in the indirect region code).
+ *
+ * When/if blockno is found, if blockno is inside of a block
+ * run as stored on disk, we offset the start and length members
+ * of the block run, so that blockno is the start and len is
+ * still valid (the run ends in the same place).
+ */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
-		      befs_blocknr_t blockno, befs_block_run * run)
+		      befs_blocknr_t blockno, befs_block_run *run)
 {
 	int i;
 	const befs_block_run *array = data->direct;
 	befs_blocknr_t sum;
-	befs_blocknr_t max_block =
-	    data->max_direct_range >> BEFS_SB(sb)->block_shift;
 
 	befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno);
 
-	if (blockno > max_block) {
-		befs_error(sb, "%s passed block outside of direct region",
-			   __func__);
-		return BEFS_ERR;
-	}
-
 	for (i = 0, sum = 0; i < BEFS_NUM_DIRECT_BLOCKS;
 	     sum += array[i].len, i++) {
 		if (blockno >= sum && blockno < sum + (array[i].len)) {
 			int offset = blockno - sum;
+
 			run->allocation_group = array[i].allocation_group;
 			run->start = array[i].start + offset;
 			run->len = array[i].len - offset;
@@ -275,38 +274,39 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
 		}
 	}
 
+	befs_error(sb, "%s failed to find file block %lu", __func__,
+		   (unsigned long)blockno);
 	befs_debug(sb, "---> %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
-/*
-	Finds the block run that starts at file block number blockno
-	in the file represented by the datastream data, if that 
-	blockno is in the indirect region of the datastream.
-	
-	sb: the superblock
-	data: the datastream
-	blockno: the blocknumber to find
-	run: The found run is passed back through this pointer
-	
-	Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-	otherwise.
-	
-	Algorithm:
-	For each block in the indirect run of the datastream, read
-	it in and search through it for	search_blk.
-	
-	XXX:
-	Really should check to make sure blockno is inside indirect
-	region.
-	
-	2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_indirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * For each block in the indirect run of the datastream, read
+ * it in and search through it for search_blk.
+ *
+ * XXX:
+ * Really should check to make sure blockno is inside indirect
+ * region.
+ */
 static int
 befs_find_brun_indirect(struct super_block *sb,
 			const befs_data_stream *data,
 			befs_blocknr_t blockno,
-			befs_block_run * run)
+			befs_block_run *run)
 {
 	int i, j;
 	befs_blocknr_t sum = 0;
@@ -326,11 +326,12 @@ befs_find_brun_indirect(struct super_block *sb,
 
 	/* Examine blocks of the indirect run one at a time */
 	for (i = 0; i < indirect.len; i++) {
-		indirblock = befs_bread(sb, indirblockno + i);
+		indirblock = sb_bread(sb, indirblockno + i);
 		if (indirblock == NULL) {
-			befs_debug(sb, "---> %s failed to read "
+			befs_error(sb, "---> %s failed to read "
 				   "disk block %lu from the indirect brun",
 				   __func__, (unsigned long)indirblockno + i);
+			befs_debug(sb, "<--- %s ERROR", __func__);
 			return BEFS_ERR;
 		}
 
@@ -370,52 +371,51 @@ befs_find_brun_indirect(struct super_block *sb,
 	return BEFS_ERR;
 }
 
-/*
-	Finds the block run that starts at file block number blockno
-	in the file represented by the datastream data, if that 
-	blockno is in the double-indirect region of the datastream.
-	
-	sb: the superblock
-	data: the datastream
-	blockno: the blocknumber to find
-	run: The found run is passed back through this pointer
-	
-	Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-	otherwise.
-	
-	Algorithm:
-	The block runs in the double-indirect region are different.
-	They are always allocated 4 fs blocks at a time, so each
-	block run maps a constant amount of file data. This means
-	that we can directly calculate how many block runs into the
-	double-indirect region we need to go to get to the one that
-	maps a particular filesystem block.
-	
-	We do this in two stages. First we calculate which of the
-	inode addresses in the double-indirect block will point us
-	to the indirect block that contains the mapping for the data,
-	then we calculate which of the inode addresses in that 
-	indirect block maps the data block we are after.
-	
-	Oh, and once we've done that, we actually read in the blocks 
-	that contain the inode addresses we calculated above. Even 
-	though the double-indirect run may be several blocks long, 
-	we can calculate which of those blocks will contain the index
-	we are after and only read that one. We then follow it to 
-	the indirect block and perform a  similar process to find
-	the actual block run that maps the data block we are interested
-	in.
-	
-	Then we offset the run as in befs_find_brun_array() and we are 
-	done.
-	
-	2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_dblindirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the double-indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * The block runs in the double-indirect region are different.
+ * They are always allocated 4 fs blocks at a time, so each
+ * block run maps a constant amount of file data. This means
+ * that we can directly calculate how many block runs into the
+ * double-indirect region we need to go to get to the one that
+ * maps a particular filesystem block.
+ *
+ * We do this in two stages. First we calculate which of the
+ * inode addresses in the double-indirect block will point us
+ * to the indirect block that contains the mapping for the data,
+ * then we calculate which of the inode addresses in that
+ * indirect block maps the data block we are after.
+ *
+ * Oh, and once we've done that, we actually read in the blocks
+ * that contain the inode addresses we calculated above. Even
+ * though the double-indirect run may be several blocks long,
+ * we can calculate which of those blocks will contain the index
+ * we are after and only read that one. We then follow it to
+ * the indirect block and perform a similar process to find
+ * the actual block run that maps the data block we are interested
+ * in.
+ *
+ * Then we offset the run as in befs_find_brun_array() and we are
+ * done.
+ */
 static int
 befs_find_brun_dblindirect(struct super_block *sb,
 			   const befs_data_stream *data,
 			   befs_blocknr_t blockno,
-			   befs_block_run * run)
+			   befs_block_run *run)
 {
 	int dblindir_indx;
 	int indir_indx;
@@ -430,10 +430,9 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	struct buffer_head *indir_block;
 	befs_block_run indir_run;
 	befs_disk_inode_addr *iaddr_array;
-	struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
 	befs_blocknr_t indir_start_blk =
-	    data->max_indirect_range >> befs_sb->block_shift;
+	    data->max_indirect_range >> BEFS_SB(sb)->block_shift;
 
 	off_t dbl_indir_off = blockno - indir_start_blk;
 
@@ -471,7 +470,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	}
 
 	dbl_indir_block =
-	    befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
+	    sb_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
 					dbl_which_block);
 	if (dbl_indir_block == NULL) {
 		befs_error(sb, "%s couldn't read the "
@@ -479,7 +478,6 @@ befs_find_brun_dblindirect(struct super_block *sb,
 			   (unsigned long)
 			   iaddr2blockno(sb, &data->double_indirect) +
 			   dbl_which_block);
-		brelse(dbl_indir_block);
 		return BEFS_ERR;
 	}
 
@@ -499,12 +497,11 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	}
 
 	indir_block =
-	    befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
+	    sb_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
 	if (indir_block == NULL) {
 		befs_error(sb, "%s couldn't read the indirect block "
 			   "at blockno %lu", __func__, (unsigned long)
 			   iaddr2blockno(sb, &indir_run) + which_block);
-		brelse(indir_block);
 		return BEFS_ERR;
 	}
 
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index 4de7cffcd662..85c13392e9e8 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -169,6 +169,7 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
 
 	befs_debug(sb, "  num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks));
 	befs_debug(sb, "  used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks));
+	befs_debug(sb, "  inode_size %u", fs32_to_cpu(sb, sup->inode_size));
 
 	befs_debug(sb, "  magic2 %08x", fs32_to_cpu(sb, sup->magic2));
 	befs_debug(sb, "  blocks_per_ag %u",
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 523c8af2d770..b4a558126ee1 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -27,7 +27,7 @@ struct buffer_head *
 befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 {
 	struct buffer_head *bh;
-	befs_blocknr_t block = 0;
+	befs_blocknr_t block;
 	struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
 	befs_debug(sb, "---> Enter %s "
@@ -59,27 +59,3 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 	befs_debug(sb, "<--- %s ERROR", __func__);
 	return NULL;
 }
-
-struct buffer_head *
-befs_bread(struct super_block *sb, befs_blocknr_t block)
-{
-	struct buffer_head *bh;
-
-	befs_debug(sb, "---> Enter %s %lu", __func__, (unsigned long)block);
-
-	bh = sb_bread(sb, block);
-
-	if (bh == NULL) {
-		befs_error(sb, "Failed to read block %lu",
-			   (unsigned long)block);
-		goto error;
-	}
-
-	befs_debug(sb, "<--- %s", __func__);
-
-	return bh;
-
-      error:
-	befs_debug(sb, "<--- %s ERROR", __func__);
-	return NULL;
-}
diff --git a/fs/befs/io.h b/fs/befs/io.h
index 9b78266b6aa5..78d7bc6e60de 100644
--- a/fs/befs/io.h
+++ b/fs/befs/io.h
@@ -5,5 +5,3 @@
 struct buffer_head *befs_bread_iaddr(struct super_block *sb,
 				     befs_inode_addr iaddr);
 
-struct buffer_head *befs_bread(struct super_block *sb, befs_blocknr_t block);
-
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index bfe9f9994935..647a276eba56 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -120,7 +120,7 @@ befs_get_block(struct inode *inode, sector_t block,
 	struct super_block *sb = inode->i_sb;
 	befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
 	befs_block_run run = BAD_IADDR;
-	int res = 0;
+	int res;
 	ulong disk_off;
 
 	befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
@@ -179,15 +179,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 		kfree(utfname);
 
 	} else {
-		ret = befs_btree_find(sb, ds, dentry->d_name.name, &offset);
+		ret = befs_btree_find(sb, ds, name, &offset);
 	}
 
 	if (ret == BEFS_BT_NOT_FOUND) {
 		befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
+		d_add(dentry, NULL);
 		return ERR_PTR(-ENOENT);
 
 	} else if (ret != BEFS_OK || offset == 0) {
-		befs_warning(sb, "<--- %s Error", __func__);
+		befs_error(sb, "<--- %s Error", __func__);
 		return ERR_PTR(-ENODATA);
 	}
 
@@ -211,56 +212,55 @@ befs_readdir(struct file *file, struct dir_context *ctx)
 	befs_off_t value;
 	int result;
 	size_t keysize;
-	unsigned char d_type;
 	char keybuf[BEFS_NAME_LEN + 1];
 
 	befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
 		  __func__, file, inode->i_ino, ctx->pos);
 
-more:
-	result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
-				 keybuf, &keysize, &value);
+	while (1) {
+		result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
+					 keybuf, &keysize, &value);
 
-	if (result == BEFS_ERR) {
-		befs_debug(sb, "<--- %s ERROR", __func__);
-		befs_error(sb, "IO error reading %pD (inode %lu)",
-			   file, inode->i_ino);
-		return -EIO;
-
-	} else if (result == BEFS_BT_END) {
-		befs_debug(sb, "<--- %s END", __func__);
-		return 0;
-
-	} else if (result == BEFS_BT_EMPTY) {
-		befs_debug(sb, "<--- %s Empty directory", __func__);
-		return 0;
-	}
+		if (result == BEFS_ERR) {
+			befs_debug(sb, "<--- %s ERROR", __func__);
+			befs_error(sb, "IO error reading %pD (inode %lu)",
+				   file, inode->i_ino);
+			return -EIO;
 
-	d_type = DT_UNKNOWN;
+		} else if (result == BEFS_BT_END) {
+			befs_debug(sb, "<--- %s END", __func__);
+			return 0;
 
-	/* Convert to NLS */
-	if (BEFS_SB(sb)->nls) {
-		char *nlsname;
-		int nlsnamelen;
-		result =
-		    befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen);
-		if (result < 0) {
-			befs_debug(sb, "<--- %s ERROR", __func__);
-			return result;
+		} else if (result == BEFS_BT_EMPTY) {
+			befs_debug(sb, "<--- %s Empty directory", __func__);
+			return 0;
 		}
-		if (!dir_emit(ctx, nlsname, nlsnamelen,
-				 (ino_t) value, d_type)) {
+
+		/* Convert to NLS */
+		if (BEFS_SB(sb)->nls) {
+			char *nlsname;
+			int nlsnamelen;
+
+			result =
+			    befs_utf2nls(sb, keybuf, keysize, &nlsname,
+					 &nlsnamelen);
+			if (result < 0) {
+				befs_debug(sb, "<--- %s ERROR", __func__);
+				return result;
+			}
+			if (!dir_emit(ctx, nlsname, nlsnamelen,
+				      (ino_t) value, DT_UNKNOWN)) {
+				kfree(nlsname);
+				return 0;
+			}
 			kfree(nlsname);
-			return 0;
+		} else {
+			if (!dir_emit(ctx, keybuf, keysize,
+				      (ino_t) value, DT_UNKNOWN))
+				return 0;
 		}
-		kfree(nlsname);
-	} else {
-		if (!dir_emit(ctx, keybuf, keysize,
-				 (ino_t) value, d_type))
-			return 0;
+		ctx->pos++;
 	}
-	ctx->pos++;
-	goto more;
 }
 
 static struct inode *
@@ -299,7 +299,6 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	struct befs_sb_info *befs_sb = BEFS_SB(sb);
 	struct befs_inode_info *befs_ino;
 	struct inode *inode;
-	long ret = -EIO;
 
 	befs_debug(sb, "---> %s inode = %lu", __func__, ino);
 
@@ -318,7 +317,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 		   befs_ino->i_inode_num.allocation_group,
 		   befs_ino->i_inode_num.start, befs_ino->i_inode_num.len);
 
-	bh = befs_bread(sb, inode->i_ino);
+	bh = sb_bread(sb, inode->i_ino);
 	if (!bh) {
 		befs_error(sb, "unable to read inode block - "
 			   "inode = %lu", inode->i_ino);
@@ -421,7 +420,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
       unacquire_none:
 	iget_failed(inode);
 	befs_debug(sb, "<--- %s - Bad inode", __func__);
-	return ERR_PTR(ret);
+	return ERR_PTR(-EIO);
 }
 
 /* Initialize the inode cache. Called at fs setup.
@@ -436,10 +435,9 @@ befs_init_inodecache(void)
 					      0, (SLAB_RECLAIM_ACCOUNT|
 						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					      init_once);
-	if (befs_inode_cachep == NULL) {
-		pr_err("%s: Couldn't initialize inode slabcache\n", __func__);
+	if (befs_inode_cachep == NULL)
 		return -ENOMEM;
-	}
+
 	return 0;
 }
 
@@ -524,8 +522,6 @@ befs_utf2nls(struct super_block *sb, const char *in,
 
 	*out = result = kmalloc(maxlen, GFP_NOFS);
 	if (!*out) {
-		befs_error(sb, "%s cannot allocate memory", __func__);
-		*out_len = 0;
 		return -ENOMEM;
 	}
 
@@ -604,7 +600,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
 
 	*out = result = kmalloc(maxlen, GFP_NOFS);
 	if (!*out) {
-		befs_error(sb, "%s cannot allocate memory", __func__);
 		*out_len = 0;
 		return -ENOMEM;
 	}
@@ -637,10 +632,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
 	return -EILSEQ;
 }
 
-/**
- * Use the
- *
- */
 enum {
 	Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
 };
@@ -760,19 +751,19 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	long ret = -EINVAL;
 	const unsigned long sb_block = 0;
 	const off_t x86_sb_off = 512;
+	int blocksize;
 
 	save_mount_options(sb, data);
 
 	sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL);
-	if (sb->s_fs_info == NULL) {
-		pr_err("(%s): Unable to allocate memory for private "
-		       "portion of superblock. Bailing.\n", sb->s_id);
+	if (sb->s_fs_info == NULL)
 		goto unacquire_none;
-	}
+
 	befs_sb = BEFS_SB(sb);
 
 	if (!parse_options((char *) data, &befs_sb->mount_opts)) {
-		befs_error(sb, "cannot parse mount options");
+		if (!silent)
+			befs_error(sb, "cannot parse mount options");
 		goto unacquire_priv_sbp;
 	}
 
@@ -793,10 +784,16 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	 * least 1k to get the second 512 bytes of the volume.
 	 * -WD 10-26-01
 	 */ 
-	sb_min_blocksize(sb, 1024);
+	blocksize = sb_min_blocksize(sb, 1024);
+	if (!blocksize) {
+		if (!silent)
+			befs_error(sb, "unable to set blocksize");
+		goto unacquire_priv_sbp;
+	}
 
 	if (!(bh = sb_bread(sb, sb_block))) {
-		befs_error(sb, "unable to read superblock");
+		if (!silent)
+			befs_error(sb, "unable to read superblock");
 		goto unacquire_priv_sbp;
 	}
 
@@ -820,9 +817,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	brelse(bh);
 
 	if( befs_sb->num_blocks > ~((sector_t)0) ) {
-		befs_error(sb, "blocks count: %llu "
-			"is larger than the host can use",
-			befs_sb->num_blocks);
+		if (!silent)
+			befs_error(sb, "blocks count: %llu is larger than the host can use",
+					befs_sb->num_blocks);
 		goto unacquire_priv_sbp;
 	}
 
@@ -841,7 +838,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		befs_error(sb, "get root inode failed");
+		if (!silent)
+			befs_error(sb, "get root inode failed");
 		goto unacquire_priv_sbp;
 	}
 
@@ -870,9 +868,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
       unacquire_priv_sbp:
 	kfree(befs_sb->mount_opts.iocharset);
 	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
 
       unacquire_none:
-	sb->s_fs_info = NULL;
 	return ret;
 }
 
diff --git a/fs/befs/super.c b/fs/befs/super.c
index aeafc4d84278..7c50025c99d8 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -13,24 +13,20 @@
 #include "befs.h"
 #include "super.h"
 
-/**
- * load_befs_sb -- Read from disk and properly byteswap all the fields
+/*
+ * befs_load_sb -- Read from disk and properly byteswap all the fields
  * of the befs superblock
- *
- *
- *
- *
  */
 int
-befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
+befs_load_sb(struct super_block *sb, befs_super_block *disk_sb)
 {
 	struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
 	/* Check the byte order of the filesystem */
 	if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
-	    befs_sb->byte_order = BEFS_BYTESEX_LE;
+		befs_sb->byte_order = BEFS_BYTESEX_LE;
 	else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE)
-	    befs_sb->byte_order = BEFS_BYTESEX_BE;
+		befs_sb->byte_order = BEFS_BYTESEX_BE;
 
 	befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1);
 	befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2);
@@ -45,6 +41,8 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
 	befs_sb->ag_shift = fs32_to_cpu(sb, disk_sb->ag_shift);
 	befs_sb->num_ags = fs32_to_cpu(sb, disk_sb->num_ags);
 
+	befs_sb->flags = fs32_to_cpu(sb, disk_sb->flags);
+
 	befs_sb->log_blocks = fsrun_to_cpu(sb, disk_sb->log_blocks);
 	befs_sb->log_start = fs64_to_cpu(sb, disk_sb->log_start);
 	befs_sb->log_end = fs64_to_cpu(sb, disk_sb->log_end);
@@ -84,15 +82,15 @@ befs_check_sb(struct super_block *sb)
 	}
 
 	if (befs_sb->block_size > PAGE_SIZE) {
-		befs_error(sb, "blocksize(%u) cannot be larger"
+		befs_error(sb, "blocksize(%u) cannot be larger "
 			   "than system pagesize(%lu)", befs_sb->block_size,
 			   PAGE_SIZE);
 		return BEFS_ERR;
 	}
 
 	/*
-	   * block_shift and block_size encode the same information
-	   * in different ways as a consistency check.
+	 * block_shift and block_size encode the same information
+	 * in different ways as a consistency check.
 	 */
 
 	if ((1 << befs_sb->block_shift) != befs_sb->block_size) {
@@ -101,10 +99,18 @@ befs_check_sb(struct super_block *sb)
 		return BEFS_ERR;
 	}
 
-	if (befs_sb->log_start != befs_sb->log_end) {
+
+	/* ag_shift also encodes the same information as blocks_per_ag in a
+	 * different way, non-fatal consistency check
+	 */
+	if ((1 << befs_sb->ag_shift) != befs_sb->blocks_per_ag)
+		befs_error(sb, "ag_shift disagrees with blocks_per_ag.");
+
+	if (befs_sb->log_start != befs_sb->log_end ||
+	    befs_sb->flags == BEFS_DIRTY) {
 		befs_error(sb, "Filesystem not clean! There are blocks in the "
-			   "journal. You must boot into BeOS and mount this volume "
-			   "to make it clean.");
+			   "journal. You must boot into BeOS and mount this "
+			   "volume to make it clean.");
 		return BEFS_ERR;
 	}
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ccc70d96958d..d4d8b7e36b2f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -698,7 +698,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
 			if (ret) {
-				bio->bi_error = ret;
+				comp_bio->bi_error = ret;
 				bio_endio(comp_bio);
 			}
 
@@ -728,7 +728,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
 	if (ret) {
-		bio->bi_error = ret;
+		comp_bio->bi_error = ret;
 		bio_endio(comp_bio);
 	}
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c21bad26a27..0b8ce2b9f7d0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -252,7 +252,8 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
 
 #define BTRFS_FEATURE_COMPAT_RO_SUPP			\
-	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
+	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
 
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e720d3e6ec20..3a57f99d96aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2586,6 +2586,7 @@ int open_ctree(struct super_block *sb,
 	int num_backups_tried = 0;
 	int backup_index = 0;
 	int max_active;
+	int clear_free_space_tree = 0;
 
 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -3148,6 +3149,26 @@ retry_root_backup:
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
+	if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
+	    btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+		clear_free_space_tree = 1;
+	} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+		   !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
+		btrfs_warn(fs_info, "free space tree is invalid");
+		clear_free_space_tree = 1;
+	}
+
+	if (clear_free_space_tree) {
+		btrfs_info(fs_info, "clearing free space tree");
+		ret = btrfs_clear_free_space_tree(fs_info);
+		if (ret) {
+			btrfs_warn(fs_info,
+				   "failed to clear free space tree: %d", ret);
+			close_ctree(tree_root);
+			return ret;
+		}
+	}
+
 	if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
 	    !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
 		btrfs_info(fs_info, "creating free space tree");
@@ -3185,18 +3206,6 @@ retry_root_backup:
 
 	btrfs_qgroup_rescan_resume(fs_info);
 
-	if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
-	    btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
-		btrfs_info(fs_info, "clearing free space tree");
-		ret = btrfs_clear_free_space_tree(fs_info);
-		if (ret) {
-			btrfs_warn(fs_info,
-				"failed to clear free space tree: %d", ret);
-			close_ctree(tree_root);
-			return ret;
-		}
-	}
-
 	if (!fs_info->uuid_root) {
 		btrfs_info(fs_info, "creating UUID tree");
 		ret = btrfs_create_uuid_tree(fs_info);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ee40384c394d..66a755150056 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5558,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 	}
 }
 
-/*
- * The extent buffer bitmap operations are done with byte granularity because
- * bitmap items are not guaranteed to be aligned to a word and therefore a
- * single word in a bitmap may straddle two pages in the extent buffer.
- */
-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
-#define BITMAP_FIRST_BYTE_MASK(start) \
-	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
-#define BITMAP_LAST_BYTE_MASK(nbits) \
-	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+void le_bitmap_set(u8 *map, unsigned int start, int len)
+{
+	u8 *p = map + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~(u8)0;
+		p++;
+	}
+	if (len) {
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+void le_bitmap_clear(u8 *map, unsigned int start, int len)
+{
+	u8 *p = map + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_BYTE;
+		mask_to_clear = ~(u8)0;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
 
 /*
  * eb_bitmap_offset() - calculate the page and offset of the byte containing the
@@ -5612,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 			   unsigned long nr)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
@@ -5634,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 			      unsigned long pos, unsigned long len)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
 	const unsigned int size = pos + len;
 	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
 	eb_bitmap_offset(eb, start, pos, &i, &offset);
 	page = eb->pages[i];
@@ -5651,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 		kaddr[offset] |= mask_to_set;
 		len -= bits_to_set;
 		bits_to_set = BITS_PER_BYTE;
-		mask_to_set = ~0U;
+		mask_to_set = ~(u8)0;
 		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
@@ -5676,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
 				unsigned long pos, unsigned long len)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
 	const unsigned int size = pos + len;
 	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
 	eb_bitmap_offset(eb, start, pos, &i, &offset);
 	page = eb->pages[i];
@@ -5693,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
 		kaddr[offset] &= ~mask_to_clear;
 		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_BYTE;
-		mask_to_clear = ~0U;
+		mask_to_clear = ~(u8)0;
 		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4a094f1dc7ef..ab31d145227e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -59,6 +59,28 @@
  */
 #define EXTENT_PAGE_PRIVATE 1
 
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ *    single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+	return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+extern void le_bitmap_set(u8 *map, unsigned int start, int len);
+extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
+
 struct extent_state;
 struct btrfs_root;
 struct btrfs_io_bio;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index e4a42a8e4f84..57401b474ec6 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -151,7 +151,7 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 }
 
-static unsigned long *alloc_bitmap(u32 bitmap_size)
+static u8 *alloc_bitmap(u32 bitmap_size)
 {
 	void *mem;
 
@@ -180,8 +180,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 	struct btrfs_free_space_info *info;
 	struct btrfs_key key, found_key;
 	struct extent_buffer *leaf;
-	unsigned long *bitmap;
-	char *bitmap_cursor;
+	u8 *bitmap, *bitmap_cursor;
 	u64 start, end;
 	u64 bitmap_range, i;
 	u32 bitmap_size, flags, expected_extent_count;
@@ -231,7 +230,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 						block_group->sectorsize);
 				last = div_u64(found_key.objectid + found_key.offset - start,
 					       block_group->sectorsize);
-				bitmap_set(bitmap, first, last - first);
+				le_bitmap_set(bitmap, first, last - first);
 
 				extent_count++;
 				nr++;
@@ -270,7 +269,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	bitmap_cursor = (char *)bitmap;
+	bitmap_cursor = bitmap;
 	bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 	i = start;
 	while (i < end) {
@@ -319,7 +318,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 	struct btrfs_free_space_info *info;
 	struct btrfs_key key, found_key;
 	struct extent_buffer *leaf;
-	unsigned long *bitmap;
+	u8 *bitmap;
 	u64 start, end;
 	/* Initialize to silence GCC. */
 	u64 extent_start = 0;
@@ -363,7 +362,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 				break;
 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 				unsigned long ptr;
-				char *bitmap_cursor;
+				u8 *bitmap_cursor;
 				u32 bitmap_pos, data_size;
 
 				ASSERT(found_key.objectid >= start);
@@ -373,7 +372,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 				bitmap_pos = div_u64(found_key.objectid - start,
 						     block_group->sectorsize *
 						     BITS_PER_BYTE);
-				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+				bitmap_cursor = bitmap + bitmap_pos;
 				data_size = free_space_bitmap_size(found_key.offset,
 								   block_group->sectorsize);
 
@@ -410,7 +409,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 	offset = start;
 	bitnr = 0;
 	while (offset < end) {
-		bit = !!test_bit(bitnr, bitmap);
+		bit = !!le_test_bit(bitnr, bitmap);
 		if (prev_bit == 0 && bit == 1) {
 			extent_start = offset;
 		} else if (prev_bit == 1 && bit == 0) {
@@ -1185,6 +1184,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
 	}
 
 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
 	ret = btrfs_commit_transaction(trans, tree_root);
@@ -1253,6 +1253,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
 		return PTR_ERR(trans);
 
 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 	fs_info->free_space_root = NULL;
 
 	ret = clear_free_space_tree(trans, free_space_root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 01bc36cec26e..71261b459863 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5805,6 +5805,64 @@ static int changed_extent(struct send_ctx *sctx,
 	int ret = 0;
 
 	if (sctx->cur_ino != sctx->cmp_key->objectid) {
+
+		if (result == BTRFS_COMPARE_TREE_CHANGED) {
+			struct extent_buffer *leaf_l;
+			struct extent_buffer *leaf_r;
+			struct btrfs_file_extent_item *ei_l;
+			struct btrfs_file_extent_item *ei_r;
+
+			leaf_l = sctx->left_path->nodes[0];
+			leaf_r = sctx->right_path->nodes[0];
+			ei_l = btrfs_item_ptr(leaf_l,
+					      sctx->left_path->slots[0],
+					      struct btrfs_file_extent_item);
+			ei_r = btrfs_item_ptr(leaf_r,
+					      sctx->right_path->slots[0],
+					      struct btrfs_file_extent_item);
+
+			/*
+			 * We may have found an extent item that has changed
+			 * only its disk_bytenr field and the corresponding
+			 * inode item was not updated. This case happens due to
+			 * very specific timings during relocation when a leaf
+			 * that contains file extent items is COWed while
+			 * relocation is ongoing and its in the stage where it
+			 * updates data pointers. So when this happens we can
+			 * safely ignore it since we know it's the same extent,
+			 * but just at different logical and physical locations
+			 * (when an extent is fully replaced with a new one, we
+			 * know the generation number must have changed too,
+			 * since snapshot creation implies committing the current
+			 * transaction, and the inode item must have been updated
+			 * as well).
+			 * This replacement of the disk_bytenr happens at
+			 * relocation.c:replace_file_extents() through
+			 * relocation.c:btrfs_reloc_cow_block().
+			 */
+			if (btrfs_file_extent_generation(leaf_l, ei_l) ==
+			    btrfs_file_extent_generation(leaf_r, ei_r) &&
+			    btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
+			    btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
+			    btrfs_file_extent_compression(leaf_l, ei_l) ==
+			    btrfs_file_extent_compression(leaf_r, ei_r) &&
+			    btrfs_file_extent_encryption(leaf_l, ei_l) ==
+			    btrfs_file_extent_encryption(leaf_r, ei_r) &&
+			    btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
+			    btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
+			    btrfs_file_extent_type(leaf_l, ei_l) ==
+			    btrfs_file_extent_type(leaf_r, ei_r) &&
+			    btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
+			    btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
+			    btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
+			    btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
+			    btrfs_file_extent_offset(leaf_l, ei_l) ==
+			    btrfs_file_extent_offset(leaf_r, ei_r) &&
+			    btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
+			    btrfs_file_extent_num_bytes(leaf_r, ei_r))
+				return 0;
+		}
+
 		inconsistent_snapshot_error(sctx, result, "extent");
 		return -EIO;
 	}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index d19ab0317283..caad80bb9bd0 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -273,20 +273,37 @@ out:
 	return ret;
 }
 
-/**
- * test_bit_in_byte - Determine whether a bit is set in a byte
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit_in_byte(int nr, const u8 *addr)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
+			   unsigned long len)
 {
-	return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+	unsigned long i;
+
+	for (i = 0; i < len * BITS_PER_BYTE; i++) {
+		int bit, bit1;
+
+		bit = !!test_bit(i, bitmap);
+		bit1 = !!extent_buffer_test_bit(eb, 0, i);
+		if (bit1 != bit) {
+			test_msg("Bits do not match\n");
+			return -EINVAL;
+		}
+
+		bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
+						i % BITS_PER_BYTE);
+		if (bit1 != bit) {
+			test_msg("Offset bits do not match\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
 }
 
 static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 			     unsigned long len)
 {
-	unsigned long i, x;
+	unsigned long i, j;
+	u32 x;
+	int ret;
 
 	memset(bitmap, 0, len);
 	memset_extent_buffer(eb, 0, 0, len);
@@ -297,16 +314,18 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 
 	bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
 	extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+	ret = check_eb_bitmap(bitmap, eb, len);
+	if (ret) {
 		test_msg("Setting all bits failed\n");
-		return -EINVAL;
+		return ret;
 	}
 
 	bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
 	extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+	ret = check_eb_bitmap(bitmap, eb, len);
+	if (ret) {
 		test_msg("Clearing all bits failed\n");
-		return -EINVAL;
+		return ret;
 	}
 
 	/* Straddling pages test */
@@ -316,9 +335,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 			sizeof(long) * BITS_PER_BYTE);
 		extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
 					sizeof(long) * BITS_PER_BYTE);
-		if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+		ret = check_eb_bitmap(bitmap, eb, len);
+		if (ret) {
 			test_msg("Setting straddling pages failed\n");
-			return -EINVAL;
+			return ret;
 		}
 
 		bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
@@ -328,9 +348,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 		extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
 		extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
 					sizeof(long) * BITS_PER_BYTE);
-		if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+		ret = check_eb_bitmap(bitmap, eb, len);
+		if (ret) {
 			test_msg("Clearing straddling pages failed\n");
-			return -EINVAL;
+			return ret;
 		}
 	}
 
@@ -339,28 +360,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 	 * something repetitive that could miss some hypothetical off-by-n bug.
 	 */
 	x = 0;
-	for (i = 0; i < len / sizeof(long); i++) {
-		x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL;
-		bitmap[i] = x;
-	}
-	write_extent_buffer(eb, bitmap, 0, len);
-
-	for (i = 0; i < len * BITS_PER_BYTE; i++) {
-		int bit, bit1;
-
-		bit = !!test_bit_in_byte(i, (u8 *)bitmap);
-		bit1 = !!extent_buffer_test_bit(eb, 0, i);
-		if (bit1 != bit) {
-			test_msg("Testing bit pattern failed\n");
-			return -EINVAL;
+	bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
+	extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
+	for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+		x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
+		for (j = 0; j < 32; j++) {
+			if (x & (1U << j)) {
+				bitmap_set(bitmap, i * 32 + j, 1);
+				extent_buffer_bitmap_set(eb, 0, i * 32 + j, 1);
+			}
 		}
+	}
 
-		bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
-						i % BITS_PER_BYTE);
-		if (bit1 != bit) {
-			test_msg("Testing bit pattern with offset failed\n");
-			return -EINVAL;
-		}
+	ret = check_eb_bitmap(bitmap, eb, len);
+	if (ret) {
+		test_msg("Random bit pattern failed\n");
+		return ret;
 	}
 
 	return 0;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 7508d3b42780..6e144048a72e 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -24,20 +24,15 @@
 #include "../transaction.h"
 
 struct free_space_extent {
-	u64 start, length;
+	u64 start;
+	u64 length;
 };
 
-/*
- * The test cases align their operations to this in order to hit some of the
- * edge cases in the bitmap code.
- */
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
-
 static int __check_free_space_extents(struct btrfs_trans_handle *trans,
 				      struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_group_cache *cache,
 				      struct btrfs_path *path,
-				      struct free_space_extent *extents,
+				      const struct free_space_extent * const extents,
 				      unsigned int num_extents)
 {
 	struct btrfs_free_space_info *info;
@@ -126,7 +121,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
 				    struct btrfs_fs_info *fs_info,
 				    struct btrfs_block_group_cache *cache,
 				    struct btrfs_path *path,
-				    struct free_space_extent *extents,
+				    const struct free_space_extent * const extents,
 				    unsigned int num_extents)
 {
 	struct btrfs_free_space_info *info;
@@ -168,9 +163,10 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
 static int test_empty_block_group(struct btrfs_trans_handle *trans,
 				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *cache,
-				  struct btrfs_path *path)
+				  struct btrfs_path *path,
+				  u32 alignment)
 {
-	struct free_space_extent extents[] = {
+	const struct free_space_extent extents[] = {
 		{cache->key.objectid, cache->key.offset},
 	};
 
@@ -181,9 +177,10 @@ static int test_empty_block_group(struct btrfs_trans_handle *trans,
 static int test_remove_all(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {};
+	const struct free_space_extent extents[] = {};
 	int ret;
 
 	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
@@ -201,16 +198,17 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
 static int test_remove_beginning(struct btrfs_trans_handle *trans,
 				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_block_group_cache *cache,
-				 struct btrfs_path *path)
+				 struct btrfs_path *path,
+				 u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid + BITMAP_RANGE,
-			cache->key.offset - BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid + alignment,
+			cache->key.offset - alignment},
 	};
 	int ret;
 
 	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
-					    cache->key.objectid, BITMAP_RANGE);
+					    cache->key.objectid, alignment);
 	if (ret) {
 		test_msg("Could not remove free space\n");
 		return ret;
@@ -224,17 +222,18 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
 static int test_remove_end(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid, cache->key.offset - BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid, cache->key.offset - alignment},
 	};
 	int ret;
 
 	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
 					    cache->key.objectid +
-					    cache->key.offset - BITMAP_RANGE,
-					    BITMAP_RANGE);
+					    cache->key.offset - alignment,
+					    alignment);
 	if (ret) {
 		test_msg("Could not remove free space\n");
 		return ret;
@@ -247,18 +246,19 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
 static int test_remove_middle(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info,
 			      struct btrfs_block_group_cache *cache,
-			      struct btrfs_path *path)
+			      struct btrfs_path *path,
+			      u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid, BITMAP_RANGE},
-		{cache->key.objectid + 2 * BITMAP_RANGE,
-			cache->key.offset - 2 * BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid, alignment},
+		{cache->key.objectid + 2 * alignment,
+			cache->key.offset - 2 * alignment},
 	};
 	int ret;
 
 	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
-					    cache->key.objectid + BITMAP_RANGE,
-					    BITMAP_RANGE);
+					    cache->key.objectid + alignment,
+					    alignment);
 	if (ret) {
 		test_msg("Could not remove free space\n");
 		return ret;
@@ -271,10 +271,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
 static int test_merge_left(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid, 2 * BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid, 2 * alignment},
 	};
 	int ret;
 
@@ -287,15 +288,15 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, BITMAP_RANGE);
+				       cache->key.objectid, alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
@@ -308,10 +309,11 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
 static int test_merge_right(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid + alignment, 2 * alignment},
 	};
 	int ret;
 
@@ -324,16 +326,16 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + 2 * BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + 2 * alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
@@ -346,10 +348,11 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
 static int test_merge_both(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid, 3 * BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid, 3 * alignment},
 	};
 	int ret;
 
@@ -362,23 +365,23 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, BITMAP_RANGE);
+				       cache->key.objectid, alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + 2 * BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + 2 * alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
@@ -391,12 +394,13 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
 static int test_merge_none(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info,
 			   struct btrfs_block_group_cache *cache,
-			   struct btrfs_path *path)
+			   struct btrfs_path *path,
+			   u32 alignment)
 {
-	struct free_space_extent extents[] = {
-		{cache->key.objectid, BITMAP_RANGE},
-		{cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE},
-		{cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE},
+	const struct free_space_extent extents[] = {
+		{cache->key.objectid, alignment},
+		{cache->key.objectid + 2 * alignment, alignment},
+		{cache->key.objectid + 4 * alignment, alignment},
 	};
 	int ret;
 
@@ -409,23 +413,23 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, BITMAP_RANGE);
+				       cache->key.objectid, alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + 4 * BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + 4 * alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
 	}
 
 	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid + 2 * BITMAP_RANGE,
-				       BITMAP_RANGE);
+				       cache->key.objectid + 2 * alignment,
+				       alignment);
 	if (ret) {
 		test_msg("Could not add free space\n");
 		return ret;
@@ -438,10 +442,11 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
 typedef int (*test_func_t)(struct btrfs_trans_handle *,
 			   struct btrfs_fs_info *,
 			   struct btrfs_block_group_cache *,
-			   struct btrfs_path *);
+			   struct btrfs_path *,
+			   u32 alignment);
 
-static int run_test(test_func_t test_func, int bitmaps,
-		u32 sectorsize, u32 nodesize)
+static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
+		    u32 nodesize, u32 alignment)
 {
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_root *root = NULL;
@@ -480,7 +485,7 @@ static int run_test(test_func_t test_func, int bitmaps,
 	btrfs_set_header_nritems(root->node, 0);
 	root->alloc_bytenr += 2 * nodesize;
 
-	cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
+	cache = btrfs_alloc_dummy_block_group(8 * alignment, sectorsize);
 	if (!cache) {
 		test_msg("Couldn't allocate dummy block group cache\n");
 		ret = -ENOMEM;
@@ -514,7 +519,7 @@ static int run_test(test_func_t test_func, int bitmaps,
 		}
 	}
 
-	ret = test_func(&trans, root->fs_info, cache, path);
+	ret = test_func(&trans, root->fs_info, cache, path, alignment);
 	if (ret)
 		goto out;
 
@@ -539,15 +544,27 @@ out:
 	return ret;
 }
 
-static int run_test_both_formats(test_func_t test_func,
-	u32 sectorsize, u32 nodesize)
+static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
+				 u32 nodesize, u32 alignment)
 {
+	int test_ret = 0;
 	int ret;
 
-	ret = run_test(test_func, 0, sectorsize, nodesize);
-	if (ret)
-		return ret;
-	return run_test(test_func, 1, sectorsize, nodesize);
+	ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
+	if (ret) {
+		test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+			 test_func, sectorsize, nodesize, alignment);
+		test_ret = ret;
+	}
+
+	ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
+	if (ret) {
+		test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+			 test_func, sectorsize, nodesize, alignment);
+		test_ret = ret;
+	}
+
+	return test_ret;
 }
 
 int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
@@ -563,18 +580,30 @@ int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
 		test_merge_both,
 		test_merge_none,
 	};
+	u32 bitmap_alignment;
+	int test_ret = 0;
 	int i;
 
+	/*
+	 * Align some operations to a page to flush out bugs in the extent
+	 * buffer bitmap handling of highmem.
+	 */
+	bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
+
 	test_msg("Running free space tree tests\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		int ret = run_test_both_formats(tests[i], sectorsize,
-			nodesize);
-		if (ret) {
-			test_msg("%pf : sectorsize %u failed\n",
-				tests[i], sectorsize);
-			return ret;
-		}
+		int ret;
+
+		ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+					    sectorsize);
+		if (ret)
+			test_ret = ret;
+
+		ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+					    bitmap_alignment);
+		if (ret)
+			test_ret = ret;
 	}
 
-	return 0;
+	return test_ret;
 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 528cae123dc9..3d33c4e41e5f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2713,14 +2713,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
 					     int index, int error)
 {
 	struct btrfs_log_ctx *ctx;
+	struct btrfs_log_ctx *safe;
 
-	if (!error) {
-		INIT_LIST_HEAD(&root->log_ctxs[index]);
-		return;
-	}
-
-	list_for_each_entry(ctx, &root->log_ctxs[index], list)
+	list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+		list_del_init(&ctx->list);
 		ctx->log_ret = error;
+	}
 
 	INIT_LIST_HEAD(&root->log_ctxs[index]);
 }
@@ -2961,13 +2959,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	mutex_unlock(&root->log_mutex);
 
 out_wake_log_root:
-	/*
-	 * We needn't get log_mutex here because we are sure all
-	 * the other tasks are blocked.
-	 */
+	mutex_lock(&log_root_tree->log_mutex);
 	btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
 
-	mutex_lock(&log_root_tree->log_mutex);
 	log_root_tree->log_transid_committed++;
 	atomic_set(&log_root_tree->log_commit[index2], 0);
 	mutex_unlock(&log_root_tree->log_mutex);
@@ -2978,10 +2972,8 @@ out_wake_log_root:
 	if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
 		wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
-	/* See above. */
-	btrfs_remove_all_log_ctxs(root, index1, ret);
-
 	mutex_lock(&root->log_mutex);
+	btrfs_remove_all_log_ctxs(root, index1, ret);
 	root->log_transid_committed++;
 	atomic_set(&root->log_commit[index1], 0);
 	mutex_unlock(&root->log_mutex);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7bf08825cc11..18630e800208 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1272,7 +1272,8 @@ again:
 		statret = __ceph_do_getattr(inode, page,
 					    CEPH_STAT_CAP_INLINE_DATA, !!page);
 		if (statret < 0) {
-			 __free_page(page);
+			if (page)
+				__free_page(page);
 			if (statret == -ENODATA) {
 				BUG_ON(retry_op != READ_INLINE);
 				goto again;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bca1b49c1c4b..ef4d04647325 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1511,7 +1511,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 			ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
 	}
 
-	if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) {
+	if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
+	    !(rinfo->hash_order && req->r_path2)) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
 		req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a29ffce98187..b382e5910eea 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -845,6 +845,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 		err = ceph_fs_debugfs_init(fsc);
 		if (err < 0)
 			goto fail;
+	} else {
+		root = dget(fsc->sb->s_root);
 	}
 
 	fsc->mount_state = CEPH_MOUNT_MOUNTED;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 40b703217977..febc28f9e2c2 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -16,7 +16,7 @@
 static int __remove_xattr(struct ceph_inode_info *ci,
 			  struct ceph_inode_xattr *xattr);
 
-const struct xattr_handler ceph_other_xattr_handler;
+static const struct xattr_handler ceph_other_xattr_handler;
 
 /*
  * List of handlers for synthetic system.* attributes. Other
@@ -1086,7 +1086,7 @@ static int ceph_set_xattr_handler(const struct xattr_handler *handler,
 	return __ceph_setxattr(inode, name, value, size, flags);
 }
 
-const struct xattr_handler ceph_other_xattr_handler = {
+static const struct xattr_handler ceph_other_xattr_handler = {
 	.prefix = "",  /* match any name => handlers called with full name */
 	.get = ceph_get_xattr_handler,
 	.set = ceph_set_xattr_handler,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 6c58e13fed2f..3d03e48a9213 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -152,6 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	list_for_each(tmp1, &cifs_tcp_ses_list) {
 		server = list_entry(tmp1, struct TCP_Server_Info,
 				    tcp_ses_list);
+		seq_printf(m, "\nNumber of credits: %d", server->credits);
 		i++;
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 1418daa03d95..07ed81cf1552 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -49,6 +49,7 @@
 #define CIFS_MOUNT_USE_PREFIX_PATH 0x1000000 /* make subpath with unaccessible
 					      * root mountable
 					      */
+#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
 
 struct cifs_sb_info {
 	struct rb_root tlink_tree;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 0065256881d8..57ff0756e30c 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -36,7 +36,15 @@ struct smb_mnt_fs_info {
 	__u64   cifs_posix_caps;
 } __packed;
 
+struct smb_snapshot_array {
+	__u32	number_of_snapshots;
+	__u32	number_of_snapshots_returned;
+	__u32	snapshot_array_size;
+	/*	snapshots[]; */
+} __packed;
+
 #define CIFS_IOCTL_MAGIC	0xCF
 #define CIFS_IOC_COPYCHUNK_FILE	_IOW(CIFS_IOCTL_MAGIC, 3, int)
 #define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)
 #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
+#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 71e8a56e9479..15bac390dff9 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -42,6 +42,35 @@ static const struct cifs_sid sid_authusers = {
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
+/* S-1-22-1 Unmapped Unix users */
+static const struct cifs_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22},
+		{cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-22-2 Unmapped Unix groups */
+static const struct cifs_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22},
+		{cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx
+ */
+
+/* S-1-5-88 MS NFS and Apple style UID/GID/mode */
+
+/* S-1-5-88-1 Unix uid */
+static const struct cifs_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5},
+	{cpu_to_le32(88),
+	 cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-2 Unix gid */
+static const struct cifs_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5},
+	{cpu_to_le32(88),
+	 cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-3 Unix mode */
+static const struct cifs_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5},
+	{cpu_to_le32(88),
+	 cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
 static const struct cred *root_cred;
 
 static int
@@ -183,6 +212,62 @@ compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 	return 0; /* sids compare/match */
 }
 
+static bool
+is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
+{
+	int i;
+	int num_subauth;
+	const struct cifs_sid *pwell_known_sid;
+
+	if (!psid || (puid == NULL))
+		return false;
+
+	num_subauth = psid->num_subauth;
+
+	/* check if Mac (or Windows NFS) vs. Samba format for Unix owner SID */
+	if (num_subauth == 2) {
+		if (is_group)
+			pwell_known_sid = &sid_unix_groups;
+		else
+			pwell_known_sid = &sid_unix_users;
+	} else if (num_subauth == 3) {
+		if (is_group)
+			pwell_known_sid = &sid_unix_NFS_groups;
+		else
+			pwell_known_sid = &sid_unix_NFS_users;
+	} else
+		return false;
+
+	/* compare the revision */
+	if (psid->revision != pwell_known_sid->revision)
+		return false;
+
+	/* compare all of the six auth values */
+	for (i = 0; i < NUM_AUTHS; ++i) {
+		if (psid->authority[i] != pwell_known_sid->authority[i]) {
+			cifs_dbg(FYI, "auth %d did not match\n", i);
+			return false;
+		}
+	}
+
+	if (num_subauth == 2) {
+		if (psid->sub_auth[0] != pwell_known_sid->sub_auth[0])
+			return false;
+
+		*puid = le32_to_cpu(psid->sub_auth[1]);
+	} else /* 3 subauths, ie Windows/Mac style */ {
+		*puid = le32_to_cpu(psid->sub_auth[0]);
+		if ((psid->sub_auth[0] != pwell_known_sid->sub_auth[0]) ||
+		    (psid->sub_auth[1] != pwell_known_sid->sub_auth[1]))
+			return false;
+
+		*puid = le32_to_cpu(psid->sub_auth[2]);
+	}
+
+	cifs_dbg(FYI, "Unix UID %d returned from SID\n", *puid);
+	return true; /* well known sid found, uid returned */
+}
+
 static void
 cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
 {
@@ -276,6 +361,43 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
 		return -EIO;
 	}
 
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) {
+		uint32_t unix_id;
+		bool is_group;
+
+		if (sidtype != SIDOWNER)
+			is_group = true;
+		else
+			is_group = false;
+
+		if (is_well_known_sid(psid, &unix_id, is_group) == false)
+			goto try_upcall_to_get_id;
+
+		if (is_group) {
+			kgid_t gid;
+			gid_t id;
+
+			id = (gid_t)unix_id;
+			gid = make_kgid(&init_user_ns, id);
+			if (gid_valid(gid)) {
+				fgid = gid;
+				goto got_valid_id;
+			}
+		} else {
+			kuid_t uid;
+			uid_t id;
+
+			id = (uid_t)unix_id;
+			uid = make_kuid(&init_user_ns, id);
+			if (uid_valid(uid)) {
+				fuid = uid;
+				goto got_valid_id;
+			}
+		}
+		/* If unable to find uid/gid easily from SID try via upcall */
+	}
+
+try_upcall_to_get_id:
 	sidstr = sid_to_key_str(psid, sidtype);
 	if (!sidstr)
 		return -ENOMEM;
@@ -329,6 +451,7 @@ out_revert_creds:
 	 * Note that we return 0 here unconditionally. If the mapping
 	 * fails then we just fall back to using the mnt_uid/mnt_gid.
 	 */
+got_valid_id:
 	if (sidtype == SIDOWNER)
 		fattr->cf_uid = fuid;
 	else
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cca04e710421..15261ba464c5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -64,15 +64,15 @@ unsigned int global_secflags = CIFSSEC_DEF;
 unsigned int sign_CIFS_PDUs = 1;
 static const struct super_operations cifs_super_ops;
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
-module_param(CIFSMaxBufSize, uint, 0);
+module_param(CIFSMaxBufSize, uint, 0444);
 MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
 				 "Default: 16384 Range: 8192 to 130048");
 unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
-module_param(cifs_min_rcv, uint, 0);
+module_param(cifs_min_rcv, uint, 0444);
 MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: "
 				"1 to 64");
 unsigned int cifs_min_small = 30;
-module_param(cifs_min_small, uint, 0);
+module_param(cifs_min_small, uint, 0444);
 MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
 				 "Range: 2 to 256");
 unsigned int cifs_max_pending = CIFS_MAX_REQ;
@@ -271,7 +271,7 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->createtime = 0;
 	cifs_inode->epoch = 0;
 #ifdef CONFIG_CIFS_SMB2
-	get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
+	generate_random_uuid(cifs_inode->lease_key);
 #endif
 	/*
 	 * Can not set i_flags here - they get immediately overwritten to zero
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_puts(s, ",posixpaths");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
 		seq_puts(s, ",setuids");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+		seq_puts(s, ",idsfromsid");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
 		seq_puts(s, ",serverino");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
@@ -1262,7 +1264,6 @@ init_cifs(void)
 	GlobalTotalActiveXid = 0;
 	GlobalMaxActiveXid = 0;
 	spin_lock_init(&cifs_tcp_ses_lock);
-	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
 	get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8f1d8c1e72be..1f17f6bd7a60 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -75,6 +75,18 @@
 #define SMB_ECHO_INTERVAL_MAX 600
 #define SMB_ECHO_INTERVAL_DEFAULT 60
 
+/*
+ * Default number of credits to keep available for SMB3.
+ * This value is chosen somewhat arbitrarily. The Windows client
+ * defaults to 128 credits, the Windows server allows clients up to
+ * 512 credits (or 8K for later versions), and the NetApp server
+ * does not limit clients at all.  Choose a high enough default value
+ * such that the client shouldn't limit performance, but allow mount
+ * to override (until you approach 64K, where we limit credits to 65000
+ * to reduce possibility of seeing more server credit overflow bugs.
+ */
+#define SMB2_MAX_CREDITS_AVAILABLE 32000
+
 #include "cifspdu.h"
 
 #ifndef XATTR_DOS_ATTRIB
@@ -376,6 +388,8 @@ struct smb_version_operations {
 	int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
 	int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
 			     struct cifsFileInfo *src_file);
+	int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
+			     struct cifsFileInfo *src_file, void __user *);
 	int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
 				struct cifs_sb_info *, const unsigned char *,
 				char *, unsigned int *);
@@ -464,6 +478,7 @@ struct smb_vol {
 	bool retry:1;
 	bool intr:1;
 	bool setuids:1;
+	bool setuidfromacl:1;
 	bool override_uid:1;
 	bool override_gid:1;
 	bool dynperm:1;
@@ -510,6 +525,7 @@ struct smb_vol {
 	struct sockaddr_storage srcaddr; /* allow binding to a local IP */
 	struct nls_table *local_nls;
 	unsigned int echo_interval; /* echo interval in secs */
+	unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
 };
 
 #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -567,7 +583,8 @@ struct TCP_Server_Info {
 	bool noblocksnd;		/* use blocking sendmsg */
 	bool noautotune;		/* do not autotune send buf sizes */
 	bool tcp_nodelay;
-	int credits;  /* send no more requests at once */
+	unsigned int credits;  /* send no more requests at once */
+	unsigned int max_credits; /* can override large 32000 default at mnt */
 	unsigned int in_flight;  /* number of requests on the wire to server */
 	spinlock_t req_lock;  /* protect the two values above */
 	struct mutex srv_mutex;
@@ -833,6 +850,7 @@ struct cifs_tcon {
 	struct list_head tcon_list;
 	int tc_count;
 	struct list_head openFileList;
+	spinlock_t open_file_lock; /* protects list above */
 	struct cifs_ses *ses;	/* pointer to session associated with */
 	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
 	char *nativeFileSystem;
@@ -889,7 +907,7 @@ struct cifs_tcon {
 #endif /* CONFIG_CIFS_STATS2 */
 	__u64    bytes_read;
 	__u64    bytes_written;
-	spinlock_t stat_lock;
+	spinlock_t stat_lock;  /* protects the two fields above */
 #endif /* CONFIG_CIFS_STATS */
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
@@ -1040,20 +1058,24 @@ struct cifs_fid_locks {
 };
 
 struct cifsFileInfo {
+	/* following two lists are protected by tcon->open_file_lock */
 	struct list_head tlist;	/* pointer to next fid owned by tcon */
 	struct list_head flist;	/* next fid (file instance) for this inode */
+	/* lock list below protected by cifsi->lock_sem */
 	struct cifs_fid_locks *llist;	/* brlocks held by this fid */
 	kuid_t uid;		/* allows finding which FileInfo structure */
 	__u32 pid;		/* process id who opened file */
 	struct cifs_fid fid;	/* file id from remote */
+	struct list_head rlist; /* reconnect list */
 	/* BB add lock scope info here if needed */ ;
 	/* lock scope id (0 if none) */
 	struct dentry *dentry;
-	unsigned int f_flags;
 	struct tcon_link *tlink;
+	unsigned int f_flags;
 	bool invalidHandle:1;	/* file closed via session abend */
 	bool oplock_break_cancelled:1;
-	int count;		/* refcount protected by cifs_file_list_lock */
+	int count;
+	spinlock_t file_info_lock; /* protects four flag/count fields above */
 	struct mutex fh_mutex; /* prevents reopen race after dead ses*/
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
@@ -1120,7 +1142,7 @@ struct cifs_writedata {
 
 /*
  * Take a reference on the file private data. Must be called with
- * cifs_file_list_lock held.
+ * cfile->file_info_lock held.
  */
 static inline void
 cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
@@ -1514,8 +1536,10 @@ require use of the stronger protocol */
  *  GlobalMid_Lock protects:
  *	list operations on pending_mid_q and oplockQ
  *      updates to XID counters, multiplex id  and SMB sequence numbers
- *  cifs_file_list_lock protects:
- *	list operations on tcp and SMB session lists and tCon lists
+ *  tcp_ses_lock protects:
+ *	list operations on tcp and SMB session lists
+ *  tcon->open_file_lock protects the list of open files hanging off the tcon
+ *  cfile->file_info_lock protects counters and fields in cifs file struct
  *  f_owner.lock protects certain per file struct operations
  *  mapping->page_lock protects certain per page operations
  *
@@ -1547,18 +1571,12 @@ GLOBAL_EXTERN struct list_head		cifs_tcp_ses_list;
  * tcp session, and the list of tcon's per smb session. It also protects
  * the reference counters for the server, smb session, and tcon. Finally,
  * changes to the tcon->tidStatus should be done while holding this lock.
+ * generally the locks should be taken in order tcp_ses_lock before
+ * tcon->open_file_lock and that before file->file_info_lock since the
+ * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file
  */
 GLOBAL_EXTERN spinlock_t		cifs_tcp_ses_lock;
 
-/*
- * This lock protects the cifs_file->llist and cifs_file->flist
- * list operations, and updates to some flags (cifs_file->invalidHandle)
- * It will be moved to either use the tcon->stat_lock or equivalent later.
- * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
- * the cifs_tcp_ses_lock must be grabbed first and released last.
- */
-GLOBAL_EXTERN spinlock_t	cifs_file_list_lock;
-
 #ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4ead72a001f9..ced0e42ce460 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -193,6 +193,8 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
 extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
 extern void cifs_umount(struct cifs_sb_info *);
 extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
+extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
+
 extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
 				    __u64 length, __u8 type,
 				    struct cifsLockInfo **conf_lock,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f82d2823622f..3f3185febc58 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -98,13 +98,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
 	struct list_head *tmp1;
 
 	/* list all files open on tree connection and mark them invalid */
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
 		open_file->invalidHandle = true;
 		open_file->oplock_break_cancelled = true;
 	}
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 	/*
 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
 	 * to this tcon.
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2e4f4bad8b1e..aab5227979e2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -63,7 +63,6 @@ extern mempool_t *cifs_req_poolp;
 #define TLINK_IDLE_EXPIRE	(600 * HZ)
 
 enum {
-
 	/* Mount options that take no arguments */
 	Opt_user_xattr, Opt_nouser_xattr,
 	Opt_forceuid, Opt_noforceuid,
@@ -76,7 +75,7 @@ enum {
 	Opt_noposixpaths, Opt_nounix,
 	Opt_nocase,
 	Opt_brl, Opt_nobrl,
-	Opt_forcemandatorylock, Opt_setuids,
+	Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
 	Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
 	Opt_nohard, Opt_nosoft,
 	Opt_nointr, Opt_intr,
@@ -95,7 +94,7 @@ enum {
 	Opt_cruid, Opt_gid, Opt_file_mode,
 	Opt_dirmode, Opt_port,
 	Opt_rsize, Opt_wsize, Opt_actimeo,
-	Opt_echo_interval,
+	Opt_echo_interval, Opt_max_credits,
 
 	/* Mount options which take string value */
 	Opt_user, Opt_pass, Opt_ip,
@@ -148,6 +147,7 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_forcemandatorylock, "forcemand" },
 	{ Opt_setuids, "setuids" },
 	{ Opt_nosetuids, "nosetuids" },
+	{ Opt_setuidfromacl, "idsfromsid" },
 	{ Opt_dynperm, "dynperm" },
 	{ Opt_nodynperm, "nodynperm" },
 	{ Opt_nohard, "nohard" },
@@ -190,6 +190,7 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_wsize, "wsize=%s" },
 	{ Opt_actimeo, "actimeo=%s" },
 	{ Opt_echo_interval, "echo_interval=%s" },
+	{ Opt_max_credits, "max_credits=%s" },
 
 	{ Opt_blank_user, "user=" },
 	{ Opt_blank_user, "username=" },
@@ -1376,6 +1377,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 		case Opt_nosetuids:
 			vol->setuids = 0;
 			break;
+		case Opt_setuidfromacl:
+			vol->setuidfromacl = 1;
+			break;
 		case Opt_dynperm:
 			vol->dynperm = true;
 			break;
@@ -1586,6 +1590,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			}
 			vol->echo_interval = option;
 			break;
+		case Opt_max_credits:
+			if (get_option_ul(args, &option) || (option < 20) ||
+			    (option > 60000)) {
+				cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+					 __func__);
+				goto cifs_parse_mount_err;
+			}
+			vol->max_credits = option;
+			break;
 
 		/* String Arguments */
 
@@ -2163,7 +2176,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
 		sizeof(tcp_ses->dstaddr));
 #ifdef CONFIG_CIFS_SMB2
-	get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE);
+	generate_random_uuid(tcp_ses->client_guid);
 #endif
 	/*
 	 * at this point we are the only ones with the pointer
@@ -3270,6 +3283,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
 	if (pvolume_info->setuids)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+	if (pvolume_info->setuidfromacl)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
 	if (pvolume_info->server_ino)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
 	if (pvolume_info->remap)
@@ -3598,7 +3613,11 @@ try_mount_again:
 		bdi_destroy(&cifs_sb->bdi);
 		goto out;
 	}
-
+	if ((volume_info->max_credits < 20) ||
+	     (volume_info->max_credits > 60000))
+		server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
+	else
+		server->max_credits = volume_info->max_credits;
 	/* get a reference to a SMB session */
 	ses = cifs_get_smb_ses(server, volume_info);
 	if (IS_ERR(ses)) {
@@ -3688,14 +3707,16 @@ remote_path_check:
 			goto mount_fail_check;
 		}
 
-		rc = cifs_are_all_path_components_accessible(server,
+		if (rc != -EREMOTE) {
+			rc = cifs_are_all_path_components_accessible(server,
 							     xid, tcon, cifs_sb,
 							     full_path);
-		if (rc != 0) {
-			cifs_dbg(VFS, "cannot query dirs between root and final path, "
-				 "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
-			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
-			rc = 0;
+			if (rc != 0) {
+				cifs_dbg(VFS, "cannot query dirs between root and final path, "
+					 "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
+				cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+				rc = 0;
+			}
 		}
 		kfree(full_path);
 	}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a95fe8b1afe9..7f5f6176c6f1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -305,6 +305,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	mutex_init(&cfile->fh_mutex);
+	spin_lock_init(&cfile->file_info_lock);
 
 	cifs_sb_active(inode->i_sb);
 
@@ -317,7 +318,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 		oplock = 0;
 	}
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
 		oplock = fid->pending_open->oplock;
 	list_del(&fid->pending_open->olist);
@@ -326,12 +327,13 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	server->ops->set_fid(cfile, fid, oplock);
 
 	list_add(&cfile->tlist, &tcon->openFileList);
+
 	/* if readable file instance put first in list*/
 	if (file->f_mode & FMODE_READ)
 		list_add(&cfile->flist, &cinode->openFileList);
 	else
 		list_add_tail(&cfile->flist, &cinode->openFileList);
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 
 	if (fid->purge_cache)
 		cifs_zap_mapping(inode);
@@ -343,16 +345,16 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 struct cifsFileInfo *
 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 {
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&cifs_file->file_info_lock);
 	cifsFileInfo_get_locked(cifs_file);
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&cifs_file->file_info_lock);
 	return cifs_file;
 }
 
 /*
  * Release a reference on the file private data. This may involve closing
  * the filehandle out on the server. Must be called without holding
- * cifs_file_list_lock.
+ * tcon->open_file_lock and cifs_file->file_info_lock.
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
@@ -367,11 +369,15 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	struct cifs_pending_open open;
 	bool oplock_break_cancelled;
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
+
+	spin_lock(&cifs_file->file_info_lock);
 	if (--cifs_file->count > 0) {
-		spin_unlock(&cifs_file_list_lock);
+		spin_unlock(&cifs_file->file_info_lock);
+		spin_unlock(&tcon->open_file_lock);
 		return;
 	}
+	spin_unlock(&cifs_file->file_info_lock);
 
 	if (server->ops->get_lease_key)
 		server->ops->get_lease_key(inode, &fid);
@@ -395,7 +401,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 		cifs_set_oplock_level(cifsi, 0);
 	}
-	spin_unlock(&cifs_file_list_lock);
+
+	spin_unlock(&tcon->open_file_lock);
 
 	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
 
@@ -732,6 +739,15 @@ reopen_success:
 	 * to the server to get the new inode info.
 	 */
 
+	/*
+	 * If the server returned a read oplock and we have mandatory brlocks,
+	 * set oplock level to None.
+	 */
+	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
+		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
+		oplock = 0;
+	}
+
 	server->ops->set_fid(cfile, &cfile->fid, oplock);
 	if (oparms.reconnect)
 		cifs_relock_file(cfile);
@@ -753,6 +769,36 @@ int cifs_close(struct inode *inode, struct file *file)
 	return 0;
 }
 
+void
+cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
+{
+	struct cifsFileInfo *open_file;
+	struct list_head *tmp;
+	struct list_head *tmp1;
+	struct list_head tmp_list;
+
+	cifs_dbg(FYI, "Reopen persistent handles");
+	INIT_LIST_HEAD(&tmp_list);
+
+	/* list all files open on tree connection, reopen resilient handles  */
+	spin_lock(&tcon->open_file_lock);
+	list_for_each(tmp, &tcon->openFileList) {
+		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
+		if (!open_file->invalidHandle)
+			continue;
+		cifsFileInfo_get(open_file);
+		list_add_tail(&open_file->rlist, &tmp_list);
+	}
+	spin_unlock(&tcon->open_file_lock);
+
+	list_for_each_safe(tmp, tmp1, &tmp_list) {
+		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
+		cifs_reopen_file(open_file, false /* do not flush */);
+		list_del_init(&open_file->rlist);
+		cifsFileInfo_put(open_file);
+	}
+}
+
 int cifs_closedir(struct inode *inode, struct file *file)
 {
 	int rc = 0;
@@ -772,10 +818,10 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	server = tcon->ses->server;
 
 	cifs_dbg(FYI, "Freeing private data in close dir\n");
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&cfile->file_info_lock);
 	if (server->ops->dir_needs_close(cfile)) {
 		cfile->invalidHandle = true;
-		spin_unlock(&cifs_file_list_lock);
+		spin_unlock(&cfile->file_info_lock);
 		if (server->ops->close_dir)
 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
 		else
@@ -784,7 +830,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 		/* not much we can do if it fails anyway, ignore rc */
 		rc = 0;
 	} else
-		spin_unlock(&cifs_file_list_lock);
+		spin_unlock(&cfile->file_info_lock);
 
 	buf = cfile->srch_inf.ntwrk_buf_start;
 	if (buf) {
@@ -1728,12 +1774,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 {
 	struct cifsFileInfo *open_file = NULL;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
 	/* only filter by fsuid on multiuser mounts */
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
 		fsuid_only = false;
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 	/* we could simply get the first_list_entry since write-only entries
 	   are always at the end of the list but since the first entry might
 	   have a close pending, we go through the whole list */
@@ -1744,8 +1791,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 			if (!open_file->invalidHandle) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
-				cifsFileInfo_get_locked(open_file);
-				spin_unlock(&cifs_file_list_lock);
+				cifsFileInfo_get(open_file);
+				spin_unlock(&tcon->open_file_lock);
 				return open_file;
 			} /* else might as well continue, and look for
 			     another, or simply have the caller reopen it
@@ -1753,7 +1800,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		} else /* write only file */
 			break; /* write only files are last so must be done */
 	}
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 	return NULL;
 }
 
@@ -1762,6 +1809,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 {
 	struct cifsFileInfo *open_file, *inv_file = NULL;
 	struct cifs_sb_info *cifs_sb;
+	struct cifs_tcon *tcon;
 	bool any_available = false;
 	int rc;
 	unsigned int refind = 0;
@@ -1777,15 +1825,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 	}
 
 	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+	tcon = cifs_sb_master_tcon(cifs_sb);
 
 	/* only filter by fsuid on multiuser mounts */
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
 		fsuid_only = false;
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 refind_writable:
 	if (refind > MAX_REOPEN_ATT) {
-		spin_unlock(&cifs_file_list_lock);
+		spin_unlock(&tcon->open_file_lock);
 		return NULL;
 	}
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1796,8 +1845,8 @@ refind_writable:
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
 			if (!open_file->invalidHandle) {
 				/* found a good writable file */
-				cifsFileInfo_get_locked(open_file);
-				spin_unlock(&cifs_file_list_lock);
+				cifsFileInfo_get(open_file);
+				spin_unlock(&tcon->open_file_lock);
 				return open_file;
 			} else {
 				if (!inv_file)
@@ -1813,24 +1862,24 @@ refind_writable:
 
 	if (inv_file) {
 		any_available = false;
-		cifsFileInfo_get_locked(inv_file);
+		cifsFileInfo_get(inv_file);
 	}
 
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 
 	if (inv_file) {
 		rc = cifs_reopen_file(inv_file, false);
 		if (!rc)
 			return inv_file;
 		else {
-			spin_lock(&cifs_file_list_lock);
+			spin_lock(&tcon->open_file_lock);
 			list_move_tail(&inv_file->flist,
 					&cifs_inode->openFileList);
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&tcon->open_file_lock);
 			cifsFileInfo_put(inv_file);
-			spin_lock(&cifs_file_list_lock);
 			++refind;
 			inv_file = NULL;
+			spin_lock(&tcon->open_file_lock);
 			goto refind_writable;
 		}
 	}
@@ -3612,15 +3661,17 @@ static int cifs_readpage(struct file *file, struct page *page)
 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
 {
 	struct cifsFileInfo *open_file;
+	struct cifs_tcon *tcon =
+		cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&tcon->open_file_lock);
 			return 1;
 		}
 	}
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 	return 0;
 }
 
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 7a3b84e300f8..9f51b81119f2 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -189,7 +189,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 	xid = get_xid();
 
 	cifs_sb = CIFS_SB(inode->i_sb);
-
+	cifs_dbg(VFS, "cifs ioctl 0x%x\n", command);
 	switch (command) {
 		case FS_IOC_GETFLAGS:
 			if (pSMBFile == NULL)
@@ -267,11 +267,23 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			tcon = tlink_tcon(pSMBFile->tlink);
 			rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
 			break;
+		case CIFS_ENUMERATE_SNAPSHOTS:
+			if (arg == 0) {
+				rc = -EINVAL;
+				goto cifs_ioc_exit;
+			}
+			tcon = tlink_tcon(pSMBFile->tlink);
+			if (tcon->ses->server->ops->enum_snapshots)
+				rc = tcon->ses->server->ops->enum_snapshots(xid, tcon,
+						pSMBFile, (void __user *)arg);
+			else
+				rc = -EOPNOTSUPP;
+			break;
 		default:
 			cifs_dbg(FYI, "unsupported ioctl\n");
 			break;
 	}
-
+cifs_ioc_exit:
 	free_xid(xid);
 	return rc;
 }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 813fe13c2ae1..c6729156f9a0 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -120,6 +120,7 @@ tconInfoAlloc(void)
 		++ret_buf->tc_count;
 		INIT_LIST_HEAD(&ret_buf->openFileList);
 		INIT_LIST_HEAD(&ret_buf->tcon_list);
+		spin_lock_init(&ret_buf->open_file_lock);
 #ifdef CONFIG_CIFS_STATS
 		spin_lock_init(&ret_buf->stat_lock);
 #endif
@@ -465,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 				continue;
 
 			cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-			spin_lock(&cifs_file_list_lock);
+			spin_lock(&tcon->open_file_lock);
 			list_for_each(tmp2, &tcon->openFileList) {
 				netfile = list_entry(tmp2, struct cifsFileInfo,
 						     tlist);
@@ -495,11 +496,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 					   &netfile->oplock_break);
 				netfile->oplock_break_cancelled = false;
 
-				spin_unlock(&cifs_file_list_lock);
+				spin_unlock(&tcon->open_file_lock);
 				spin_unlock(&cifs_tcp_ses_lock);
 				return true;
 			}
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&tcon->open_file_lock);
 			spin_unlock(&cifs_tcp_ses_lock);
 			cifs_dbg(FYI, "No matching file for oplock break\n");
 			return true;
@@ -613,9 +614,9 @@ backup_cred(struct cifs_sb_info *cifs_sb)
 void
 cifs_del_pending_open(struct cifs_pending_open *open)
 {
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tlink_tcon(open->tlink)->open_file_lock);
 	list_del(&open->olist);
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
 void
@@ -635,7 +636,7 @@ void
 cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 		      struct cifs_pending_open *open)
 {
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tlink_tcon(tlink)->open_file_lock);
 	cifs_add_pending_open_locked(fid, tlink, open);
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 65cf85dcda09..8f6a2a5863b9 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -597,14 +597,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
 	     is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
 		/* close and restart search */
 		cifs_dbg(FYI, "search backing up - close and restart search\n");
-		spin_lock(&cifs_file_list_lock);
+		spin_lock(&cfile->file_info_lock);
 		if (server->ops->dir_needs_close(cfile)) {
 			cfile->invalidHandle = true;
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&cfile->file_info_lock);
 			if (server->ops->close_dir)
 				server->ops->close_dir(xid, tcon, &cfile->fid);
 		} else
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&cfile->file_info_lock);
 		if (cfile->srch_inf.ntwrk_buf_start) {
 			cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n");
 			if (cfile->srch_inf.smallBuf)
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 4f0231e685a9..1238cd3552f9 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -266,9 +266,15 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 	struct tcon_link *tlink;
 	int rc;
 
+	if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
+	    (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+	    (buf->Attributes == 0))
+		return 0; /* would be a no op, no sense sending this */
+
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
+
 	rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
 				FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
 				SMB2_OP_SET_INFO);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 389fb9f8c84e..3d383489b9cf 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -549,19 +549,19 @@ smb2_is_valid_lease_break(char *buffer)
 		list_for_each(tmp1, &server->smb_ses_list) {
 			ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
 
-			spin_lock(&cifs_file_list_lock);
 			list_for_each(tmp2, &ses->tcon_list) {
 				tcon = list_entry(tmp2, struct cifs_tcon,
 						  tcon_list);
+				spin_lock(&tcon->open_file_lock);
 				cifs_stats_inc(
 				    &tcon->stats.cifs_stats.num_oplock_brks);
 				if (smb2_tcon_has_lease(tcon, rsp, lw)) {
-					spin_unlock(&cifs_file_list_lock);
+					spin_unlock(&tcon->open_file_lock);
 					spin_unlock(&cifs_tcp_ses_lock);
 					return true;
 				}
+				spin_unlock(&tcon->open_file_lock);
 			}
-			spin_unlock(&cifs_file_list_lock);
 		}
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
@@ -603,7 +603,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
 
 			cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-			spin_lock(&cifs_file_list_lock);
+			spin_lock(&tcon->open_file_lock);
 			list_for_each(tmp2, &tcon->openFileList) {
 				cfile = list_entry(tmp2, struct cifsFileInfo,
 						     tlist);
@@ -615,7 +615,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 
 				cifs_dbg(FYI, "file id match, oplock break\n");
 				cinode = CIFS_I(d_inode(cfile->dentry));
-
+				spin_lock(&cfile->file_info_lock);
 				if (!CIFS_CACHE_WRITE(cinode) &&
 				    rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
 					cfile->oplock_break_cancelled = true;
@@ -637,14 +637,14 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 					clear_bit(
 					   CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
 					   &cinode->flags);
-
+				spin_unlock(&cfile->file_info_lock);
 				queue_work(cifsiod_wq, &cfile->oplock_break);
 
-				spin_unlock(&cifs_file_list_lock);
+				spin_unlock(&tcon->open_file_lock);
 				spin_unlock(&cifs_tcp_ses_lock);
 				return true;
 			}
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&tcon->open_file_lock);
 			spin_unlock(&cifs_tcp_ses_lock);
 			cifs_dbg(FYI, "No matching file for oplock break\n");
 			return true;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d203c0329626..5d456ebb3813 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -28,6 +28,7 @@
 #include "cifs_unicode.h"
 #include "smb2status.h"
 #include "smb2glob.h"
+#include "cifs_ioctl.h"
 
 static int
 change_conf(struct TCP_Server_Info *server)
@@ -70,6 +71,10 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
 	spin_lock(&server->req_lock);
 	val = server->ops->get_credits_field(server, optype);
 	*val += add;
+	if (*val > 65000) {
+		*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
+		printk_once(KERN_WARNING "server overflowed SMB3 credits\n");
+	}
 	server->in_flight--;
 	if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
 		rc = change_conf(server);
@@ -287,7 +292,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
 		cifs_dbg(FYI, "Link Speed %lld\n",
 			le64_to_cpu(out_buf->LinkSpeed));
 	}
-
+	kfree(out_buf);
 	return rc;
 }
 #endif /* STATS2 */
@@ -541,6 +546,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 	server->ops->set_oplock_level(cinode, oplock, fid->epoch,
 				      &fid->purge_cache);
 	cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
+	memcpy(cfile->fid.create_guid, fid->create_guid, 16);
 }
 
 static void
@@ -699,6 +705,7 @@ smb2_clone_range(const unsigned int xid,
 
 cchunk_out:
 	kfree(pcchunk);
+	kfree(retbuf);
 	return rc;
 }
 
@@ -823,7 +830,6 @@ smb2_duplicate_extents(const unsigned int xid,
 {
 	int rc;
 	unsigned int ret_data_len;
-	char *retbuf = NULL;
 	struct duplicate_extents_to_file dup_ext_buf;
 	struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
 
@@ -849,7 +855,7 @@ smb2_duplicate_extents(const unsigned int xid,
 			FSCTL_DUPLICATE_EXTENTS_TO_FILE,
 			true /* is_fsctl */, (char *)&dup_ext_buf,
 			sizeof(struct duplicate_extents_to_file),
-			(char **)&retbuf,
+			NULL,
 			&ret_data_len);
 
 	if (ret_data_len > 0)
@@ -872,7 +878,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
 		   struct cifsFileInfo *cfile)
 {
 	struct fsctl_set_integrity_information_req integr_info;
-	char *retbuf = NULL;
 	unsigned int ret_data_len;
 
 	integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
@@ -884,9 +889,53 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
 			FSCTL_SET_INTEGRITY_INFORMATION,
 			true /* is_fsctl */, (char *)&integr_info,
 			sizeof(struct fsctl_set_integrity_information_req),
+			NULL,
+			&ret_data_len);
+
+}
+
+static int
+smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
+		   struct cifsFileInfo *cfile, void __user *ioc_buf)
+{
+	char *retbuf = NULL;
+	unsigned int ret_data_len = 0;
+	int rc;
+	struct smb_snapshot_array snapshot_in;
+
+	rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+			cfile->fid.volatile_fid,
+			FSCTL_SRV_ENUMERATE_SNAPSHOTS,
+			true /* is_fsctl */, NULL, 0 /* no input data */,
 			(char **)&retbuf,
 			&ret_data_len);
+	cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n",
+			rc, ret_data_len);
+	if (rc)
+		return rc;
 
+	if (ret_data_len && (ioc_buf != NULL) && (retbuf != NULL)) {
+		/* Fixup buffer */
+		if (copy_from_user(&snapshot_in, ioc_buf,
+		    sizeof(struct smb_snapshot_array))) {
+			rc = -EFAULT;
+			kfree(retbuf);
+			return rc;
+		}
+		if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
+			rc = -ERANGE;
+			return rc;
+		}
+
+		if (ret_data_len > snapshot_in.snapshot_array_size)
+			ret_data_len = snapshot_in.snapshot_array_size;
+
+		if (copy_to_user(ioc_buf, retbuf, ret_data_len))
+			rc = -EFAULT;
+	}
+
+	kfree(retbuf);
+	return rc;
 }
 
 static int
@@ -1041,7 +1090,7 @@ smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
 static void
 smb2_new_lease_key(struct cifs_fid *fid)
 {
-	get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+	generate_random_uuid(fid->lease_key);
 }
 
 #define SMB2_SYMLINK_STRUCT_SIZE \
@@ -1654,6 +1703,7 @@ struct smb_version_operations smb21_operations = {
 	.clone_range = smb2_clone_range,
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
+	.enum_snapshots = smb3_enum_snapshots,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -1740,6 +1790,7 @@ struct smb_version_operations smb30_operations = {
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.fallocate = smb3_fallocate,
+	.enum_snapshots = smb3_enum_snapshots,
 };
 
 #ifdef CONFIG_CIFS_SMB311
@@ -1827,6 +1878,7 @@ struct smb_version_operations smb311_operations = {
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.fallocate = smb3_fallocate,
+	.enum_snapshots = smb3_enum_snapshots,
 };
 #endif /* CIFS_SMB311 */
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 29e06db5f187..5ca5ea4668a1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -100,7 +100,21 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
 	hdr->ProtocolId = SMB2_PROTO_NUMBER;
 	hdr->StructureSize = cpu_to_le16(64);
 	hdr->Command = smb2_cmd;
-	hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
+	if (tcon && tcon->ses && tcon->ses->server) {
+		struct TCP_Server_Info *server = tcon->ses->server;
+
+		spin_lock(&server->req_lock);
+		/* Request up to 2 credits but don't go over the limit. */
+		if (server->credits >= server->max_credits)
+			hdr->CreditRequest = cpu_to_le16(0);
+		else
+			hdr->CreditRequest = cpu_to_le16(
+				min_t(int, server->max_credits -
+						server->credits, 2));
+		spin_unlock(&server->req_lock);
+	} else {
+		hdr->CreditRequest = cpu_to_le16(2);
+	}
 	hdr->ProcessId = cpu_to_le32((__u16)current->tgid);
 
 	if (!tcon)
@@ -236,8 +250,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 	}
 
 	cifs_mark_open_files_invalid(tcon);
+
 	rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage);
 	mutex_unlock(&tcon->ses->session_mutex);
+
+	if (tcon->use_persistent)
+		cifs_reopen_persistent_handles(tcon);
+
 	cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
 	if (rc)
 		goto out;
@@ -574,59 +593,42 @@ vneg_out:
 	return -EIO;
 }
 
-int
-SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
-		const struct nls_table *nls_cp)
+struct SMB2_sess_data {
+	unsigned int xid;
+	struct cifs_ses *ses;
+	struct nls_table *nls_cp;
+	void (*func)(struct SMB2_sess_data *);
+	int result;
+	u64 previous_session;
+
+	/* we will send the SMB in three pieces:
+	 * a fixed length beginning part, an optional
+	 * SPNEGO blob (which can be zero length), and a
+	 * last part which will include the strings
+	 * and rest of bcc area. This allows us to avoid
+	 * a large buffer 17K allocation
+	 */
+	int buf0_type;
+	struct kvec iov[2];
+};
+
+static int
+SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
 {
+	int rc;
+	struct cifs_ses *ses = sess_data->ses;
 	struct smb2_sess_setup_req *req;
-	struct smb2_sess_setup_rsp *rsp = NULL;
-	struct kvec iov[2];
-	int rc = 0;
-	int resp_buftype = CIFS_NO_BUFFER;
-	__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
 	struct TCP_Server_Info *server = ses->server;
-	u16 blob_length = 0;
-	struct key *spnego_key = NULL;
-	char *security_blob = NULL;
-	unsigned char *ntlmssp_blob = NULL;
-	bool use_spnego = false; /* else use raw ntlmssp */
-
-	cifs_dbg(FYI, "Session Setup\n");
-
-	if (!server) {
-		WARN(1, "%s: server is NULL!\n", __func__);
-		return -EIO;
-	}
-
-	/*
-	 * If we are here due to reconnect, free per-smb session key
-	 * in case signing was required.
-	 */
-	kfree(ses->auth_key.response);
-	ses->auth_key.response = NULL;
-
-	/*
-	 * If memory allocation is successful, caller of this function
-	 * frees it.
-	 */
-	ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
-	if (!ses->ntlmssp)
-		return -ENOMEM;
-	ses->ntlmssp->sesskey_per_smbsess = true;
-
-	/* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
-	if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
-		ses->sectype = RawNTLMSSP;
-
-ssetup_ntlmssp_authenticate:
-	if (phase == NtLmChallenge)
-		phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
 
 	rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req);
 	if (rc)
 		return rc;
 
 	req->hdr.SessionId = 0; /* First session, not a reauthenticate */
+
+	/* if reconnect, we need to send previous sess id, otherwise it is 0 */
+	req->PreviousSessionId = sess_data->previous_session;
+
 	req->Flags = 0; /* MBZ */
 	/* to enable echos and oplocks */
 	req->hdr.CreditRequest = cpu_to_le16(3);
@@ -642,199 +644,368 @@ ssetup_ntlmssp_authenticate:
 	req->Capabilities = 0;
 	req->Channel = 0; /* MBZ */
 
-	iov[0].iov_base = (char *)req;
+	sess_data->iov[0].iov_base = (char *)req;
 	/* 4 for rfc1002 length field and 1 for pad */
-	iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+	sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+	/*
+	 * This variable will be used to clear the buffer
+	 * allocated above in case of any error in the calling function.
+	 */
+	sess_data->buf0_type = CIFS_SMALL_BUFFER;
 
-	if (ses->sectype == Kerberos) {
-#ifdef CONFIG_CIFS_UPCALL
-		struct cifs_spnego_msg *msg;
+	return 0;
+}
 
-		spnego_key = cifs_get_spnego_key(ses);
-		if (IS_ERR(spnego_key)) {
-			rc = PTR_ERR(spnego_key);
-			spnego_key = NULL;
-			goto ssetup_exit;
-		}
+static void
+SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data)
+{
+	free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+	sess_data->buf0_type = CIFS_NO_BUFFER;
+}
 
-		msg = spnego_key->payload.data[0];
-		/*
-		 * check version field to make sure that cifs.upcall is
-		 * sending us a response in an expected form
-		 */
-		if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
-			cifs_dbg(VFS,
-				  "bad cifs.upcall version. Expected %d got %d",
-				  CIFS_SPNEGO_UPCALL_VERSION, msg->version);
-			rc = -EKEYREJECTED;
-			goto ssetup_exit;
-		}
-		ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
-						 GFP_KERNEL);
-		if (!ses->auth_key.response) {
-			cifs_dbg(VFS,
-				"Kerberos can't allocate (%u bytes) memory",
-				msg->sesskey_len);
-			rc = -ENOMEM;
-			goto ssetup_exit;
-		}
-		ses->auth_key.len = msg->sesskey_len;
-		blob_length = msg->secblob_len;
-		iov[1].iov_base = msg->data + msg->sesskey_len;
-		iov[1].iov_len = blob_length;
-#else
-		rc = -EOPNOTSUPP;
-		goto ssetup_exit;
-#endif /* CONFIG_CIFS_UPCALL */
-	} else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
-		ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
-				       GFP_KERNEL);
-		if (ntlmssp_blob == NULL) {
-			rc = -ENOMEM;
-			goto ssetup_exit;
-		}
-		build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
-		if (use_spnego) {
-			/* blob_length = build_spnego_ntlmssp_blob(
-					&security_blob,
-					sizeof(struct _NEGOTIATE_MESSAGE),
-					ntlmssp_blob); */
-			/* BB eventually need to add this */
-			cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
-			rc = -EOPNOTSUPP;
-			kfree(ntlmssp_blob);
-			goto ssetup_exit;
-		} else {
-			blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
-			/* with raw NTLMSSP we don't encapsulate in SPNEGO */
-			security_blob = ntlmssp_blob;
-		}
-		iov[1].iov_base = security_blob;
-		iov[1].iov_len = blob_length;
-	} else if (phase == NtLmAuthenticate) {
-		req->hdr.SessionId = ses->Suid;
-		rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
-					     nls_cp);
-		if (rc) {
-			cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
-				 rc);
-			goto ssetup_exit; /* BB double check error handling */
-		}
-		if (use_spnego) {
-			/* blob_length = build_spnego_ntlmssp_blob(
-							&security_blob,
-							blob_length,
-							ntlmssp_blob); */
-			cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
-			rc = -EOPNOTSUPP;
-			kfree(ntlmssp_blob);
-			goto ssetup_exit;
-		} else {
-			security_blob = ntlmssp_blob;
-		}
-		iov[1].iov_base = security_blob;
-		iov[1].iov_len = blob_length;
-	} else {
-		cifs_dbg(VFS, "illegal ntlmssp phase\n");
-		rc = -EIO;
-		goto ssetup_exit;
-	}
+static int
+SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
+{
+	int rc;
+	struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base;
 
 	/* Testing shows that buffer offset must be at location of Buffer[0] */
 	req->SecurityBufferOffset =
-				cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
-					    1 /* pad */ - 4 /* rfc1001 len */);
-	req->SecurityBufferLength = cpu_to_le16(blob_length);
+		cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
+			1 /* pad */ - 4 /* rfc1001 len */);
+	req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
 
-	inc_rfc1001_len(req, blob_length - 1 /* pad */);
+	inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */);
 
 	/* BB add code to build os and lm fields */
 
-	rc = SendReceive2(xid, ses, iov, 2, &resp_buftype,
-			  CIFS_LOG_ERROR | CIFS_NEG_OP);
+	rc = SendReceive2(sess_data->xid, sess_data->ses,
+				sess_data->iov, 2,
+				&sess_data->buf0_type,
+				CIFS_LOG_ERROR | CIFS_NEG_OP);
 
-	kfree(security_blob);
-	rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
-	ses->Suid = rsp->hdr.SessionId;
-	if (resp_buftype != CIFS_NO_BUFFER &&
-	    rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
-		if (phase != NtLmNegotiate) {
-			cifs_dbg(VFS, "Unexpected more processing error\n");
-			goto ssetup_exit;
-		}
-		if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
-				le16_to_cpu(rsp->SecurityBufferOffset)) {
-			cifs_dbg(VFS, "Invalid security buffer offset %d\n",
-				 le16_to_cpu(rsp->SecurityBufferOffset));
-			rc = -EIO;
-			goto ssetup_exit;
+	return rc;
+}
+
+static int
+SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
+{
+	int rc = 0;
+	struct cifs_ses *ses = sess_data->ses;
+
+	mutex_lock(&ses->server->srv_mutex);
+	if (ses->server->sign && ses->server->ops->generate_signingkey) {
+		rc = ses->server->ops->generate_signingkey(ses);
+		kfree(ses->auth_key.response);
+		ses->auth_key.response = NULL;
+		if (rc) {
+			cifs_dbg(FYI,
+				"SMB3 session key generation failed\n");
+			mutex_unlock(&ses->server->srv_mutex);
+			goto keygen_exit;
 		}
+	}
+	if (!ses->server->session_estab) {
+		ses->server->sequence_number = 0x2;
+		ses->server->session_estab = true;
+	}
+	mutex_unlock(&ses->server->srv_mutex);
+
+	cifs_dbg(FYI, "SMB2/3 session established successfully\n");
+	spin_lock(&GlobalMid_Lock);
+	ses->status = CifsGood;
+	ses->need_reconnect = false;
+	spin_unlock(&GlobalMid_Lock);
 
-		/* NTLMSSP Negotiate sent now processing challenge (response) */
-		phase = NtLmChallenge; /* process ntlmssp challenge */
-		rc = 0; /* MORE_PROCESSING is not an error here but expected */
-		rc = decode_ntlmssp_challenge(rsp->Buffer,
-				le16_to_cpu(rsp->SecurityBufferLength), ses);
+keygen_exit:
+	if (!ses->server->sign) {
+		kfree(ses->auth_key.response);
+		ses->auth_key.response = NULL;
+	}
+	return rc;
+}
+
+#ifdef CONFIG_CIFS_UPCALL
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+	int rc;
+	struct cifs_ses *ses = sess_data->ses;
+	struct cifs_spnego_msg *msg;
+	struct key *spnego_key = NULL;
+	struct smb2_sess_setup_rsp *rsp = NULL;
+
+	rc = SMB2_sess_alloc_buffer(sess_data);
+	if (rc)
+		goto out;
+
+	spnego_key = cifs_get_spnego_key(ses);
+	if (IS_ERR(spnego_key)) {
+		rc = PTR_ERR(spnego_key);
+		spnego_key = NULL;
+		goto out;
 	}
 
+	msg = spnego_key->payload.data[0];
 	/*
-	 * BB eventually add code for SPNEGO decoding of NtlmChallenge blob,
-	 * but at least the raw NTLMSSP case works.
+	 * check version field to make sure that cifs.upcall is
+	 * sending us a response in an expected form
 	 */
+	if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+		cifs_dbg(VFS,
+			  "bad cifs.upcall version. Expected %d got %d",
+			  CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+		rc = -EKEYREJECTED;
+		goto out_put_spnego_key;
+	}
+
+	ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+					 GFP_KERNEL);
+	if (!ses->auth_key.response) {
+		cifs_dbg(VFS,
+			"Kerberos can't allocate (%u bytes) memory",
+			msg->sesskey_len);
+		rc = -ENOMEM;
+		goto out_put_spnego_key;
+	}
+	ses->auth_key.len = msg->sesskey_len;
+
+	sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
+	sess_data->iov[1].iov_len = msg->secblob_len;
+
+	rc = SMB2_sess_sendreceive(sess_data);
+	if (rc)
+		goto out_put_spnego_key;
+
+	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+	ses->Suid = rsp->hdr.SessionId;
+
+	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+	if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+		cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+	rc = SMB2_sess_establish_session(sess_data);
+out_put_spnego_key:
+	key_invalidate(spnego_key);
+	key_put(spnego_key);
+out:
+	sess_data->result = rc;
+	sess_data->func = NULL;
+	SMB2_sess_free_buffer(sess_data);
+}
+#else
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+	cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
+	sess_data->result = -EOPNOTSUPP;
+	sess_data->func = NULL;
+}
+#endif
+
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data);
+
+static void
+SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
+{
+	int rc;
+	struct cifs_ses *ses = sess_data->ses;
+	struct smb2_sess_setup_rsp *rsp = NULL;
+	char *ntlmssp_blob = NULL;
+	bool use_spnego = false; /* else use raw ntlmssp */
+	u16 blob_length = 0;
+
 	/*
-	 * No tcon so can't do
-	 * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
+	 * If memory allocation is successful, caller of this function
+	 * frees it.
 	 */
-	if (rc != 0)
-		goto ssetup_exit;
+	ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+	if (!ses->ntlmssp) {
+		rc = -ENOMEM;
+		goto out_err;
+	}
+	ses->ntlmssp->sesskey_per_smbsess = true;
+
+	rc = SMB2_sess_alloc_buffer(sess_data);
+	if (rc)
+		goto out_err;
+
+	ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
+			       GFP_KERNEL);
+	if (ntlmssp_blob == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
+	if (use_spnego) {
+		/* BB eventually need to add this */
+		cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+		rc = -EOPNOTSUPP;
+		goto out;
+	} else {
+		blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
+		/* with raw NTLMSSP we don't encapsulate in SPNEGO */
+	}
+	sess_data->iov[1].iov_base = ntlmssp_blob;
+	sess_data->iov[1].iov_len = blob_length;
+
+	rc = SMB2_sess_sendreceive(sess_data);
+	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+	/* If true, rc here is expected and not an error */
+	if (sess_data->buf0_type != CIFS_NO_BUFFER &&
+		rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+		rc = 0;
+
+	if (rc)
+		goto out;
+
+	if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
+			le16_to_cpu(rsp->SecurityBufferOffset)) {
+		cifs_dbg(VFS, "Invalid security buffer offset %d\n",
+			le16_to_cpu(rsp->SecurityBufferOffset));
+		rc = -EIO;
+		goto out;
+	}
+	rc = decode_ntlmssp_challenge(rsp->Buffer,
+			le16_to_cpu(rsp->SecurityBufferLength), ses);
+	if (rc)
+		goto out;
+
+	cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+
 
+	ses->Suid = rsp->hdr.SessionId;
 	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
 	if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
 		cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
-ssetup_exit:
-	free_rsp_buf(resp_buftype, rsp);
-
-	/* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
-	if ((phase == NtLmChallenge) && (rc == 0))
-		goto ssetup_ntlmssp_authenticate;
 
+out:
+	kfree(ntlmssp_blob);
+	SMB2_sess_free_buffer(sess_data);
 	if (!rc) {
-		mutex_lock(&server->srv_mutex);
-		if (server->sign && server->ops->generate_signingkey) {
-			rc = server->ops->generate_signingkey(ses);
-			kfree(ses->auth_key.response);
-			ses->auth_key.response = NULL;
-			if (rc) {
-				cifs_dbg(FYI,
-					"SMB3 session key generation failed\n");
-				mutex_unlock(&server->srv_mutex);
-				goto keygen_exit;
-			}
-		}
-		if (!server->session_estab) {
-			server->sequence_number = 0x2;
-			server->session_estab = true;
-		}
-		mutex_unlock(&server->srv_mutex);
-
-		cifs_dbg(FYI, "SMB2/3 session established successfully\n");
-		spin_lock(&GlobalMid_Lock);
-		ses->status = CifsGood;
-		ses->need_reconnect = false;
-		spin_unlock(&GlobalMid_Lock);
+		sess_data->result = 0;
+		sess_data->func = SMB2_sess_auth_rawntlmssp_authenticate;
+		return;
 	}
+out_err:
+	kfree(ses->ntlmssp);
+	ses->ntlmssp = NULL;
+	sess_data->result = rc;
+	sess_data->func = NULL;
+}
 
-keygen_exit:
-	if (!server->sign) {
-		kfree(ses->auth_key.response);
-		ses->auth_key.response = NULL;
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
+{
+	int rc;
+	struct cifs_ses *ses = sess_data->ses;
+	struct smb2_sess_setup_req *req;
+	struct smb2_sess_setup_rsp *rsp = NULL;
+	unsigned char *ntlmssp_blob = NULL;
+	bool use_spnego = false; /* else use raw ntlmssp */
+	u16 blob_length = 0;
+
+	rc = SMB2_sess_alloc_buffer(sess_data);
+	if (rc)
+		goto out;
+
+	req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
+	req->hdr.SessionId = ses->Suid;
+
+	rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
+					sess_data->nls_cp);
+	if (rc) {
+		cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc);
+		goto out;
 	}
-	if (spnego_key) {
-		key_invalidate(spnego_key);
-		key_put(spnego_key);
+
+	if (use_spnego) {
+		/* BB eventually need to add this */
+		cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+		rc = -EOPNOTSUPP;
+		goto out;
 	}
+	sess_data->iov[1].iov_base = ntlmssp_blob;
+	sess_data->iov[1].iov_len = blob_length;
+
+	rc = SMB2_sess_sendreceive(sess_data);
+	if (rc)
+		goto out;
+
+	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+	ses->Suid = rsp->hdr.SessionId;
+	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+	if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+		cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+	rc = SMB2_sess_establish_session(sess_data);
+out:
+	kfree(ntlmssp_blob);
+	SMB2_sess_free_buffer(sess_data);
 	kfree(ses->ntlmssp);
+	ses->ntlmssp = NULL;
+	sess_data->result = rc;
+	sess_data->func = NULL;
+}
 
+static int
+SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
+{
+	if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
+		ses->sectype = RawNTLMSSP;
+
+	switch (ses->sectype) {
+	case Kerberos:
+		sess_data->func = SMB2_auth_kerberos;
+		break;
+	case RawNTLMSSP:
+		sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate;
+		break;
+	default:
+		cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+int
+SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+		const struct nls_table *nls_cp)
+{
+	int rc = 0;
+	struct TCP_Server_Info *server = ses->server;
+	struct SMB2_sess_data *sess_data;
+
+	cifs_dbg(FYI, "Session Setup\n");
+
+	if (!server) {
+		WARN(1, "%s: server is NULL!\n", __func__);
+		return -EIO;
+	}
+
+	sess_data = kzalloc(sizeof(struct SMB2_sess_data), GFP_KERNEL);
+	if (!sess_data)
+		return -ENOMEM;
+
+	rc = SMB2_select_sec(ses, sess_data);
+	if (rc)
+		goto out;
+	sess_data->xid = xid;
+	sess_data->ses = ses;
+	sess_data->buf0_type = CIFS_NO_BUFFER;
+	sess_data->nls_cp = (struct nls_table *) nls_cp;
+
+	while (sess_data->func)
+		sess_data->func(sess_data);
+
+	rc = sess_data->result;
+out:
+	kfree(sess_data);
 	return rc;
 }
 
@@ -1164,7 +1335,7 @@ create_durable_v2_buf(struct cifs_fid *pfid)
 
 	buf->dcontext.Timeout = 0; /* Should this be configurable by workload */
 	buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
-	get_random_bytes(buf->dcontext.CreateGuid, 16);
+	generate_random_uuid(buf->dcontext.CreateGuid);
 	memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
 
 	/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
@@ -2057,6 +2228,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	if (rdata->credits) {
 		buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
 						SMB2_MAX_BUFFER_SIZE));
+		buf->CreditRequest = buf->CreditCharge;
 		spin_lock(&server->req_lock);
 		server->credits += rdata->credits -
 						le16_to_cpu(buf->CreditCharge);
@@ -2243,6 +2415,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
 	if (wdata->credits) {
 		req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
 						    SMB2_MAX_BUFFER_SIZE));
+		req->hdr.CreditRequest = req->hdr.CreditCharge;
 		spin_lock(&server->req_lock);
 		server->credits += wdata->credits -
 					le16_to_cpu(req->hdr.CreditCharge);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index ff88d9feb01e..fd3709e8de33 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -276,7 +276,7 @@ struct smb2_sess_setup_req {
 	__le32 Channel;
 	__le16 SecurityBufferOffset;
 	__le16 SecurityBufferLength;
-	__le64 PreviousSessionId;
+	__u64 PreviousSessionId;
 	__u8   Buffer[1];	/* variable length GSS security buffer */
 } __packed;
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 5e23f64c0804..20af5187ba63 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -33,7 +33,8 @@
 
 #define MAX_EA_VALUE_SIZE 65535
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
-
+#define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
+#define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
 /* BB need to add server (Samba e.g) support for security and trusted prefix */
 
 enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT };
@@ -144,6 +145,54 @@ out:
 	return rc;
 }
 
+static int cifs_attrib_get(struct dentry *dentry,
+			   struct inode *inode, void *value,
+			   size_t size)
+{
+	ssize_t rc;
+	__u32 *pattribute;
+
+	rc = cifs_revalidate_dentry_attr(dentry);
+
+	if (rc)
+		return rc;
+
+	if ((value == NULL) || (size == 0))
+		return sizeof(__u32);
+	else if (size < sizeof(__u32))
+		return -ERANGE;
+
+	/* return dos attributes as pseudo xattr */
+	pattribute = (__u32 *)value;
+	*pattribute = CIFS_I(inode)->cifsAttrs;
+
+	return sizeof(__u32);
+}
+
+static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
+				  void *value, size_t size)
+{
+	ssize_t rc;
+	__u64 * pcreatetime;
+
+	rc = cifs_revalidate_dentry_attr(dentry);
+	if (rc)
+		return rc;
+
+	if ((value == NULL) || (size == 0))
+		return sizeof(__u64);
+	else if (size < sizeof(__u64))
+		return -ERANGE;
+
+	/* return dos attributes as pseudo xattr */
+	pcreatetime = (__u64 *)value;
+	*pcreatetime = CIFS_I(inode)->createtime;
+	return sizeof(__u64);
+
+	return rc;
+}
+
+
 static int cifs_xattr_get(const struct xattr_handler *handler,
 			  struct dentry *dentry, struct inode *inode,
 			  const char *name, void *value, size_t size)
@@ -168,10 +217,19 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
 		rc = -ENOMEM;
 		goto out;
 	}
-	/* return dos attributes as pseudo xattr */
+
 	/* return alt name if available as pseudo attr */
 	switch (handler->flags) {
 	case XATTR_USER:
+		cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name);
+		if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) {
+			rc = cifs_attrib_get(dentry, inode, value, size);
+			break;
+		} else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) {
+			rc = cifs_creation_time_get(dentry, inode, value, size);
+			break;
+		}
+
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
 			goto out;
 
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 61057b7dbddb..98f87fe8f186 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -151,7 +151,10 @@ static int do_page_crypto(struct inode *inode,
 			struct page *src_page, struct page *dest_page,
 			gfp_t gfp_flags)
 {
-	u8 xts_tweak[FS_XTS_TWEAK_SIZE];
+	struct {
+		__le64 index;
+		u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
+	} xts_tweak;
 	struct skcipher_request *req = NULL;
 	DECLARE_FS_COMPLETION_RESULT(ecr);
 	struct scatterlist dst, src;
@@ -171,17 +174,15 @@ static int do_page_crypto(struct inode *inode,
 		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 		page_crypt_complete, &ecr);
 
-	BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
-	memcpy(xts_tweak, &index, sizeof(index));
-	memset(&xts_tweak[sizeof(index)], 0,
-			FS_XTS_TWEAK_SIZE - sizeof(index));
+	BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
+	xts_tweak.index = cpu_to_le64(index);
+	memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
 
 	sg_init_table(&dst, 1);
 	sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
 	sg_init_table(&src, 1);
 	sg_set_page(&src, src_page, PAGE_SIZE, 0);
-	skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
-					xts_tweak);
+	skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
 	if (rw == FS_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
 	else
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index ed115acb5dee..6865663aac69 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -109,6 +109,8 @@ int fscrypt_process_policy(struct file *filp,
 	if (ret)
 		return ret;
 
+	inode_lock(inode);
+
 	if (!inode_has_encryption_context(inode)) {
 		if (!S_ISDIR(inode->i_mode))
 			ret = -EINVAL;
@@ -127,6 +129,8 @@ int fscrypt_process_policy(struct file *filp,
 		ret = -EINVAL;
 	}
 
+	inode_unlock(inode);
+
 	mnt_drop_write_file(filp);
 	return ret;
 }
diff --git a/fs/exec.c b/fs/exec.c
index 6fcfb3f7b137..4e497b9ee71e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 {
 	struct page *page;
 	int ret;
+	unsigned int gup_flags = FOLL_FORCE;
 
 #ifdef CONFIG_STACK_GROWSUP
 	if (write) {
@@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 			return NULL;
 	}
 #endif
+
+	if (write)
+		gup_flags |= FOLL_WRITE;
+
 	/*
 	 * We are doing an exec().  'current' is the process
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
-	ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
-			1, &page, NULL);
+	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
+			&page, NULL);
 	if (ret <= 0)
 		return NULL;
 
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 79101651fe9e..42f9a0a0c4ca 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -137,7 +137,7 @@ Espan:
 bad_entry:
 	EXOFS_ERR(
 		"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
-		"offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
+		"offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n",
 		dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
 		_LLU(le64_to_cpu(p->inode_no)),
 		rec_len, p->name_len);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d831e24dc885..41b8b44a391c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -622,7 +622,7 @@ static int ext2_get_blocks(struct inode *inode,
 			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
-	int err = -EIO;
+	int err;
 	int offsets[4];
 	Indirect chain[4];
 	Indirect *partial;
@@ -639,7 +639,7 @@ static int ext2_get_blocks(struct inode *inode,
 	depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
 	if (depth == 0)
-		return (err);
+		return -EIO;
 
 	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 	/* Simplest case - block found, no allocation needed */
@@ -761,7 +761,6 @@ static int ext2_get_blocks(struct inode *inode,
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
 		*boundary = true;
 	err = count;
@@ -772,6 +771,8 @@ cleanup:
 		brelse(partial->bh);
 		partial--;
 	}
+	if (err > 0)
+		*bno = le32_to_cpu(chain[depth-1].key);
 	return err;
 }
 
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 02ddec6d8a7d..fdb19543af1e 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -128,12 +128,12 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
 	node = rb_first(&sbi->system_blks);
 	while (node) {
 		entry = rb_entry(node, struct ext4_system_zone, node);
-		printk("%s%llu-%llu", first ? "" : ", ",
+		printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
 		       entry->start_blk, entry->start_blk + entry->count - 1);
 		first = 0;
 		node = rb_next(node);
 	}
-	printk("\n");
+	printk(KERN_CONT "\n");
 }
 
 int ext4_setup_system_zone(struct super_block *sb)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 3ef1df6ae9ec..1aba469f8220 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -27,16 +27,15 @@
 #ifdef CONFIG_EXT4_DEBUG
 extern ushort ext4_mballoc_debug;
 
-#define mb_debug(n, fmt, a...)	                                        \
-	do {								\
-		if ((n) <= ext4_mballoc_debug) {		        \
-			printk(KERN_DEBUG "(%s, %d): %s: ",		\
-			       __FILE__, __LINE__, __func__);		\
-			printk(fmt, ## a);				\
-		}							\
-	} while (0)
+#define mb_debug(n, fmt, ...)	                                        \
+do {									\
+	if ((n) <= ext4_mballoc_debug) {				\
+		printk(KERN_DEBUG "(%s, %d): %s: " fmt,			\
+		       __FILE__, __LINE__, __func__, ##__VA_ARGS__);	\
+	}								\
+} while (0)
 #else
-#define mb_debug(n, fmt, a...)		no_printk(fmt, ## a)
+#define mb_debug(n, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define EXT4_MB_HISTORY_ALLOC		1	/* allocation */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f92f10d4f66a..104f8bfba718 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -577,12 +577,13 @@ static inline unsigned dx_node_limit(struct inode *dir)
 static void dx_show_index(char * label, struct dx_entry *entries)
 {
 	int i, n = dx_get_count (entries);
-	printk(KERN_DEBUG "%s index ", label);
+	printk(KERN_DEBUG "%s index", label);
 	for (i = 0; i < n; i++) {
-		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
-				0, (unsigned long)dx_get_block(entries + i));
+		printk(KERN_CONT " %x->%lu",
+		       i ? dx_get_hash(entries + i) : 0,
+		       (unsigned long)dx_get_block(entries + i));
 	}
-	printk("\n");
+	printk(KERN_CONT "\n");
 }
 
 struct stats
@@ -679,7 +680,7 @@ static struct stats dx_show_leaf(struct inode *dir,
 		}
 		de = ext4_next_entry(de, size);
 	}
-	printk("(%i)\n", names);
+	printk(KERN_CONT "(%i)\n", names);
 	return (struct stats) { names, space, 1 };
 }
 
@@ -798,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 		q = entries + count - 1;
 		while (p <= q) {
 			m = p + (q - p) / 2;
-			dxtrace(printk("."));
+			dxtrace(printk(KERN_CONT "."));
 			if (dx_get_hash(m) > hash)
 				q = m - 1;
 			else
@@ -810,7 +811,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 			at = entries;
 			while (n--)
 			{
-				dxtrace(printk(","));
+				dxtrace(printk(KERN_CONT ","));
 				if (dx_get_hash(++at) > hash)
 				{
 					at--;
@@ -821,7 +822,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 		}
 
 		at = p - 1;
-		dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at),
+		dxtrace(printk(KERN_CONT " %x->%u\n",
+			       at == entries ? 0 : dx_get_hash(at),
 			       dx_get_block(at)));
 		frame->entries = entries;
 		frame->at = at;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6db81fbcbaa6..20da99da0a34 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -597,14 +597,15 @@ void __ext4_std_error(struct super_block *sb, const char *function,
 void __ext4_abort(struct super_block *sb, const char *function,
 		unsigned int line, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	save_error_info(sb, function, line);
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
-	       function, line);
-	vprintk(fmt, args);
-	printk("\n");
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
+	       sb->s_id, function, line, &vaf);
 	va_end(args);
 
 	if ((sb->s_flags & MS_RDONLY) == 0) {
@@ -2715,12 +2716,12 @@ static void print_daily_error_info(unsigned long arg)
 		       es->s_first_error_func,
 		       le32_to_cpu(es->s_first_error_line));
 		if (es->s_first_error_ino)
-			printk(": inode %u",
+			printk(KERN_CONT ": inode %u",
 			       le32_to_cpu(es->s_first_error_ino));
 		if (es->s_first_error_block)
-			printk(": block %llu", (unsigned long long)
+			printk(KERN_CONT ": block %llu", (unsigned long long)
 			       le64_to_cpu(es->s_first_error_block));
-		printk("\n");
+		printk(KERN_CONT "\n");
 	}
 	if (es->s_last_error_time) {
 		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
@@ -2729,12 +2730,12 @@ static void print_daily_error_info(unsigned long arg)
 		       es->s_last_error_func,
 		       le32_to_cpu(es->s_last_error_line));
 		if (es->s_last_error_ino)
-			printk(": inode %u",
+			printk(KERN_CONT ": inode %u",
 			       le32_to_cpu(es->s_last_error_ino));
 		if (es->s_last_error_block)
-			printk(": block %llu", (unsigned long long)
+			printk(KERN_CONT ": block %llu", (unsigned long long)
 			       le64_to_cpu(es->s_last_error_block));
-		printk("\n");
+		printk(KERN_CONT "\n");
 	}
 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
 }
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 73bcfd41f5f2..42145be5c6b4 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -223,14 +223,18 @@ static struct attribute *ext4_attrs[] = {
 EXT4_ATTR_FEATURE(lazy_itable_init);
 EXT4_ATTR_FEATURE(batched_discard);
 EXT4_ATTR_FEATURE(meta_bg_resize);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
 EXT4_ATTR_FEATURE(encryption);
+#endif
 EXT4_ATTR_FEATURE(metadata_csum_seed);
 
 static struct attribute *ext4_feat_attrs[] = {
 	ATTR_LIST(lazy_itable_init),
 	ATTR_LIST(batched_discard),
 	ATTR_LIST(meta_bg_resize),
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
 	ATTR_LIST(encryption),
+#endif
 	ATTR_LIST(metadata_csum_seed),
 	NULL,
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c15d63389957..d77be9e9f535 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -61,18 +61,12 @@
 #include "acl.h"
 
 #ifdef EXT4_XATTR_DEBUG
-# define ea_idebug(inode, f...) do { \
-		printk(KERN_DEBUG "inode %s:%lu: ", \
-			inode->i_sb->s_id, inode->i_ino); \
-		printk(f); \
-		printk("\n"); \
-	} while (0)
-# define ea_bdebug(bh, f...) do { \
-		printk(KERN_DEBUG "block %pg:%lu: ",		   \
-		       bh->b_bdev, (unsigned long) bh->b_blocknr); \
-		printk(f); \
-		printk("\n"); \
-	} while (0)
+# define ea_idebug(inode, fmt, ...)					\
+	printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",			\
+	       inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...)					\
+	printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",			\
+	       bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
 #else
 # define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 # define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
@@ -241,7 +235,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
 	int error = -EFSCORRUPTED;
 
 	if (((void *) header >= end) ||
-	    (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
+	    (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
 		goto errout;
 	error = ext4_xattr_check_names(entry, end, entry);
 errout:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 93985c64d8a8..6f14ee923acd 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -852,16 +852,16 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 
 	for (segno = start_segno; segno < end_segno; segno++) {
 
-		if (get_valid_blocks(sbi, segno, 1) == 0 ||
-					unlikely(f2fs_cp_error(sbi)))
-			goto next;
-
 		/* find segment summary of victim */
 		sum_page = find_get_page(META_MAPPING(sbi),
 					GET_SUM_BLOCK(sbi, segno));
-		f2fs_bug_on(sbi, !PageUptodate(sum_page));
 		f2fs_put_page(sum_page, 0);
 
+		if (get_valid_blocks(sbi, segno, 1) == 0 ||
+				!PageUptodate(sum_page) ||
+				unlikely(f2fs_cp_error(sbi)))
+			goto next;
+
 		sum = page_address(sum_page);
 		f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
 
diff --git a/fs/iomap.c b/fs/iomap.c
index 013d1d36fbbf..a8ee8c33ca78 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
-			NULL, iomap);
+	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
 	if (ret)
 		return ret;
 
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 	}
 
 	while (len > 0) {
-		ret = iomap_apply(inode, start, len, 0, ops, &ctx,
+		ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
 				iomap_fiemap_actor);
 		/* inode with no (attribute) mapping will give ENOENT */
 		if (ret == -ENOENT)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ad0c745ebad7..871c8b392099 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -687,6 +687,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	pri_bh = NULL;
 
 root_found:
+	/* We don't support read-write mounts */
+	if (!(s->s_flags & MS_RDONLY)) {
+		error = -EACCES;
+		goto out_freebh;
+	}
 
 	if (joliet_level && (pri == NULL || !opt.rock)) {
 		/* This is the case of Joliet with the norock mount flag.
@@ -1501,9 +1506,6 @@ struct inode *__isofs_iget(struct super_block *sb,
 static struct dentry *isofs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-	/* We don't support read-write mounts */
-	if (!(flags & MS_RDONLY))
-		return ERR_PTR(-EACCES);
 	return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
 }
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3d8246a9faa4..e1652665bd93 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1149,6 +1149,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		spin_lock(&journal->j_list_lock);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+		spin_unlock(&journal->j_list_lock);
 	} else if (jh->b_transaction == journal->j_committing_transaction) {
 		/* first access by this transaction */
 		jh->b_modified = 0;
@@ -1156,8 +1157,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 		JBUFFER_TRACE(jh, "set next transaction");
 		spin_lock(&journal->j_list_lock);
 		jh->b_next_transaction = transaction;
+		spin_unlock(&journal->j_list_lock);
 	}
-	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(bh);
 
 	/*
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index dcd96aac02f5..cf4c636ff4da 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -110,8 +110,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
  * kn_to:   /n1/n2/n3         [depth=3]
  * result:  /../..
  *
- * return value: length of the string.  If greater than buflen,
- * then contents of buf are undefined.  On error, -1 is returned.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 					struct kernfs_node *kn_from,
@@ -119,9 +120,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 {
 	struct kernfs_node *kn, *common;
 	const char parent_str[] = "/..";
-	size_t depth_from, depth_to, len = 0, nlen = 0;
-	char *p;
-	int i;
+	size_t depth_from, depth_to, len = 0;
+	int i, j;
 
 	if (!kn_from)
 		kn_from = kernfs_root(kn_to)->kn;
@@ -131,7 +131,7 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 
 	common = kernfs_common_ancestor(kn_from, kn_to);
 	if (WARN_ON(!common))
-		return -1;
+		return -EINVAL;
 
 	depth_to = kernfs_depth(common, kn_to);
 	depth_from = kernfs_depth(common, kn_from);
@@ -144,22 +144,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 			       len < buflen ? buflen - len : 0);
 
 	/* Calculate how many bytes we need for the rest */
-	for (kn = kn_to; kn != common; kn = kn->parent)
-		nlen += strlen(kn->name) + 1;
-
-	if (len + nlen >= buflen)
-		return len + nlen;
-
-	p = buf + len + nlen;
-	*p = '\0';
-	for (kn = kn_to; kn != common; kn = kn->parent) {
-		size_t tmp = strlen(kn->name);
-		p -= tmp;
-		memcpy(p, kn->name, tmp);
-		*(--p) = '/';
+	for (i = depth_to - 1; i >= 0; i--) {
+		for (kn = kn_to, j = 0; j < i; j++)
+			kn = kn->parent;
+		len += strlcpy(buf + len, "/",
+			       len < buflen ? buflen - len : 0);
+		len += strlcpy(buf + len, kn->name,
+			       len < buflen ? buflen - len : 0);
 	}
 
-	return len + nlen;
+	return len;
 }
 
 /**
@@ -186,29 +180,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 }
 
 /**
- * kernfs_path_len - determine the length of the full path of a given node
- * @kn: kernfs_node of interest
- *
- * The returned length doesn't include the space for the terminating '\0'.
- */
-size_t kernfs_path_len(struct kernfs_node *kn)
-{
-	size_t len = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kernfs_rename_lock, flags);
-
-	do {
-		len += strlen(kn->name) + 1;
-		kn = kn->parent;
-	} while (kn && kn->parent);
-
-	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
-
-	return len;
-}
-
-/**
  * kernfs_path_from_node - build path of node @to relative to @from.
  * @from: parent kernfs_node relative to which we need to build the path
  * @to: kernfs_node of interest
@@ -220,8 +191,9 @@ size_t kernfs_path_len(struct kernfs_node *kn)
  * path (which includes '..'s) as needed to reach from @from to @to is
  * returned.
  *
- * If @buf isn't long enough, the return value will be greater than @buflen
- * and @buf contents are undefined.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
 			  char *buf, size_t buflen)
@@ -237,28 +209,6 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
 EXPORT_SYMBOL_GPL(kernfs_path_from_node);
 
 /**
- * kernfs_path - build full path of a given node
- * @kn: kernfs_node of interest
- * @buf: buffer to copy @kn's name into
- * @buflen: size of @buf
- *
- * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
- */
-char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
-{
-	int ret;
-
-	ret = kernfs_path_from_node(kn, NULL, buf, buflen);
-	if (ret < 0 || ret >= buflen)
-		return NULL;
-	return buf;
-}
-EXPORT_SYMBOL_GPL(kernfs_path);
-
-/**
  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
  * @kn: kernfs_node of interest
  *
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2bcb86e6e6ca..78219d5644e9 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -911,6 +911,7 @@ const struct file_operations kernfs_file_fops = {
 	.open		= kernfs_fop_open,
 	.release	= kernfs_fop_release,
 	.poll		= kernfs_fop_poll,
+	.fsync		= noop_fsync,
 };
 
 /**
diff --git a/fs/locks.c b/fs/locks.c
index ce93b416b490..22c5b4aa4961 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1609,6 +1609,7 @@ int fcntl_getlease(struct file *filp)
 
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+		percpu_down_read_preempt_disable(&file_rwsem);
 		spin_lock(&ctx->flc_lock);
 		time_out_leases(inode, &dispose);
 		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
@@ -1618,6 +1619,8 @@ int fcntl_getlease(struct file *filp)
 			break;
 		}
 		spin_unlock(&ctx->flc_lock);
+		percpu_up_read_preempt_enable(&file_rwsem);
+
 		locks_dispose_list(&dispose);
 	}
 	return type;
@@ -2529,11 +2532,14 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
 	if (list_empty(&ctx->flc_lease))
 		return;
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
 		if (filp == fl->fl_file)
 			lease_modify(fl, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
+
 	locks_dispose_list(&dispose);
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index a7f601cd521a..5b4eed221530 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4668,6 +4668,31 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 }
 EXPORT_SYMBOL(generic_readlink);
 
+/**
+ * vfs_get_link - get symlink body
+ * @dentry: dentry on which to get symbolic link
+ * @done: caller needs to free returned data with this
+ *
+ * Calls security hook and i_op->get_link() on the supplied inode.
+ *
+ * It does not touch atime.  That's up to the caller if necessary.
+ *
+ * Does not work on "special" symlinks like /proc/$$/fd/N
+ */
+const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
+{
+	const char *res = ERR_PTR(-EINVAL);
+	struct inode *inode = d_inode(dentry);
+
+	if (d_is_symlink(dentry)) {
+		res = ERR_PTR(security_inode_readlink(dentry));
+		if (!res)
+			res = inode->i_op->get_link(dentry, inode, done);
+	}
+	return res;
+}
+EXPORT_SYMBOL(vfs_get_link);
+
 /* get the link contents into pagecache */
 const char *page_get_link(struct dentry *dentry, struct inode *inode,
 			  struct delayed_call *callback)
diff --git a/fs/namespace.c b/fs/namespace.c
index 58aca9c931ac..e6c234b1a645 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2824,6 +2824,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	return new_ns;
 }
 
+__latent_entropy
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 217847679f0e..2905479f214a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -344,9 +344,10 @@ static void bl_write_cleanup(struct work_struct *work)
 		u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
 		u64 end = (hdr->args.offset + hdr->args.count +
 			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
+		u64 lwb = hdr->args.offset + hdr->args.count;
 
 		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
-					(end - start) >> SECTOR_SHIFT, end);
+					(end - start) >> SECTOR_SHIFT, lwb);
 	}
 
 	pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ad917bd72b38..7897826d7c51 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1545,7 +1545,7 @@ static int update_open_stateid(struct nfs4_state *state,
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *deleg_cur;
-	nfs4_stateid freeme = {0};
+	nfs4_stateid freeme = { };
 	int ret = 0;
 
 	fmode &= (FMODE_READ|FMODE_WRITE);
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index 1e8fe844e69f..5355efba4bc8 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
 		}
 	}
 
-	dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+	orangefs_set_timeout(dentry);
 	ret = 1;
 out_release_op:
 	op_release(new_op);
@@ -94,8 +94,9 @@ out_drop:
 static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	int ret;
+	unsigned long time = (unsigned long) dentry->d_fsdata;
 
-	if (time_before(jiffies, dentry->d_time))
+	if (time_before(jiffies, time))
 		return 1;
 
 	if (flags & LOOKUP_RCU)
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 66ea0cc37b18..02cc6139ec90 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -621,9 +621,9 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
 	 * readahead cache (if any); this forces an expensive refresh of
 	 * data for the next caller of mmap (or 'get_block' accesses)
 	 */
-	if (file->f_path.dentry->d_inode &&
-	    file->f_path.dentry->d_inode->i_mapping &&
-	    mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
+	if (file_inode(file) &&
+	    file_inode(file)->i_mapping &&
+	    mapping_nrpages(&file_inode(file)->i_data)) {
 		if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
 			gossip_debug(GOSSIP_INODE_DEBUG,
 			    "calling flush_racache on %pU\n",
@@ -632,7 +632,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
 			gossip_debug(GOSSIP_INODE_DEBUG,
 			    "flush_racache finished\n");
 		}
-		truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+		truncate_inode_pages(file_inode(file)->i_mapping,
 				     0);
 	}
 	return 0;
@@ -648,7 +648,7 @@ static int orangefs_fsync(struct file *file,
 {
 	int ret = -EINVAL;
 	struct orangefs_inode_s *orangefs_inode =
-		ORANGEFS_I(file->f_path.dentry->d_inode);
+		ORANGEFS_I(file_inode(file));
 	struct orangefs_kernel_op_s *new_op = NULL;
 
 	/* required call */
@@ -661,7 +661,7 @@ static int orangefs_fsync(struct file *file,
 
 	ret = service_operation(new_op,
 			"orangefs_fsync",
-			get_interruptible_flag(file->f_path.dentry->d_inode));
+			get_interruptible_flag(file_inode(file)));
 
 	gossip_debug(GOSSIP_FILE_DEBUG,
 		     "orangefs_fsync got return value of %d\n",
@@ -669,7 +669,7 @@ static int orangefs_fsync(struct file *file,
 
 	op_release(new_op);
 
-	orangefs_flush_inode(file->f_path.dentry->d_inode);
+	orangefs_flush_inode(file_inode(file));
 	return ret;
 }
 
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index d15d3d2dba62..a290ff6ec756 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -72,7 +72,7 @@ static int orangefs_create(struct inode *dir,
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
-	dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
@@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 
-	dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+	orangefs_set_timeout(dentry);
 
 	inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
 	if (IS_ERR(inode)) {
@@ -322,7 +322,7 @@ static int orangefs_symlink(struct inode *dir,
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
-	dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
@@ -386,7 +386,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
-	dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+	orangefs_set_timeout(dentry);
 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 0a82048f3aaf..3bf803d732c5 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -580,4 +580,11 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
 #endif
 }
 
+static inline void orangefs_set_timeout(struct dentry *dentry)
+{
+	unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+
+	dentry->d_fsdata = (void *) time;
+}
+
 #endif /* __ORANGEFSKERNEL_H */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 3f803b3a1f82..aeb60f791418 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -57,6 +57,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 	ssize_t list_size, size, value_size = 0;
 	char *buf, *name, *value = NULL;
 	int uninitialized_var(error);
+	size_t slen;
 
 	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
 	    !(new->d_inode->i_opflags & IOP_XATTR))
@@ -79,7 +80,16 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 		goto out;
 	}
 
-	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+	for (name = buf; list_size; name += slen) {
+		slen = strnlen(name, list_size) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > list_size)) {
+			error = -EIO;
+			break;
+		}
+		list_size -= slen;
+
 		if (ovl_is_private_xattr(name))
 			continue;
 retry:
@@ -174,40 +184,6 @@ out_fput:
 	return error;
 }
 
-static char *ovl_read_symlink(struct dentry *realdentry)
-{
-	int res;
-	char *buf;
-	struct inode *inode = realdentry->d_inode;
-	mm_segment_t old_fs;
-
-	res = -EINVAL;
-	if (!inode->i_op->readlink)
-		goto err;
-
-	res = -ENOMEM;
-	buf = (char *) __get_free_page(GFP_KERNEL);
-	if (!buf)
-		goto err;
-
-	old_fs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	res = inode->i_op->readlink(realdentry,
-				    (char __user *)buf, PAGE_SIZE - 1);
-	set_fs(old_fs);
-	if (res < 0) {
-		free_page((unsigned long) buf);
-		goto err;
-	}
-	buf[res] = '\0';
-
-	return buf;
-
-err:
-	return ERR_PTR(res);
-}
-
 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
 {
 	struct iattr attr = {
@@ -354,19 +330,20 @@ out_cleanup:
 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		    struct path *lowerpath, struct kstat *stat)
 {
+	DEFINE_DELAYED_CALL(done);
 	struct dentry *workdir = ovl_workdir(dentry);
 	int err;
 	struct kstat pstat;
 	struct path parentpath;
+	struct dentry *lowerdentry = lowerpath->dentry;
 	struct dentry *upperdir;
 	struct dentry *upperdentry;
-	const struct cred *old_cred;
-	char *link = NULL;
+	const char *link = NULL;
 
 	if (WARN_ON(!workdir))
 		return -EROFS;
 
-	ovl_do_check_copy_up(lowerpath->dentry);
+	ovl_do_check_copy_up(lowerdentry);
 
 	ovl_path_upper(parent, &parentpath);
 	upperdir = parentpath.dentry;
@@ -376,13 +353,11 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		return err;
 
 	if (S_ISLNK(stat->mode)) {
-		link = ovl_read_symlink(lowerpath->dentry);
+		link = vfs_get_link(lowerdentry, &done);
 		if (IS_ERR(link))
 			return PTR_ERR(link);
 	}
 
-	old_cred = ovl_override_creds(dentry->d_sb);
-
 	err = -EIO;
 	if (lock_rename(workdir, upperdir) != NULL) {
 		pr_err("overlayfs: failed to lock workdir+upperdir\n");
@@ -403,19 +378,16 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	}
 out_unlock:
 	unlock_rename(workdir, upperdir);
-	revert_creds(old_cred);
-
-	if (link)
-		free_page((unsigned long) link);
+	do_delayed_call(&done);
 
 	return err;
 }
 
 int ovl_copy_up(struct dentry *dentry)
 {
-	int err;
+	int err = 0;
+	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
 
-	err = 0;
 	while (!err) {
 		struct dentry *next;
 		struct dentry *parent;
@@ -447,6 +419,7 @@ int ovl_copy_up(struct dentry *dentry)
 		dput(parent);
 		dput(next);
 	}
+	revert_creds(old_cred);
 
 	return err;
 }
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 5f90ddf778ba..306b6c161840 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -14,6 +14,7 @@
 #include <linux/cred.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/atomic.h>
 #include "overlayfs.h"
 
 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -37,8 +38,10 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
 {
 	struct dentry *temp;
 	char name[20];
+	static atomic_t temp_id = ATOMIC_INIT(0);
 
-	snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+	/* counter is allowed to wrap, since temp dentries are ephemeral */
+	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
 
 	temp = lookup_one_len(name, workdir, strlen(name));
 	if (!IS_ERR(temp) && temp->d_inode) {
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index c18d6a4ff456..c58f01babf30 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,6 +19,7 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
 	struct dentry *parent;
 	struct kstat stat;
 	struct path lowerpath;
+	const struct cred *old_cred;
 
 	parent = dget_parent(dentry);
 	err = ovl_copy_up(parent);
@@ -26,12 +27,14 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
 		goto out_dput_parent;
 
 	ovl_path_lower(dentry, &lowerpath);
-	err = vfs_getattr(&lowerpath, &stat);
-	if (err)
-		goto out_dput_parent;
 
-	stat.size = 0;
-	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = vfs_getattr(&lowerpath, &stat);
+	if (!err) {
+		stat.size = 0;
+		err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+	}
+	revert_creds(old_cred);
 
 out_dput_parent:
 	dput(parent);
@@ -153,45 +156,18 @@ static const char *ovl_get_link(struct dentry *dentry,
 				struct inode *inode,
 				struct delayed_call *done)
 {
-	struct dentry *realdentry;
-	struct inode *realinode;
 	const struct cred *old_cred;
 	const char *p;
 
 	if (!dentry)
 		return ERR_PTR(-ECHILD);
 
-	realdentry = ovl_dentry_real(dentry);
-	realinode = realdentry->d_inode;
-
-	if (WARN_ON(!realinode->i_op->get_link))
-		return ERR_PTR(-EPERM);
-
 	old_cred = ovl_override_creds(dentry->d_sb);
-	p = realinode->i_op->get_link(realdentry, realinode, done);
+	p = vfs_get_link(ovl_dentry_real(dentry), done);
 	revert_creds(old_cred);
 	return p;
 }
 
-static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
-{
-	struct path realpath;
-	struct inode *realinode;
-	const struct cred *old_cred;
-	int err;
-
-	ovl_path_real(dentry, &realpath);
-	realinode = realpath.dentry->d_inode;
-
-	if (!realinode->i_op->readlink)
-		return -EINVAL;
-
-	old_cred = ovl_override_creds(dentry->d_sb);
-	err = realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
-	revert_creds(old_cred);
-	return err;
-}
-
 bool ovl_is_private_xattr(const char *name)
 {
 	return strncmp(name, OVL_XATTR_PREFIX,
@@ -375,7 +351,7 @@ static const struct inode_operations ovl_file_inode_operations = {
 static const struct inode_operations ovl_symlink_inode_operations = {
 	.setattr	= ovl_setattr,
 	.get_link	= ovl_get_link,
-	.readlink	= ovl_readlink,
+	.readlink	= generic_readlink,
 	.getattr	= ovl_getattr,
 	.listxattr	= ovl_listxattr,
 	.update_time	= ovl_update_time,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7e3f0127fc1a..bcf3965be819 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -273,12 +273,11 @@ static bool ovl_is_opaquedir(struct dentry *dentry)
 {
 	int res;
 	char val;
-	struct inode *inode = dentry->d_inode;
 
-	if (!S_ISDIR(inode->i_mode) || !(inode->i_opflags & IOP_XATTR))
+	if (!d_is_dir(dentry))
 		return false;
 
-	res = __vfs_getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1);
+	res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
 	if (res == 1 && val == 'y')
 		return true;
 
@@ -419,16 +418,12 @@ static bool ovl_dentry_weird(struct dentry *dentry)
 				  DCACHE_OP_COMPARE);
 }
 
-static inline struct dentry *ovl_lookup_real(struct super_block *ovl_sb,
-					     struct dentry *dir,
+static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 					     const struct qstr *name)
 {
-	const struct cred *old_cred;
 	struct dentry *dentry;
 
-	old_cred = ovl_override_creds(ovl_sb);
 	dentry = lookup_one_len_unlocked(name->name, dir, name->len);
-	revert_creds(old_cred);
 
 	if (IS_ERR(dentry)) {
 		if (PTR_ERR(dentry) == -ENOENT)
@@ -469,6 +464,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags)
 {
 	struct ovl_entry *oe;
+	const struct cred *old_cred;
 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
 	struct path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
@@ -479,9 +475,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	unsigned int i;
 	int err;
 
+	old_cred = ovl_override_creds(dentry->d_sb);
 	upperdir = ovl_upperdentry_dereference(poe);
 	if (upperdir) {
-		this = ovl_lookup_real(dentry->d_sb, upperdir, &dentry->d_name);
+		this = ovl_lookup_real(upperdir, &dentry->d_name);
 		err = PTR_ERR(this);
 		if (IS_ERR(this))
 			goto out;
@@ -514,8 +511,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		bool opaque = false;
 		struct path lowerpath = poe->lowerstack[i];
 
-		this = ovl_lookup_real(dentry->d_sb,
-				       lowerpath.dentry, &dentry->d_name);
+		this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
 		err = PTR_ERR(this);
 		if (IS_ERR(this)) {
 			/*
@@ -588,6 +584,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		ovl_copyattr(realdentry->d_inode, inode);
 	}
 
+	revert_creds(old_cred);
 	oe->opaque = upperopaque;
 	oe->__upperdentry = upperdentry;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
@@ -606,6 +603,7 @@ out_put:
 out_put_upper:
 	dput(upperdentry);
 out:
+	revert_creds(old_cred);
 	return ERR_PTR(err);
 }
 
@@ -834,6 +832,19 @@ retry:
 		if (err)
 			goto out_dput;
 
+		/*
+		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
+		 *
+		 * a) success (there was a POSIX ACL xattr and was removed)
+		 * b) -ENODATA (there was no POSIX ACL xattr)
+		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
+		 *
+		 * There are various other error values that could effectively
+		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
+		 * if the xattr name is too long), but the set of filesystems
+		 * allowed as upper are limited to "normal" ones, where checking
+		 * for the above two errors is sufficient.
+		 */
 		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
 		if (err && err != -ENODATA && err != -EOPNOTSUPP)
 			goto out_dput;
@@ -1292,6 +1303,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!oe)
 		goto out_put_cred;
 
+	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+	sb->s_op = &ovl_super_operations;
+	sb->s_xattr = ovl_xattr_handlers;
+	sb->s_fs_info = ufs;
+	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+
 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR));
 	if (!root_dentry)
 		goto out_free_oe;
@@ -1315,12 +1332,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
 	ovl_copyattr(realinode, d_inode(root_dentry));
 
-	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
-	sb->s_op = &ovl_super_operations;
-	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_root = root_dentry;
-	sb->s_fs_info = ufs;
-	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
 	return 0;
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 89600fd5963d..81818adb8e9e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -412,10 +412,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	mm = get_task_mm(task);
 	if (mm) {
 		vsize = task_vsize(mm);
-		if (permitted) {
-			eip = KSTK_EIP(task);
-			esp = KSTK_ESP(task);
-		}
+		/*
+		 * esp and eip are intentionally zeroed out.  There is no
+		 * non-racy way to read them without freezing the task.
+		 * Programs that need reliable values can use ptrace(2).
+		 */
 	}
 
 	get_task_comm(tcomm, task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c2964d890c9a..ca651ac00660 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -832,6 +832,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 	unsigned long addr = *ppos;
 	ssize_t copied;
 	char *page;
+	unsigned int flags;
 
 	if (!mm)
 		return 0;
@@ -844,6 +845,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 	if (!atomic_inc_not_zero(&mm->mm_users))
 		goto free;
 
+	/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+	flags = FOLL_FORCE;
+	if (write)
+		flags |= FOLL_WRITE;
+
 	while (count > 0) {
 		int this_len = min_t(int, count, PAGE_SIZE);
 
@@ -852,7 +858,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 			break;
 		}
 
-		this_len = access_remote_vm(mm, addr, page, this_len, write);
+		this_len = access_remote_vm(mm, addr, page, this_len, flags);
 		if (!this_len) {
 			if (!copied)
 				copied = -EIO;
@@ -964,8 +970,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
 		max_len = min_t(size_t, PAGE_SIZE, count);
 		this_len = min(max_len, this_len);
 
-		retval = access_remote_vm(mm, (env_start + src),
-			page, this_len, 0);
+		retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
 
 		if (retval <= 0) {
 			ret = retval;
@@ -1007,6 +1012,9 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
 {
 	struct mm_struct *mm = file->private_data;
 	unsigned int nwords = 0;
+
+	if (!mm)
+		return 0;
 	do {
 		nwords += 2;
 	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6909582ce5e5..35b92d81692f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -266,24 +266,15 @@ static int do_maps_open(struct inode *inode, struct file *file,
  * /proc/PID/maps that is the stack of the main task.
  */
 static int is_stack(struct proc_maps_private *priv,
-		    struct vm_area_struct *vma, int is_pid)
+		    struct vm_area_struct *vma)
 {
-	int stack = 0;
-
-	if (is_pid) {
-		stack = vma->vm_start <= vma->vm_mm->start_stack &&
-			vma->vm_end >= vma->vm_mm->start_stack;
-	} else {
-		struct inode *inode = priv->inode;
-		struct task_struct *task;
-
-		rcu_read_lock();
-		task = pid_task(proc_pid(inode), PIDTYPE_PID);
-		if (task)
-			stack = vma_is_stack_for_task(vma, task);
-		rcu_read_unlock();
-	}
-	return stack;
+	/*
+	 * We make no effort to guess what a given thread considers to be
+	 * its "stack".  It's not even well-defined for programs written
+	 * languages like Go.
+	 */
+	return vma->vm_start <= vma->vm_mm->start_stack &&
+		vma->vm_end >= vma->vm_mm->start_stack;
 }
 
 static void
@@ -354,7 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 			goto done;
 		}
 
-		if (is_stack(priv, vma, is_pid))
+		if (is_stack(priv, vma))
 			name = "[stack]";
 	}
 
@@ -1669,7 +1660,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 		seq_file_path(m, file, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_puts(m, " heap");
-	} else if (is_stack(proc_priv, vma, is_pid)) {
+	} else if (is_stack(proc_priv, vma)) {
 		seq_puts(m, " stack");
 	}
 
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index faacb0c0d857..37175621e890 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm,
 }
 
 static int is_stack(struct proc_maps_private *priv,
-		    struct vm_area_struct *vma, int is_pid)
+		    struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	int stack = 0;
-
-	if (is_pid) {
-		stack = vma->vm_start <= mm->start_stack &&
-			vma->vm_end >= mm->start_stack;
-	} else {
-		struct inode *inode = priv->inode;
-		struct task_struct *task;
-
-		rcu_read_lock();
-		task = pid_task(proc_pid(inode), PIDTYPE_PID);
-		if (task)
-			stack = vma_is_stack_for_task(vma, task);
-		rcu_read_unlock();
-	}
-	return stack;
+
+	/*
+	 * We make no effort to guess what a given thread considers to be
+	 * its "stack".  It's not even well-defined for programs written
+	 * languages like Go.
+	 */
+	return vma->vm_start <= mm->start_stack &&
+		vma->vm_end >= mm->start_stack;
 }
 
 /*
@@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
 	if (file) {
 		seq_pad(m, ' ');
 		seq_file_path(m, file, "");
-	} else if (mm && is_stack(priv, vma, is_pid)) {
+	} else if (mm && is_stack(priv, vma)) {
 		seq_pad(m, ' ');
 		seq_printf(m, "[stack]");
 	}
diff --git a/fs/read_write.c b/fs/read_write.c
index 66215a7b17cf..190e0d362581 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -730,6 +730,35 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
diff --git a/fs/super.c b/fs/super.c
index c2ff475c1711..c183835566c1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1269,25 +1269,34 @@ EXPORT_SYMBOL(__sb_start_write);
 static void sb_wait_write(struct super_block *sb, int level)
 {
 	percpu_down_write(sb->s_writers.rw_sem + level-1);
-	/*
-	 * We are going to return to userspace and forget about this lock, the
-	 * ownership goes to the caller of thaw_super() which does unlock.
-	 *
-	 * FIXME: we should do this before return from freeze_super() after we
-	 * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
-	 * should re-acquire these locks before s_op->unfreeze_fs(sb). However
-	 * this leads to lockdep false-positives, so currently we do the early
-	 * release right after acquire.
-	 */
-	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
 }
 
-static void sb_freeze_unlock(struct super_block *sb)
+/*
+ * We are going to return to userspace and forget about these locks, the
+ * ownership goes to the caller of thaw_super() which does unlock().
+ */
+static void lockdep_sb_freeze_release(struct super_block *sb)
+{
+	int level;
+
+	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+/*
+ * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
+ */
+static void lockdep_sb_freeze_acquire(struct super_block *sb)
 {
 	int level;
 
 	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
 		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+	int level;
 
 	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
 		percpu_up_write(sb->s_writers.rw_sem + level);
@@ -1379,10 +1388,11 @@ int freeze_super(struct super_block *sb)
 		}
 	}
 	/*
-	 * This is just for debugging purposes so that fs can warn if it
-	 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+	 * For debugging purposes so that fs can warn if it sees write activity
+	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
 	 */
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
 	return 0;
 }
@@ -1399,7 +1409,7 @@ int thaw_super(struct super_block *sb)
 	int error;
 
 	down_write(&sb->s_umount);
-	if (sb->s_writers.frozen == SB_UNFROZEN) {
+	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
 		up_write(&sb->s_umount);
 		return -EINVAL;
 	}
@@ -1409,11 +1419,14 @@ int thaw_super(struct super_block *sb)
 		goto out;
 	}
 
+	lockdep_sb_freeze_acquire(sb);
+
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
 		if (error) {
 			printk(KERN_ERR
 				"VFS:Filesystem thaw failed\n");
+			lockdep_sb_freeze_release(sb);
 			up_write(&sb->s_umount);
 			return error;
 		}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 94374e435025..2b67bda2021b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock);
 
 void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 {
-	char *buf, *path = NULL;
+	char *buf;
 
 	buf = kzalloc(PATH_MAX, GFP_KERNEL);
 	if (buf)
-		path = kernfs_path(parent, buf, PATH_MAX);
+		kernfs_path(parent, buf, PATH_MAX);
 
 	WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
-	     path, name);
+	     buf, name);
 
 	kfree(buf);
 }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c8f60df2733e..ca16c5d7bab1 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -439,7 +439,7 @@ static unsigned int vfs_dent_type(uint8_t type)
  */
 static int ubifs_readdir(struct file *file, struct dir_context *ctx)
 {
-	int err;
+	int err = 0;
 	struct qstr nm;
 	union ubifs_key key;
 	struct ubifs_dent_node *dent;
@@ -541,14 +541,20 @@ out:
 	kfree(file->private_data);
 	file->private_data = NULL;
 
-	if (err != -ENOENT) {
+	if (err != -ENOENT)
 		ubifs_err(c, "cannot find next direntry, error %d", err);
-		return err;
-	}
+	else
+		/*
+		 * -ENOENT is a non-fatal error in this context, the TNC uses
+		 * it to indicate that the cursor moved past the current directory
+		 * and readdir() has to stop.
+		 */
+		err = 0;
+
 
 	/* 2 is a special value indicating that there are no more direntries */
 	ctx->pos = 2;
-	return 0;
+	return err;
 }
 
 /* Free saved readdir() state when the directory is closed */
@@ -1060,9 +1066,9 @@ static void unlock_4_inodes(struct inode *inode1, struct inode *inode2,
 	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
 }
 
-static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
-			struct inode *new_dir, struct dentry *new_dentry,
-			unsigned int flags)
+static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+		     struct inode *new_dir, struct dentry *new_dentry,
+		     unsigned int flags)
 {
 	struct ubifs_info *c = old_dir->i_sb->s_fs_info;
 	struct inode *old_inode = d_inode(old_dentry);
@@ -1323,7 +1329,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 	return err;
 }
 
-static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			struct inode *new_dir, struct dentry *new_dentry,
 			unsigned int flags)
 {
@@ -1336,7 +1342,7 @@ static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
 	if (flags & RENAME_EXCHANGE)
 		return ubifs_xrename(old_dir, old_dentry, new_dir, new_dentry);
 
-	return ubifs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+	return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
 int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -1387,7 +1393,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
 	.mkdir       = ubifs_mkdir,
 	.rmdir       = ubifs_rmdir,
 	.mknod       = ubifs_mknod,
-	.rename      = ubifs_rename2,
+	.rename      = ubifs_rename,
 	.setattr     = ubifs_setattr,
 	.getattr     = ubifs_getattr,
 	.listxattr   = ubifs_listxattr,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 6c2f4d41ed73..d9f9615bfd71 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -172,6 +172,7 @@ out_cancel:
 	host_ui->xattr_cnt -= 1;
 	host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
 	host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+	host_ui->xattr_names -= nm->len;
 	mutex_unlock(&host_ui->ui_mutex);
 out_free:
 	make_bad_inode(inode);
@@ -478,6 +479,7 @@ out_cancel:
 	host_ui->xattr_cnt += 1;
 	host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
 	host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
+	host_ui->xattr_names += nm->len;
 	mutex_unlock(&host_ui->ui_mutex);
 	ubifs_release_budget(c, &req);
 	make_bad_inode(inode);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c27344cf38e1..c6eb21940783 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
 	 * allocating, so skip that check by pretending to be freeing.
 	 */
 	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
-	if (error)
-		goto error0;
-error0:
 	xfs_perag_put(args.pag);
 	if (error)
 		trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
 	return xfs_bmap_btalloc(ap);
 }
 
+/* Trim extent to fit a logical block range. */
+void
+xfs_trim_extent(
+	struct xfs_bmbt_irec	*irec,
+	xfs_fileoff_t		bno,
+	xfs_filblks_t		len)
+{
+	xfs_fileoff_t		distance;
+	xfs_fileoff_t		end = bno + len;
+
+	if (irec->br_startoff + irec->br_blockcount <= bno ||
+	    irec->br_startoff >= end) {
+		irec->br_blockcount = 0;
+		return;
+	}
+
+	if (irec->br_startoff < bno) {
+		distance = bno - irec->br_startoff;
+		if (isnullstartblock(irec->br_startblock))
+			irec->br_startblock = DELAYSTARTBLOCK;
+		if (irec->br_startblock != DELAYSTARTBLOCK &&
+		    irec->br_startblock != HOLESTARTBLOCK)
+			irec->br_startblock += distance;
+		irec->br_startoff += distance;
+		irec->br_blockcount -= distance;
+	}
+
+	if (end < irec->br_startoff + irec->br_blockcount) {
+		distance = irec->br_startoff + irec->br_blockcount - end;
+		irec->br_blockcount -= distance;
+	}
+}
+
 /*
  * Trim the returned map to the required bounds
  */
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
 	return stolen;
 }
 
+int
+xfs_bmap_del_extent_delay(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	xfs_extnum_t		*idx,
+	struct xfs_bmbt_irec	*got,
+	struct xfs_bmbt_irec	*del)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmbt_irec	new;
+	int64_t			da_old, da_new, da_diff = 0;
+	xfs_fileoff_t		del_endoff, got_endoff;
+	xfs_filblks_t		got_indlen, new_indlen, stolen;
+	int			error = 0, state = 0;
+	bool			isrt;
+
+	XFS_STATS_INC(mp, xs_del_exlist);
+
+	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+	del_endoff = del->br_startoff + del->br_blockcount;
+	got_endoff = got->br_startoff + got->br_blockcount;
+	da_old = startblockval(got->br_startblock);
+	da_new = 0;
+
+	ASSERT(*idx >= 0);
+	ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+	ASSERT(del->br_blockcount > 0);
+	ASSERT(got->br_startoff <= del->br_startoff);
+	ASSERT(got_endoff >= del_endoff);
+
+	if (isrt) {
+		int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+
+		do_div(rtexts, mp->m_sb.sb_rextsize);
+		xfs_mod_frextents(mp, rtexts);
+	}
+
+	/*
+	 * Update the inode delalloc counter now and wait to update the
+	 * sb counters as we might have to borrow some blocks for the
+	 * indirect block accounting.
+	 */
+	xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+			isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+	ip->i_delayed_blks -= del->br_blockcount;
+
+	if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
+
+	if (got->br_startoff == del->br_startoff)
+		state |= BMAP_LEFT_CONTIG;
+	if (got_endoff == del_endoff)
+		state |= BMAP_RIGHT_CONTIG;
+
+	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+		/*
+		 * Matches the whole extent.  Delete the entry.
+		 */
+		xfs_iext_remove(ip, *idx, 1, state);
+		--*idx;
+		break;
+	case BMAP_LEFT_CONTIG:
+		/*
+		 * Deleting the first part of the extent.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		got->br_startoff = del_endoff;
+		got->br_blockcount -= del->br_blockcount;
+		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+				got->br_blockcount), da_old);
+		got->br_startblock = nullstartblock((int)da_new);
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		break;
+	case BMAP_RIGHT_CONTIG:
+		/*
+		 * Deleting the last part of the extent.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		got->br_blockcount = got->br_blockcount - del->br_blockcount;
+		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+				got->br_blockcount), da_old);
+		got->br_startblock = nullstartblock((int)da_new);
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		break;
+	case 0:
+		/*
+		 * Deleting the middle of the extent.
+		 *
+		 * Distribute the original indlen reservation across the two new
+		 * extents.  Steal blocks from the deleted extent if necessary.
+		 * Stealing blocks simply fudges the fdblocks accounting below.
+		 * Warn if either of the new indlen reservations is zero as this
+		 * can lead to delalloc problems.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+
+		got->br_blockcount = del->br_startoff - got->br_startoff;
+		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
+
+		new.br_blockcount = got_endoff - del_endoff;
+		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+
+		WARN_ON_ONCE(!got_indlen || !new_indlen);
+		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
+						       del->br_blockcount);
+
+		got->br_startblock = nullstartblock((int)got_indlen);
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
+
+		new.br_startoff = del_endoff;
+		new.br_state = got->br_state;
+		new.br_startblock = nullstartblock((int)new_indlen);
+
+		++*idx;
+		xfs_iext_insert(ip, *idx, 1, &new, state);
+
+		da_new = got_indlen + new_indlen - stolen;
+		del->br_blockcount -= stolen;
+		break;
+	}
+
+	ASSERT(da_old >= da_new);
+	da_diff = da_old - da_new;
+	if (!isrt)
+		da_diff += del->br_blockcount;
+	if (da_diff)
+		xfs_mod_fdblocks(mp, da_diff, false);
+	return error;
+}
+
+void
+xfs_bmap_del_extent_cow(
+	struct xfs_inode	*ip,
+	xfs_extnum_t		*idx,
+	struct xfs_bmbt_irec	*got,
+	struct xfs_bmbt_irec	*del)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	struct xfs_bmbt_irec	new;
+	xfs_fileoff_t		del_endoff, got_endoff;
+	int			state = BMAP_COWFORK;
+
+	XFS_STATS_INC(mp, xs_del_exlist);
+
+	del_endoff = del->br_startoff + del->br_blockcount;
+	got_endoff = got->br_startoff + got->br_blockcount;
+
+	ASSERT(*idx >= 0);
+	ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+	ASSERT(del->br_blockcount > 0);
+	ASSERT(got->br_startoff <= del->br_startoff);
+	ASSERT(got_endoff >= del_endoff);
+	ASSERT(!isnullstartblock(got->br_startblock));
+
+	if (got->br_startoff == del->br_startoff)
+		state |= BMAP_LEFT_CONTIG;
+	if (got_endoff == del_endoff)
+		state |= BMAP_RIGHT_CONTIG;
+
+	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+		/*
+		 * Matches the whole extent.  Delete the entry.
+		 */
+		xfs_iext_remove(ip, *idx, 1, state);
+		--*idx;
+		break;
+	case BMAP_LEFT_CONTIG:
+		/*
+		 * Deleting the first part of the extent.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		got->br_startoff = del_endoff;
+		got->br_blockcount -= del->br_blockcount;
+		got->br_startblock = del->br_startblock + del->br_blockcount;
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		break;
+	case BMAP_RIGHT_CONTIG:
+		/*
+		 * Deleting the last part of the extent.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		got->br_blockcount -= del->br_blockcount;
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		break;
+	case 0:
+		/*
+		 * Deleting the middle of the extent.
+		 */
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		got->br_blockcount = del->br_startoff - got->br_startoff;
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		new.br_startoff = del_endoff;
+		new.br_blockcount = got_endoff - del_endoff;
+		new.br_state = got->br_state;
+		new.br_startblock = del->br_startblock + del->br_blockcount;
+
+		++*idx;
+		xfs_iext_insert(ip, *idx, 1, &new, state);
+		break;
+	}
+}
+
 /*
  * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
@@ -5171,175 +5414,6 @@ done:
 	return error;
 }
 
-/* Remove an extent from the CoW fork.  Similar to xfs_bmap_del_extent. */
-int
-xfs_bunmapi_cow(
-	struct xfs_inode		*ip,
-	struct xfs_bmbt_irec		*del)
-{
-	xfs_filblks_t			da_new;
-	xfs_filblks_t			da_old;
-	xfs_fsblock_t			del_endblock = 0;
-	xfs_fileoff_t			del_endoff;
-	int				delay;
-	struct xfs_bmbt_rec_host	*ep;
-	int				error;
-	struct xfs_bmbt_irec		got;
-	xfs_fileoff_t			got_endoff;
-	struct xfs_ifork		*ifp;
-	struct xfs_mount		*mp;
-	xfs_filblks_t			nblks;
-	struct xfs_bmbt_irec		new;
-	/* REFERENCED */
-	uint				qfield;
-	xfs_filblks_t			temp;
-	xfs_filblks_t			temp2;
-	int				state = BMAP_COWFORK;
-	int				eof;
-	xfs_extnum_t			eidx;
-
-	mp = ip->i_mount;
-	XFS_STATS_INC(mp, xs_del_exlist);
-
-	ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
-			&eidx, &got, &new);
-
-	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
-	ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
-		(uint)sizeof(xfs_bmbt_rec_t)));
-	ASSERT(del->br_blockcount > 0);
-	ASSERT(got.br_startoff <= del->br_startoff);
-	del_endoff = del->br_startoff + del->br_blockcount;
-	got_endoff = got.br_startoff + got.br_blockcount;
-	ASSERT(got_endoff >= del_endoff);
-	delay = isnullstartblock(got.br_startblock);
-	ASSERT(isnullstartblock(del->br_startblock) == delay);
-	qfield = 0;
-	error = 0;
-	/*
-	 * If deleting a real allocation, must free up the disk space.
-	 */
-	if (!delay) {
-		nblks = del->br_blockcount;
-		qfield = XFS_TRANS_DQ_BCOUNT;
-		/*
-		 * Set up del_endblock and cur for later.
-		 */
-		del_endblock = del->br_startblock + del->br_blockcount;
-		da_old = da_new = 0;
-	} else {
-		da_old = startblockval(got.br_startblock);
-		da_new = 0;
-		nblks = 0;
-	}
-	qfield = qfield;
-	nblks = nblks;
-
-	/*
-	 * Set flag value to use in switch statement.
-	 * Left-contig is 2, right-contig is 1.
-	 */
-	switch (((got.br_startoff == del->br_startoff) << 1) |
-		(got_endoff == del_endoff)) {
-	case 3:
-		/*
-		 * Matches the whole extent.  Delete the entry.
-		 */
-		xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
-		--eidx;
-		break;
-
-	case 2:
-		/*
-		 * Deleting the first part of the extent.
-		 */
-		trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-		xfs_bmbt_set_startoff(ep, del_endoff);
-		temp = got.br_blockcount - del->br_blockcount;
-		xfs_bmbt_set_blockcount(ep, temp);
-		if (delay) {
-			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-				da_old);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-			da_new = temp;
-			break;
-		}
-		xfs_bmbt_set_startblock(ep, del_endblock);
-		trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-		break;
-
-	case 1:
-		/*
-		 * Deleting the last part of the extent.
-		 */
-		temp = got.br_blockcount - del->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
-		if (delay) {
-			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-				da_old);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-			da_new = temp;
-			break;
-		}
-		trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-		break;
-
-	case 0:
-		/*
-		 * Deleting the middle of the extent.
-		 */
-		temp = del->br_startoff - got.br_startoff;
-		trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
-		new.br_startoff = del_endoff;
-		temp2 = got_endoff - del_endoff;
-		new.br_blockcount = temp2;
-		new.br_state = got.br_state;
-		if (!delay) {
-			new.br_startblock = del_endblock;
-		} else {
-			temp = xfs_bmap_worst_indlen(ip, temp);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			temp2 = xfs_bmap_worst_indlen(ip, temp2);
-			new.br_startblock = nullstartblock((int)temp2);
-			da_new = temp + temp2;
-			while (da_new > da_old) {
-				if (temp) {
-					temp--;
-					da_new--;
-					xfs_bmbt_set_startblock(ep,
-						nullstartblock((int)temp));
-				}
-				if (da_new == da_old)
-					break;
-				if (temp2) {
-					temp2--;
-					da_new--;
-					new.br_startblock =
-						nullstartblock((int)temp2);
-				}
-			}
-		}
-		trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-		xfs_iext_insert(ip, eidx + 1, 1, &new, state);
-		++eidx;
-		break;
-	}
-
-	/*
-	 * Account for change in delayed indirect blocks.
-	 * Nothing to do for disk quota accounting here.
-	 */
-	ASSERT(da_old >= da_new);
-	if (da_old > da_new)
-		xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
-
-	return error;
-}
-
 /*
  * Unmap (remove) blocks from a file.
  * If nexts is nonzero then the number of extents to remove is limited to
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f97db7132564..7cae6ec27fa6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -190,6 +190,8 @@ void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 #define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 
+void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+		xfs_filblks_t len);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops, int *done);
-int	xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
+int	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+		xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+		struct xfs_bmbt_irec *del);
+void	xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+		struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
 int	xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
 		xfs_extnum_t num);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5c8e6f2ce44f..0e80993c8a59 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
 	return rval;
 }
 
-int
+static int
 xfs_btree_count_blocks_helper(
 	struct xfs_btree_cur	*cur,
 	int			level,
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 3cc3cf767474..ac9a003dd29a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
 	if (mp->m_quotainfo)
 		ndquots = mp->m_quotainfo->qi_dqperchunk;
 	else
-		ndquots = xfs_calc_dquots_per_chunk(
-					XFS_BB_TO_FSB(mp, bp->b_length));
+		ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
 
 	for (i = 0; i < ndquots; i++, d++) {
 		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f6547fc5e016..6b7579e7b60a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
  * padding field for v3 inodes.
  */
 #define	XFS_DINODE_MAGIC		0x494e	/* 'IN' */
-#define XFS_DINODE_GOOD_VERSION(v)	((v) >= 1 && (v) <= 3)
 typedef struct xfs_dinode {
 	__be16		di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
 	__be16		di_mode;	/* mode and type of file */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8de9a3a29589..134424fac434 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -57,6 +57,17 @@ xfs_inobp_check(
 }
 #endif
 
+bool
+xfs_dinode_good_version(
+	struct xfs_mount *mp,
+	__u8		version)
+{
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		return version == 3;
+
+	return version == 1 || version == 2;
+}
+
 /*
  * If we are doing readahead on an inode buffer, we might be in log recovery
  * reading an inode allocation buffer that hasn't yet been replayed, and hence
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
 
 		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_version);
+			xfs_dinode_good_version(mp, dip->di_version);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP,
 						XFS_RANDOM_ITOBP_INOTOBP))) {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 62d9d4681c8c..3cfe12a4f58a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -74,6 +74,8 @@ void	xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
 void	xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
 			       struct xfs_dinode *to);
 
+bool	xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
+
 #if defined(DEBUG)
 void	xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #else
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a314fc7b56fa..6e4f7f900fea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t			isize = i_size_read(inode);
 	size_t			count = iov_iter_count(to);
+	loff_t			end = iocb->ki_pos + count - 1;
 	struct iov_iter		data;
 	struct xfs_buftarg	*target;
 	ssize_t			ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
 
 	file_accessed(iocb->ki_filp);
 
-	/*
-	 * Locking is a bit tricky here. If we take an exclusive lock for direct
-	 * IO, we effectively serialise all new concurrent read IO to this file
-	 * and block it behind IO that is currently in progress because IO in
-	 * progress holds the IO lock shared. We only need to hold the lock
-	 * exclusive to blow away the page cache, so only take lock exclusively
-	 * if the page cache needs invalidation. This allows the normal direct
-	 * IO case of no page cache pages to proceeed concurrently without
-	 * serialisation.
-	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 	if (mapping->nrpages) {
-		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		if (ret)
+			goto out_unlock;
 
 		/*
-		 * The generic dio code only flushes the range of the particular
-		 * I/O. Because we take an exclusive lock here, this whole
-		 * sequence is considerably more expensive for us. This has a
-		 * noticeable performance impact for any file with cached pages,
-		 * even when outside of the range of the particular I/O.
-		 *
-		 * Hence, amortize the cost of the lock against a full file
-		 * flush and reduce the chances of repeated iolock cycles going
-		 * forward.
+		 * Invalidate whole pages. This can return an error if we fail
+		 * to invalidate a page, but this should never happen on XFS.
+		 * Warn if it does fail.
 		 */
-		if (mapping->nrpages) {
-			ret = filemap_write_and_wait(mapping);
-			if (ret) {
-				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-				return ret;
-			}
-
-			/*
-			 * Invalidate whole pages. This can return an error if
-			 * we fail to invalidate a page, but this should never
-			 * happen on XFS. Warn if it does fail.
-			 */
-			ret = invalidate_inode_pages2(mapping);
-			WARN_ON_ONCE(ret);
-			ret = 0;
-		}
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+		ret = 0;
 	}
 
 	data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
 	}
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+out_unlock:
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
 	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + count) & mp->m_blockmask))
-		unaligned_io = 1;
-
 	/*
-	 * We don't need to take an exclusive lock unless there page cache needs
-	 * to be invalidated or unaligned IO is being executed. We don't need to
-	 * consider the EOF extension case here because
-	 * xfs_file_aio_write_checks() will relock the inode as necessary for
-	 * EOF zeroing cases and fill out the new inode size as appropriate.
+	 * Don't take the exclusive iolock here unless the I/O is unaligned to
+	 * the file system block size.  We don't need to consider the EOF
+	 * extension case here because xfs_file_aio_write_checks() will relock
+	 * the inode as necessary for EOF zeroing cases and fill out the new
+	 * inode size as appropriate.
 	 */
-	if (unaligned_io || mapping->nrpages)
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
+		unaligned_io = 1;
 		iolock = XFS_IOLOCK_EXCL;
-	else
+	} else {
 		iolock = XFS_IOLOCK_SHARED;
-	xfs_rw_ilock(ip, iolock);
-
-	/*
-	 * Recheck if there are cached pages that need invalidate after we got
-	 * the iolock to protect against other threads adding new pages while
-	 * we were waiting for the iolock.
-	 */
-	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-		xfs_rw_iunlock(ip, iolock);
-		iolock = XFS_IOLOCK_EXCL;
-		xfs_rw_ilock(ip, iolock);
 	}
 
+	xfs_rw_ilock(ip, iolock);
+
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
 	end = iocb->ki_pos + count - 1;
 
-	/*
-	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-	 */
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
 		if (ret)
 			goto out;
+
 		/*
 		 * Invalidate whole pages. This can return an error if we fail
 		 * to invalidate a page, but this should never happen on XFS.
 		 * Warn if it does fail.
 		 */
-		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
-	 * otherwise demote the lock if we had to flush cached pages
+	 * otherwise demote the lock if we had to take the exclusive lock
+	 * for other reasons in xfs_file_aio_write_checks.
 	 */
 	if (unaligned_io)
 		inode_dio_wait(inode);
@@ -947,134 +909,6 @@ out_unlock:
 	return error;
 }
 
-/*
- * Flush all file writes out to disk.
- */
-static int
-xfs_file_wait_for_io(
-	struct inode	*inode,
-	loff_t		offset,
-	size_t		len)
-{
-	loff_t		rounding;
-	loff_t		ioffset;
-	loff_t		iendoffset;
-	loff_t		bs;
-	int		ret;
-
-	bs = inode->i_sb->s_blocksize;
-	inode_dio_wait(inode);
-
-	rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
-	ioffset = round_down(offset, rounding);
-	iendoffset = round_up(offset + len, rounding) - 1;
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-					   iendoffset);
-	return ret;
-}
-
-/* Hook up to the VFS reflink function */
-STATIC int
-xfs_file_share_range(
-	struct file	*file_in,
-	loff_t		pos_in,
-	struct file	*file_out,
-	loff_t		pos_out,
-	u64		len,
-	bool		is_dedupe)
-{
-	struct inode	*inode_in;
-	struct inode	*inode_out;
-	ssize_t		ret;
-	loff_t		bs;
-	loff_t		isize;
-	int		same_inode;
-	loff_t		blen;
-	unsigned int	flags = 0;
-
-	inode_in = file_inode(file_in);
-	inode_out = file_inode(file_out);
-	bs = inode_out->i_sb->s_blocksize;
-
-	/* Don't touch certain kinds of inodes */
-	if (IS_IMMUTABLE(inode_out))
-		return -EPERM;
-	if (IS_SWAPFILE(inode_in) ||
-	    IS_SWAPFILE(inode_out))
-		return -ETXTBSY;
-
-	/* Reflink only works within this filesystem. */
-	if (inode_in->i_sb != inode_out->i_sb)
-		return -EXDEV;
-	same_inode = (inode_in->i_ino == inode_out->i_ino);
-
-	/* Don't reflink dirs, pipes, sockets... */
-	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-		return -EISDIR;
-	if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
-		return -EINVAL;
-	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-		return -EINVAL;
-
-	/* Don't share DAX file data for now. */
-	if (IS_DAX(inode_in) || IS_DAX(inode_out))
-		return -EINVAL;
-
-	/* Are we going all the way to the end? */
-	isize = i_size_read(inode_in);
-	if (isize == 0)
-		return 0;
-	if (len == 0)
-		len = isize - pos_in;
-
-	/* Ensure offsets don't wrap and the input is inside i_size */
-	if (pos_in + len < pos_in || pos_out + len < pos_out ||
-	    pos_in + len > isize)
-		return -EINVAL;
-
-	/* Don't allow dedupe past EOF in the dest file */
-	if (is_dedupe) {
-		loff_t	disize;
-
-		disize = i_size_read(inode_out);
-		if (pos_out >= disize || pos_out + len > disize)
-			return -EINVAL;
-	}
-
-	/* If we're linking to EOF, continue to the block boundary. */
-	if (pos_in + len == isize)
-		blen = ALIGN(isize, bs) - pos_in;
-	else
-		blen = len;
-
-	/* Only reflink if we're aligned to block boundaries */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-		return -EINVAL;
-
-	/* Don't allow overlapped reflink within the same file */
-	if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
-		return -EINVAL;
-
-	/* Wait for the completion of any pending IOs on srcfile */
-	ret = xfs_file_wait_for_io(inode_in, pos_in, len);
-	if (ret)
-		goto out;
-	ret = xfs_file_wait_for_io(inode_out, pos_out, len);
-	if (ret)
-		goto out;
-
-	if (is_dedupe)
-		flags |= XFS_REFLINK_DEDUPE;
-	ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-			pos_out, len, flags);
-	if (ret < 0)
-		goto out;
-
-out:
-	return ret;
-}
-
 STATIC ssize_t
 xfs_file_copy_range(
 	struct file	*file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
 {
 	int		error;
 
-	error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+	error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 				     len, false);
 	if (error)
 		return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
 	loff_t		pos_out,
 	u64		len)
 {
-	return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 				     len, false);
 }
 
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
 	if (len > XFS_MAX_DEDUPE_LEN)
 		len = XFS_MAX_DEDUPE_LEN;
 
-	error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+	error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
 				     len, true);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 14796b744e0a..f295049db681 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1656,9 +1656,9 @@ void
 xfs_inode_set_cowblocks_tag(
 	xfs_inode_t	*ip)
 {
-	trace_xfs_inode_set_eofblocks_tag(ip);
+	trace_xfs_inode_set_cowblocks_tag(ip);
 	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
-			trace_xfs_perag_set_eofblocks,
+			trace_xfs_perag_set_cowblocks,
 			XFS_ICI_COWBLOCKS_TAG);
 }
 
@@ -1666,7 +1666,7 @@ void
 xfs_inode_clear_cowblocks_tag(
 	xfs_inode_t	*ip)
 {
-	trace_xfs_inode_clear_eofblocks_tag(ip);
+	trace_xfs_inode_clear_cowblocks_tag(ip);
 	return __xfs_inode_clear_eofblocks_tag(ip,
-			trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
+			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d907eb9f8ef3..436e109bb01e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
 	xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
 			&got, &prev);
 	if (!eof && got.br_startoff <= offset_fsb) {
+		if (xfs_is_reflink_inode(ip)) {
+			bool		shared;
+
+			end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
+					maxbytes_fsb);
+			xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+			error = xfs_reflink_reserve_cow(ip, &got, &shared);
+			if (error)
+				goto out_unlock;
+		}
+
 		trace_xfs_iomap_found(ip, offset, count, 0, &got);
 		goto done;
 	}
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
-	bool			shared, trimmed;
 	int			nimaps = 1, error = 0;
+	bool			shared = false, trimmed = false;
 	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-		error = xfs_reflink_reserve_cow_range(ip, offset, length);
-		if (error < 0)
-			return error;
-	}
-
 	if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
 		   !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
 				iomap);
 	}
 
-	lockmode = xfs_ilock_data_map_shared(ip);
+	/*
+	 * COW writes will allocate delalloc space, so we need to make sure
+	 * to take the lock exclusively here.
+	 */
+	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+		lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	} else {
+		lockmode = xfs_ilock_data_map_shared(ip);
+	}
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
 
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
-	if (error) {
-		xfs_iunlock(ip, lockmode);
-		return error;
+	if (error)
+		goto out_unlock;
+
+	if (flags & IOMAP_REPORT) {
+		/* Trim the mapping to the nearest shared extent boundary. */
+		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+				&trimmed);
+		if (error)
+			goto out_unlock;
 	}
 
-	/* Trim the mapping to the nearest shared extent boundary. */
-	error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
-	if (error) {
-		xfs_iunlock(ip, lockmode);
-		return error;
+	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+		if (error)
+			goto out_unlock;
+
+		end_fsb = imap.br_startoff + imap.br_blockcount;
+		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
 	}
 
 	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
 	if (shared)
 		iomap->flags |= IOMAP_F_SHARED;
 	return 0;
+out_unlock:
+	xfs_iunlock(ip, lockmode);
+	return error;
 }
 
 static int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fc7873942bea..b341f10cf481 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1009,6 +1009,7 @@ xfs_mountfs(
  out_quota:
 	xfs_qm_unmount_quotas(mp);
  out_rtunmount:
+	mp->m_super->s_flags &= ~MS_ACTIVE;
 	xfs_rtunmount_inodes(mp);
  out_rele_rip:
 	IRELE(rip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5965e9455d91..a279b4e7f5fe 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
 	if (!xfs_is_reflink_inode(ip) ||
 	    ISUNWRITTEN(irec) ||
 	    irec->br_startblock == HOLESTARTBLOCK ||
-	    irec->br_startblock == DELAYSTARTBLOCK) {
+	    irec->br_startblock == DELAYSTARTBLOCK ||
+	    isnullstartblock(irec->br_startblock)) {
 		*shared = false;
 		return 0;
 	}
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
 	}
 }
 
-/* Create a CoW reservation for a range of blocks within a file. */
-static int
-__xfs_reflink_reserve_cow(
+/*
+ * Trim the passed in imap to the next shared/unshared extent boundary, and
+ * if imap->br_startoff points to a shared extent reserve space for it in the
+ * COW fork.  In this case *shared is set to true, else to false.
+ *
+ * Note that imap will always contain the block numbers for the existing blocks
+ * in the data fork, as the upper layers need them for read-modify-write
+ * operations.
+ */
+int
+xfs_reflink_reserve_cow(
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		*offset_fsb,
-	xfs_fileoff_t		end_fsb,
-	bool			*skipped)
+	struct xfs_bmbt_irec	*imap,
+	bool			*shared)
 {
-	struct xfs_bmbt_irec	got, prev, imap;
-	xfs_fileoff_t		orig_end_fsb;
-	int			nimaps, eof = 0, error = 0;
-	bool			shared = false, trimmed = false;
+	struct xfs_bmbt_irec	got, prev;
+	xfs_fileoff_t		end_fsb, orig_end_fsb;
+	int			eof = 0, error = 0;
+	bool			trimmed;
 	xfs_extnum_t		idx;
 	xfs_extlen_t		align;
 
-	/* Already reserved?  Skip the refcount btree access. */
-	xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
+	/*
+	 * Search the COW fork extent list first.  This serves two purposes:
+	 * first this implement the speculative preallocation using cowextisze,
+	 * so that we also unshared block adjacent to shared blocks instead
+	 * of just the shared blocks themselves.  Second the lookup in the
+	 * extent list is generally faster than going out to the shared extent
+	 * tree.
+	 */
+	xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
 			&got, &prev);
-	if (!eof && got.br_startoff <= *offset_fsb) {
-		end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
-		trace_xfs_reflink_cow_found(ip, &got);
-		goto done;
-	}
+	if (!eof && got.br_startoff <= imap->br_startoff) {
+		trace_xfs_reflink_cow_found(ip, imap);
+		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 
-	/* Read extent from the source file. */
-	nimaps = 1;
-	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-			&imap, &nimaps, 0);
-	if (error)
-		goto out_unlock;
-	ASSERT(nimaps == 1);
+		*shared = true;
+		return 0;
+	}
 
 	/* Trim the mapping to the nearest shared extent boundary. */
-	error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
+	error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
 	if (error)
-		goto out_unlock;
-
-	end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
+		return error;
 
 	/* Not shared?  Just report the (potentially capped) extent. */
-	if (!shared) {
-		*skipped = true;
-		goto done;
-	}
+	if (!*shared)
+		return 0;
 
 	/*
 	 * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
 	 */
 	error = xfs_qm_dqattach_locked(ip, 0);
 	if (error)
-		goto out_unlock;
+		return error;
+
+	end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
 
 	align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
 	if (align)
 		end_fsb = roundup_64(end_fsb, align);
 
 retry:
-	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
-			end_fsb - *offset_fsb, &got,
-			&prev, &idx, eof);
+	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+			end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
 	switch (error) {
 	case 0:
 		break;
 	case -ENOSPC:
 	case -EDQUOT:
 		/* retry without any preallocation */
-		trace_xfs_reflink_cow_enospc(ip, &imap);
+		trace_xfs_reflink_cow_enospc(ip, imap);
 		if (end_fsb != orig_end_fsb) {
 			end_fsb = orig_end_fsb;
 			goto retry;
 		}
 		/*FALLTHRU*/
 	default:
-		goto out_unlock;
+		return error;
 	}
 
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_cowblocks_tag(ip);
 
 	trace_xfs_reflink_cow_alloc(ip, &got);
-done:
-	*offset_fsb = end_fsb;
-out_unlock:
-	return error;
-}
-
-/* Create a CoW reservation for part of a file. */
-int
-xfs_reflink_reserve_cow_range(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_off_t		count)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb, end_fsb;
-	bool			skipped = false;
-	int			error;
-
-	trace_xfs_reflink_reserve_cow_range(ip, offset, count);
-
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	end_fsb = XFS_B_TO_FSB(mp, offset + count);
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	while (offset_fsb < end_fsb) {
-		error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
-				&skipped);
-		if (error) {
-			trace_xfs_reflink_reserve_cow_range_error(ip, error,
-				_RET_IP_);
-			break;
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	return error;
+	return 0;
 }
 
 /* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans	*tp;
 	xfs_fsblock_t		first_block;
-	xfs_fileoff_t		next_fsb;
 	int			nimaps = 1, error;
-	bool			skipped = false;
+	bool			shared;
 
 	xfs_defer_init(&dfops, &first_block);
 
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-	next_fsb = *offset_fsb;
-	error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+	/* Read extent from the source file. */
+	nimaps = 1;
+	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+			&imap, &nimaps, 0);
+	if (error)
+		goto out_unlock;
+	ASSERT(nimaps == 1);
+
+	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
 	if (error)
 		goto out_trans_cancel;
 
-	if (skipped) {
-		*offset_fsb = next_fsb;
+	if (!shared) {
+		*offset_fsb = imap.br_startoff + imap.br_blockcount;
 		goto out_trans_cancel;
 	}
 
 	xfs_trans_ijoin(tp, ip, 0);
-	error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
 			XFS_BMAPI_COWFORK, &first_block,
 			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
 			&imap, &nimaps, &dfops);
 	if (error)
 		goto out_trans_cancel;
 
-	/* We might not have been able to map the whole delalloc extent */
-	*offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
-
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
 		goto out_trans_cancel;
 
 	error = xfs_trans_commit(tp);
 
+	*offset_fsb = imap.br_startoff + imap.br_blockcount;
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
 	xfs_fileoff_t			offset_fsb,
 	xfs_fileoff_t			end_fsb)
 {
-	struct xfs_bmbt_irec		irec;
-	xfs_filblks_t			count_fsb;
+	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	struct xfs_bmbt_irec		got, prev, del;
+	xfs_extnum_t			idx;
 	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
-	int				error = 0;
-	int				nimaps;
+	int				error = 0, eof = 0;
 
 	if (!xfs_is_reflink_inode(ip))
 		return 0;
 
-	/* Go find the old extent in the CoW fork. */
-	while (offset_fsb < end_fsb) {
-		nimaps = 1;
-		count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-		error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-				&nimaps, XFS_BMAPI_COWFORK);
-		if (error)
-			break;
-		ASSERT(nimaps == 1);
-
-		trace_xfs_reflink_cancel_cow(ip, &irec);
+	xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
+			&got, &prev);
+	if (eof)
+		return 0;
 
-		if (irec.br_startblock == DELAYSTARTBLOCK) {
-			/* Free a delayed allocation. */
-			xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
-					false);
-			ip->i_delayed_blks -= irec.br_blockcount;
+	while (got.br_startoff < end_fsb) {
+		del = got;
+		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+		trace_xfs_reflink_cancel_cow(ip, &del);
 
-			/* Remove the mapping from the CoW fork. */
-			error = xfs_bunmapi_cow(ip, &irec);
+		if (isnullstartblock(del.br_startblock)) {
+			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
+					&idx, &got, &del);
 			if (error)
 				break;
-		} else if (irec.br_startblock == HOLESTARTBLOCK) {
-			/* empty */
 		} else {
 			xfs_trans_ijoin(*tpp, ip, 0);
 			xfs_defer_init(&dfops, &firstfsb);
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
-					&dfops, irec.br_startblock,
-					irec.br_blockcount);
+					&dfops, del.br_startblock,
+					del.br_blockcount);
 			if (error)
 				break;
 
 			xfs_bmap_add_free(ip->i_mount, &dfops,
-					irec.br_startblock, irec.br_blockcount,
+					del.br_startblock, del.br_blockcount,
 					NULL);
 
 			/* Update quota accounting */
 			xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-					-(long)irec.br_blockcount);
+					-(long)del.br_blockcount);
 
 			/* Roll the transaction */
 			error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
 			}
 
 			/* Remove the mapping from the CoW fork. */
-			error = xfs_bunmapi_cow(ip, &irec);
-			if (error)
-				break;
+			xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
 		}
 
-		/* Roll on... */
-		offset_fsb = irec.br_startoff + irec.br_blockcount;
+		if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+			break;
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
 	}
 
+	/* clear tag if cow fork is emptied */
+	if (!ifp->if_bytes)
+		xfs_inode_clear_cowblocks_tag(ip);
+
 	return error;
 }
 
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
 	xfs_off_t			offset,
 	xfs_off_t			count)
 {
-	struct xfs_bmbt_irec		irec;
-	struct xfs_bmbt_irec		uirec;
+	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	struct xfs_bmbt_irec		got, prev, del;
 	struct xfs_trans		*tp;
 	xfs_fileoff_t			offset_fsb;
 	xfs_fileoff_t			end_fsb;
-	xfs_filblks_t			count_fsb;
 	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
-	int				error;
+	int				error, eof = 0;
 	unsigned int			resblks;
-	xfs_filblks_t			ilen;
 	xfs_filblks_t			rlen;
-	int				nimaps;
+	xfs_extnum_t			idx;
 
 	trace_xfs_reflink_end_cow(ip, offset, count);
 
+	/* No COW extents?  That's easy! */
+	if (ifp->if_bytes == 0)
+		return 0;
+
 	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
 	end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
-	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
 
 	/* Start a rolling transaction to switch the mappings */
 	resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	/* Go find the old extent in the CoW fork. */
-	while (offset_fsb < end_fsb) {
-		/* Read extent from the source file */
-		nimaps = 1;
-		count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-		error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-				&nimaps, XFS_BMAPI_COWFORK);
-		if (error)
-			goto out_cancel;
-		ASSERT(nimaps == 1);
+	xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
+			&got, &prev);
 
-		ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
-		trace_xfs_reflink_cow_remap(ip, &irec);
+	/* If there is a hole at end_fsb - 1 go to the previous extent */
+	if (eof || got.br_startoff > end_fsb) {
+		ASSERT(idx > 0);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+	}
 
-		/*
-		 * We can have a hole in the CoW fork if part of a directio
-		 * write is CoW but part of it isn't.
-		 */
-		rlen = ilen = irec.br_blockcount;
-		if (irec.br_startblock == HOLESTARTBLOCK)
+	/* Walk backwards until we're out of the I/O range... */
+	while (got.br_startoff + got.br_blockcount > offset_fsb) {
+		del = got;
+		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+		/* Extent delete may have bumped idx forward */
+		if (!del.br_blockcount) {
+			idx--;
 			goto next_extent;
+		}
+
+		ASSERT(!isnullstartblock(got.br_startblock));
 
 		/* Unmap the old blocks in the data fork. */
-		while (rlen) {
-			xfs_defer_init(&dfops, &firstfsb);
-			error = __xfs_bunmapi(tp, ip, irec.br_startoff,
-					&rlen, 0, 1, &firstfsb, &dfops);
-			if (error)
-				goto out_defer;
-
-			/*
-			 * Trim the extent to whatever got unmapped.
-			 * Remember, bunmapi works backwards.
-			 */
-			uirec.br_startblock = irec.br_startblock + rlen;
-			uirec.br_startoff = irec.br_startoff + rlen;
-			uirec.br_blockcount = irec.br_blockcount - rlen;
-			irec.br_blockcount = rlen;
-			trace_xfs_reflink_cow_remap_piece(ip, &uirec);
+		xfs_defer_init(&dfops, &firstfsb);
+		rlen = del.br_blockcount;
+		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
+				&firstfsb, &dfops);
+		if (error)
+			goto out_defer;
 
-			/* Free the CoW orphan record. */
-			error = xfs_refcount_free_cow_extent(tp->t_mountp,
-					&dfops, uirec.br_startblock,
-					uirec.br_blockcount);
-			if (error)
-				goto out_defer;
+		/* Trim the extent to whatever got unmapped. */
+		if (rlen) {
+			xfs_trim_extent(&del, del.br_startoff + rlen,
+				del.br_blockcount - rlen);
+		}
+		trace_xfs_reflink_cow_remap(ip, &del);
 
-			/* Map the new blocks into the data fork. */
-			error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
-					ip, &uirec);
-			if (error)
-				goto out_defer;
+		/* Free the CoW orphan record. */
+		error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+				del.br_startblock, del.br_blockcount);
+		if (error)
+			goto out_defer;
 
-			/* Remove the mapping from the CoW fork. */
-			error = xfs_bunmapi_cow(ip, &uirec);
-			if (error)
-				goto out_defer;
+		/* Map the new blocks into the data fork. */
+		error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+		if (error)
+			goto out_defer;
 
-			error = xfs_defer_finish(&tp, &dfops, ip);
-			if (error)
-				goto out_defer;
-		}
+		/* Remove the mapping from the CoW fork. */
+		xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+
+		error = xfs_defer_finish(&tp, &dfops, ip);
+		if (error)
+			goto out_defer;
 
 next_extent:
-		/* Roll on... */
-		offset_fsb = irec.br_startoff + ilen;
+		if (idx < 0)
+			break;
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
 	}
 
 	error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
 
 out_defer:
 	xfs_defer_cancel(&dfops);
-out_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
@@ -1312,19 +1274,26 @@ out_error:
  */
 int
 xfs_reflink_remap_range(
-	struct xfs_inode	*src,
-	xfs_off_t		srcoff,
-	struct xfs_inode	*dest,
-	xfs_off_t		destoff,
-	xfs_off_t		len,
-	unsigned int		flags)
+	struct file		*file_in,
+	loff_t			pos_in,
+	struct file		*file_out,
+	loff_t			pos_out,
+	u64			len,
+	bool			is_dedupe)
 {
+	struct inode		*inode_in = file_inode(file_in);
+	struct xfs_inode	*src = XFS_I(inode_in);
+	struct inode		*inode_out = file_inode(file_out);
+	struct xfs_inode	*dest = XFS_I(inode_out);
 	struct xfs_mount	*mp = src->i_mount;
+	loff_t			bs = inode_out->i_sb->s_blocksize;
+	bool			same_inode = (inode_in == inode_out);
 	xfs_fileoff_t		sfsbno, dfsbno;
 	xfs_filblks_t		fsblen;
-	int			error;
 	xfs_extlen_t		cowextsize;
-	bool			is_same;
+	loff_t			isize;
+	ssize_t			ret;
+	loff_t			blen;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	/* Don't reflink realtime inodes */
-	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
-		return -EINVAL;
-
-	if (flags & ~XFS_REFLINK_ALL)
-		return -EINVAL;
-
-	trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
-
 	/* Lock both files against IO */
-	if (src->i_ino == dest->i_ino) {
+	if (same_inode) {
 		xfs_ilock(src, XFS_IOLOCK_EXCL);
 		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
 	} else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
 		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
 	}
 
+	/* Don't touch certain kinds of inodes */
+	ret = -EPERM;
+	if (IS_IMMUTABLE(inode_out))
+		goto out_unlock;
+
+	ret = -ETXTBSY;
+	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+		goto out_unlock;
+
+
+	/* Don't reflink dirs, pipes, sockets... */
+	ret = -EISDIR;
+	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+		goto out_unlock;
+	ret = -EINVAL;
+	if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+		goto out_unlock;
+	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+		goto out_unlock;
+
+	/* Don't reflink realtime inodes */
+	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+		goto out_unlock;
+
+	/* Don't share DAX file data for now. */
+	if (IS_DAX(inode_in) || IS_DAX(inode_out))
+		goto out_unlock;
+
+	/* Are we going all the way to the end? */
+	isize = i_size_read(inode_in);
+	if (isize == 0) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	if (len == 0)
+		len = isize - pos_in;
+
+	/* Ensure offsets don't wrap and the input is inside i_size */
+	if (pos_in + len < pos_in || pos_out + len < pos_out ||
+	    pos_in + len > isize)
+		goto out_unlock;
+
+	/* Don't allow dedupe past EOF in the dest file */
+	if (is_dedupe) {
+		loff_t	disize;
+
+		disize = i_size_read(inode_out);
+		if (pos_out >= disize || pos_out + len > disize)
+			goto out_unlock;
+	}
+
+	/* If we're linking to EOF, continue to the block boundary. */
+	if (pos_in + len == isize)
+		blen = ALIGN(isize, bs) - pos_in;
+	else
+		blen = len;
+
+	/* Only reflink if we're aligned to block boundaries */
+	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+		goto out_unlock;
+
+	/* Don't allow overlapped reflink within the same file */
+	if (same_inode) {
+		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+			goto out_unlock;
+	}
+
+	/* Wait for the completion of any pending IOs on both files */
+	inode_dio_wait(inode_in);
+	if (!same_inode)
+		inode_dio_wait(inode_out);
+
+	ret = filemap_write_and_wait_range(inode_in->i_mapping,
+			pos_in, pos_in + len - 1);
+	if (ret)
+		goto out_unlock;
+
+	ret = filemap_write_and_wait_range(inode_out->i_mapping,
+			pos_out, pos_out + len - 1);
+	if (ret)
+		goto out_unlock;
+
+	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
 	/*
 	 * Check that the extents are the same.
 	 */
-	if (flags & XFS_REFLINK_DEDUPE) {
-		is_same = false;
-		error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
-				destoff, len, &is_same);
-		if (error)
-			goto out_error;
+	if (is_dedupe) {
+		bool		is_same = false;
+
+		ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
+				len, &is_same);
+		if (ret)
+			goto out_unlock;
 		if (!is_same) {
-			error = -EBADE;
-			goto out_error;
+			ret = -EBADE;
+			goto out_unlock;
 		}
 	}
 
-	error = xfs_reflink_set_inode_flag(src, dest);
-	if (error)
-		goto out_error;
+	ret = xfs_reflink_set_inode_flag(src, dest);
+	if (ret)
+		goto out_unlock;
 
 	/*
 	 * Invalidate the page cache so that we can clear any CoW mappings
 	 * in the destination file.
 	 */
-	truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
-				   PAGE_ALIGN(destoff + len) - 1);
+	truncate_inode_pages_range(&inode_out->i_data, pos_out,
+				   PAGE_ALIGN(pos_out + len) - 1);
 
-	dfsbno = XFS_B_TO_FSBT(mp, destoff);
-	sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
+	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
 	fsblen = XFS_B_TO_FSB(mp, len);
-	error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-			destoff + len);
-	if (error)
-		goto out_error;
+	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+			pos_out + len);
+	if (ret)
+		goto out_unlock;
 
 	/*
 	 * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
 	 * has a cowextsize hint, and the destination file does not.
 	 */
 	cowextsize = 0;
-	if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
+	if (pos_in == 0 && len == i_size_read(inode_in) &&
 	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-	    destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
+	    pos_out == 0 && len >= i_size_read(inode_out) &&
 	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
 		cowextsize = src->i_d.di_cowextsize;
 
-	error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
-	if (error)
-		goto out_error;
+	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
 
-out_error:
+out_unlock:
 	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
 	xfs_iunlock(src, XFS_IOLOCK_EXCL);
 	if (src->i_ino != dest->i_ino) {
 		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
 		xfs_iunlock(dest, XFS_IOLOCK_EXCL);
 	}
-	if (error)
-		trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
-	return error;
+	if (ret)
+		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+	return ret;
 }
 
 /*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 5dc3c8ac12aa..fad11607c9ad 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
 
-extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
-		xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+		struct xfs_bmbt_irec *imap, bool *shared);
 extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
 		xfs_off_t offset, xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-#define XFS_REFLINK_DEDUPE	1	/* only reflink if contents match */
-#define XFS_REFLINK_ALL		(XFS_REFLINK_DEDUPE)
-extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
-		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
-		unsigned int flags);
+extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+		struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
 		struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 5f8d55d29a11..276d3023d60f 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
 };
 
 
-struct kobj_type xfs_error_cfg_ktype = {
+static struct kobj_type xfs_error_cfg_ktype = {
 	.release = xfs_sysfs_release,
 	.sysfs_ops = &xfs_sysfs_ops,
 	.default_attrs = xfs_error_attrs,
 };
 
-struct kobj_type xfs_error_ktype = {
+static struct kobj_type xfs_error_ktype = {
 	.release = xfs_sysfs_release,
 	.sysfs_ops = &xfs_sysfs_ops,
 };
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad188d3a83f3..0907752be62d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
 
-DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
 DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
 
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);