31 files changed, 2004 insertions, 388 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 0b5b1af35e5e..e850e76acaaf 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
 	struct list_head work_list;
 	struct delayed_work dwork;	/* work item used for writeback */
 
+	unsigned long dirty_sleep;	/* last wait */
+
 	struct list_head bdi_node;	/* anchored at bdi->wb_list */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97cb48f03dc7..7cf8a6c70a3f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -63,6 +63,12 @@
 #define bio_end_sector(bio)	((bio)->bi_iter.bi_sector + bio_sectors((bio)))
 
 /*
+ * Return the data direction, READ or WRITE.
+ */
+#define bio_data_dir(bio) \
+	(op_is_write(bio_op(bio)) ? WRITE : READ)
+
+/*
  * Check whether this bio carries any data or not. A NULL bio is allowed.
  */
 static inline bool bio_has_data(struct bio *bio)
@@ -70,7 +76,8 @@ static inline bool bio_has_data(struct bio *bio)
 	if (bio &&
 	    bio->bi_iter.bi_size &&
 	    bio_op(bio) != REQ_OP_DISCARD &&
-	    bio_op(bio) != REQ_OP_SECURE_ERASE)
+	    bio_op(bio) != REQ_OP_SECURE_ERASE &&
+	    bio_op(bio) != REQ_OP_WRITE_ZEROES)
 		return true;
 
 	return false;
@@ -80,18 +87,8 @@ static inline bool bio_no_advance_iter(struct bio *bio)
 {
 	return bio_op(bio) == REQ_OP_DISCARD ||
 	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
-	       bio_op(bio) == REQ_OP_WRITE_SAME;
-}
-
-static inline bool bio_is_rw(struct bio *bio)
-{
-	if (!bio_has_data(bio))
-		return false;
-
-	if (bio_no_advance_iter(bio))
-		return false;
-
-	return true;
+	       bio_op(bio) == REQ_OP_WRITE_SAME ||
+	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
 }
 
 static inline bool bio_mergeable(struct bio *bio)
@@ -193,18 +190,20 @@ static inline unsigned bio_segments(struct bio *bio)
 	struct bvec_iter iter;
 
 	/*
-	 * We special case discard/write same, because they interpret bi_size
-	 * differently:
+	 * We special case discard/write same/write zeroes, because they
+	 * interpret bi_size differently:
 	 */
 
-	if (bio_op(bio) == REQ_OP_DISCARD)
-		return 1;
-
-	if (bio_op(bio) == REQ_OP_SECURE_ERASE)
-		return 1;
-
-	if (bio_op(bio) == REQ_OP_WRITE_SAME)
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+		return 0;
+	case REQ_OP_WRITE_SAME:
 		return 1;
+	default:
+		break;
+	}
 
 	bio_for_each_segment(bv, bio, iter)
 		segs++;
@@ -409,6 +408,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
 
 }
 
+extern blk_qc_t submit_bio(struct bio *);
+
 extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
@@ -423,13 +424,15 @@ extern int bio_phys_segments(struct request_queue *, struct bio *);
 extern int submit_bio_wait(struct bio *bio);
 extern void bio_advance(struct bio *, unsigned);
 
-extern void bio_init(struct bio *);
+extern void bio_init(struct bio *bio, struct bio_vec *table,
+		     unsigned short max_vecs);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    const struct iov_iter *, gfp_t);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3bf5d33800ab..01b62e7bac74 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -581,15 +581,14 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
 /**
  * blkg_rwstat_add - add a value to a blkg_rwstat
  * @rwstat: target blkg_rwstat
- * @op: REQ_OP
- * @op_flags: rq_flag_bits
+ * @op: REQ_OP and flags
  * @val: value to add
  *
  * Add @val to @rwstat.  The counters are chosen according to @rw.  The
  * caller is responsible for synchronizing calls to this function.
  */
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
-				   int op, int op_flags, uint64_t val)
+				   unsigned int op, uint64_t val)
 {
 	struct percpu_counter *cnt;
 
@@ -600,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 
 	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
 
-	if (op_flags & REQ_SYNC)
+	if (op_is_sync(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
@@ -705,9 +704,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 
 	if (!throtl) {
 		blkg = blkg ?: q->root_blkg;
-		blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_opf,
+		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
 				bio->bi_iter.bi_size);
-		blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_opf, 1);
+		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
 	rcu_read_unlock();
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 535ab2e13d2e..87e404aae267 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -3,6 +3,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/sbitmap.h>
+#include <linux/srcu.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -35,6 +36,8 @@ struct blk_mq_hw_ctx {
 
 	struct blk_mq_tags	*tags;
 
+	struct srcu_struct	queue_rq_srcu;
+
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
@@ -215,18 +218,20 @@ void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, int error);
 void __blk_mq_end_request(struct request *rq, int error);
 
-void blk_mq_requeue_request(struct request *rq);
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
-void blk_mq_cancel_requeue_work(struct request_queue *q);
+void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq, int error);
 
+bool blk_mq_queue_stopped(struct request_queue *q);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cd395ecec99d..519ea2c9df61 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,7 +17,6 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
-#ifdef CONFIG_BLOCK
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -88,24 +87,6 @@ struct bio {
 	struct bio_vec		bi_inline_vecs[0];
 };
 
-#define BIO_OP_SHIFT	(8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS)
-#define bio_flags(bio)	((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1))
-#define bio_op(bio)	((bio)->bi_opf >> BIO_OP_SHIFT)
-
-#define bio_set_op_attrs(bio, op, op_flags) do {			\
-	if (__builtin_constant_p(op))					\
-		BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS));		\
-	else								\
-		WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS));		\
-	if (__builtin_constant_p(op_flags))				\
-		BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT));	\
-	else								\
-		WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT));	\
-	(bio)->bi_opf = bio_flags(bio);					\
-	(bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT);			\
-	(bio)->bi_opf |= (op_flags);					\
-} while (0)
-
 #define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
 
 /*
@@ -119,6 +100,8 @@ struct bio {
 #define BIO_QUIET	6	/* Make BIO Quiet */
 #define BIO_CHAIN	7	/* chained bio, ->bi_remaining in effect */
 #define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
+#define BIO_THROTTLED	9	/* This bio has already been subjected to
+				 * throttling rules. Don't do it again. */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
@@ -142,53 +125,61 @@ struct bio {
 #define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
 #define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
 
-#endif /* CONFIG_BLOCK */
-
 /*
- * Request flags.  For use in the cmd_flags field of struct request, and in
- * bi_opf of struct bio.  Note that some flags are only valid in either one.
+ * Operations and flags common to the bio and request structures.
+ * We use 8 bits for encoding the operation, and the remaining 24 for flags.
+ *
+ * The least significant bit of the operation number indicates the data
+ * transfer direction:
+ *
+ *   - if the least significant bit is set transfers are TO the device
+ *   - if the least significant bit is not set transfers are FROM the device
+ *
+ * If a operation does not transfer data the least significant bit has no
+ * meaning.
  */
-enum rq_flag_bits {
-	/* common flags */
-	__REQ_FAILFAST_DEV,	/* no driver retries of device errors */
+#define REQ_OP_BITS	8
+#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS	24
+
+enum req_opf {
+	/* read sectors from the device */
+	REQ_OP_READ		= 0,
+	/* write sectors to the device */
+	REQ_OP_WRITE		= 1,
+	/* flush the volatile write cache */
+	REQ_OP_FLUSH		= 2,
+	/* discard sectors */
+	REQ_OP_DISCARD		= 3,
+	/* get zone information */
+	REQ_OP_ZONE_REPORT	= 4,
+	/* securely erase sectors */
+	REQ_OP_SECURE_ERASE	= 5,
+	/* seset a zone write pointer */
+	REQ_OP_ZONE_RESET	= 6,
+	/* write the same sector many times */
+	REQ_OP_WRITE_SAME	= 7,
+	/* write the zero filled sector many times */
+	REQ_OP_WRITE_ZEROES	= 8,
+
+	REQ_OP_LAST,
+};
+
+enum req_flag_bits {
+	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
+		REQ_OP_BITS,
 	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
 	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
-
 	__REQ_SYNC,		/* request is sync (sync write or read) */
 	__REQ_META,		/* metadata io request */
 	__REQ_PRIO,		/* boost priority in cfq */
-
-	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
+	__REQ_NOMERGE,		/* don't touch this for merging */
+	__REQ_IDLE,		/* anticipate more IO after this one */
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
-
-	/* bio only flags */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
-	__REQ_THROTTLED,	/* This bio has already been subjected to
-				 * throttling rules. Don't do it again. */
-
-	/* request only flags */
-	__REQ_SORTED,		/* elevator knows about this request */
-	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_NOMERGE,		/* don't touch this for merging */
-	__REQ_STARTED,		/* drive already may have started this one */
-	__REQ_DONTPREP,		/* don't call prep for this one */
-	__REQ_QUEUED,		/* uses queueing */
-	__REQ_ELVPRIV,		/* elevator private data attached */
-	__REQ_FAILED,		/* set if the request failed */
-	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests and also
-				   for requests for which the SCSI "quiesce"
-				   state must be ignored. */
-	__REQ_ALLOCED,		/* request came from our alloc pool */
-	__REQ_COPY_USER,	/* contains copies of user pages */
-	__REQ_FLUSH_SEQ,	/* request for flush sequence */
-	__REQ_IO_STAT,		/* account I/O stat */
-	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
-	__REQ_PM,		/* runtime pm request */
-	__REQ_HASHED,		/* on IO scheduler merge hash */
-	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
+	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -198,54 +189,47 @@ enum rq_flag_bits {
 #define REQ_SYNC		(1ULL << __REQ_SYNC)
 #define REQ_META		(1ULL << __REQ_META)
 #define REQ_PRIO		(1ULL << __REQ_PRIO)
-#define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
+#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
+#define REQ_IDLE		(1ULL << __REQ_IDLE)
 #define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
+#define REQ_FUA			(1ULL << __REQ_FUA)
+#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
+#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
+#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-#define REQ_COMMON_MASK \
-	(REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
-	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
-#define REQ_CLONE_MASK		REQ_COMMON_MASK
 
-/* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ)
+	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
-#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
-#define REQ_THROTTLED		(1ULL << __REQ_THROTTLED)
+#define bio_op(bio) \
+	((bio)->bi_opf & REQ_OP_MASK)
+#define req_op(req) \
+	((req)->cmd_flags & REQ_OP_MASK)
 
-#define REQ_SORTED		(1ULL << __REQ_SORTED)
-#define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
-#define REQ_FUA			(1ULL << __REQ_FUA)
-#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
-#define REQ_STARTED		(1ULL << __REQ_STARTED)
-#define REQ_DONTPREP		(1ULL << __REQ_DONTPREP)
-#define REQ_QUEUED		(1ULL << __REQ_QUEUED)
-#define REQ_ELVPRIV		(1ULL << __REQ_ELVPRIV)
-#define REQ_FAILED		(1ULL << __REQ_FAILED)
-#define REQ_QUIET		(1ULL << __REQ_QUIET)
-#define REQ_PREEMPT		(1ULL << __REQ_PREEMPT)
-#define REQ_ALLOCED		(1ULL << __REQ_ALLOCED)
-#define REQ_COPY_USER		(1ULL << __REQ_COPY_USER)
-#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
-#define REQ_FLUSH_SEQ		(1ULL << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
-#define REQ_PM			(1ULL << __REQ_PM)
-#define REQ_HASHED		(1ULL << __REQ_HASHED)
-#define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
-
-enum req_op {
-	REQ_OP_READ,
-	REQ_OP_WRITE,
-	REQ_OP_DISCARD,		/* request to discard sectors */
-	REQ_OP_SECURE_ERASE,	/* request to securely erase sectors */
-	REQ_OP_WRITE_SAME,	/* write same block many times */
-	REQ_OP_FLUSH,		/* request for cache flush */
-};
+/* obsolete, don't use in new code */
+static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
+		unsigned op_flags)
+{
+	bio->bi_opf = op | op_flags;
+}
 
-#define REQ_OP_BITS 3
+static inline bool op_is_write(unsigned int op)
+{
+	return (op & 1);
+}
+
+/*
+ * Reads are always treated as synchronous, as are requests with the FUA or
+ * PREFLUSH flag.  Other operations may be marked as synchronous using the
+ * REQ_SYNC flag.
+ */
+static inline bool op_is_sync(unsigned int op)
+{
+	return (op & REQ_OP_MASK) == REQ_OP_READ ||
+		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
+}
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE	-1U
@@ -271,4 +255,20 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 }
 
+struct blk_issue_stat {
+	u64 time;
+};
+
+#define BLK_RQ_STAT_BATCH	64
+
+struct blk_rq_stat {
+	s64 mean;
+	u64 min;
+	u64 max;
+	s32 nr_samples;
+	s32 nr_batch;
+	u64 batch;
+	s64 time;
+};
+
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358ba052..c5393766909d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
+#include <linux/blkzoned.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -37,6 +38,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
+struct rq_wb;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -77,6 +79,55 @@ enum rq_cmd_type_bits {
 	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
 };
 
+/*
+ * request flags */
+typedef __u32 __bitwise req_flags_t;
+
+/* elevator knows about this request */
+#define RQF_SORTED		((__force req_flags_t)(1 << 0))
+/* drive already may have started this one */
+#define RQF_STARTED		((__force req_flags_t)(1 << 1))
+/* uses tagged queueing */
+#define RQF_QUEUED		((__force req_flags_t)(1 << 2))
+/* may not be passed by ioscheduler */
+#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
+/* request for flush sequence */
+#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
+/* merge of different types, fail separately */
+#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
+/* track inflight for MQ */
+#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
+/* don't call prep for this one */
+#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
+/* set for "ide_preempt" requests and also for requests for which the SCSI
+   "quiesce" state must be ignored. */
+#define RQF_PREEMPT		((__force req_flags_t)(1 << 8))
+/* contains copies of user pages */
+#define RQF_COPY_USER		((__force req_flags_t)(1 << 9))
+/* vaguely specified driver internal error.  Ignored by the block layer */
+#define RQF_FAILED		((__force req_flags_t)(1 << 10))
+/* don't warn about errors */
+#define RQF_QUIET		((__force req_flags_t)(1 << 11))
+/* elevator private data attached */
+#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
+/* account I/O stat */
+#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
+/* request came from our alloc pool */
+#define RQF_ALLOCED		((__force req_flags_t)(1 << 14))
+/* runtime pm request */
+#define RQF_PM			((__force req_flags_t)(1 << 15))
+/* on IO scheduler merge hash */
+#define RQF_HASHED		((__force req_flags_t)(1 << 16))
+/* IO stats tracking on */
+#define RQF_STATS		((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+   bio chain. */
+#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+
+/* flags that prevent us from merging requests: */
+#define RQF_NOMERGE_FLAGS \
+	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+
 #define BLK_MAX_CDB	16
 
 /*
@@ -97,7 +148,8 @@ struct request {
 
 	int cpu;
 	unsigned cmd_type;
-	u64 cmd_flags;
+	unsigned int cmd_flags;		/* op and common flags */
+	req_flags_t rq_flags;
 	unsigned long atomic_flags;
 
 	/* the following two fields are internal, NEVER access directly */
@@ -126,6 +178,7 @@ struct request {
 	 */
 	union {
 		struct rb_node rb_node;	/* sort/lookup */
+		struct bio_vec special_vec;
 		void *completion_data;
 	};
 
@@ -151,6 +204,7 @@ struct request {
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
 	unsigned long start_time;
+	struct blk_issue_stat issue_stat;
 #ifdef CONFIG_BLK_CGROUP
 	struct request_list *rl;		/* rl this rq is alloced from */
 	unsigned long long start_time_ns;
@@ -198,20 +252,6 @@ struct request {
 	struct request *next_rq;
 };
 
-#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS)
-#define req_op(req)  ((req)->cmd_flags >> REQ_OP_SHIFT)
-
-#define req_set_op(req, op) do {				\
-	WARN_ON(op >= (1 << REQ_OP_BITS));			\
-	(req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1);	\
-	(req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT);	\
-} while (0)
-
-#define req_set_op_attrs(req, op, flags) do {	\
-	req_set_op(req, op);			\
-	(req)->cmd_flags |= flags;		\
-} while (0)
-
 static inline unsigned short req_get_ioprio(struct request *req)
 {
 	return req->ioprio;
@@ -261,6 +301,15 @@ struct blk_queue_tag {
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
 
+/*
+ * Zoned block device models (zoned limit).
+ */
+enum blk_zoned_model {
+	BLK_ZONED_NONE,	/* Regular block device */
+	BLK_ZONED_HA,	/* Host-aware zoned block device */
+	BLK_ZONED_HM,	/* Host-managed zoned block device */
+};
+
 struct queue_limits {
 	unsigned long		bounce_pfn;
 	unsigned long		seg_boundary_mask;
@@ -278,6 +327,7 @@ struct queue_limits {
 	unsigned int		max_discard_sectors;
 	unsigned int		max_hw_discard_sectors;
 	unsigned int		max_write_same_sectors;
+	unsigned int		max_write_zeroes_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
 
@@ -290,8 +340,45 @@ struct queue_limits {
 	unsigned char		cluster;
 	unsigned char		discard_zeroes_data;
 	unsigned char		raid_partial_stripes_expensive;
+	enum blk_zoned_model	zoned;
 };
 
+#ifdef CONFIG_BLK_DEV_ZONED
+
+struct blk_zone_report_hdr {
+	unsigned int	nr_zones;
+	u8		padding[60];
+};
+
+extern int blkdev_report_zones(struct block_device *bdev,
+			       sector_t sector, struct blk_zone *zones,
+			       unsigned int *nr_zones, gfp_t gfp_mask);
+extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
+			      sector_t nr_sectors, gfp_t gfp_mask);
+
+extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
+				     unsigned int cmd, unsigned long arg);
+extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
+				    unsigned int cmd, unsigned long arg);
+
+#else /* CONFIG_BLK_DEV_ZONED */
+
+static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
+					    fmode_t mode, unsigned int cmd,
+					    unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
+					   fmode_t mode, unsigned int cmd,
+					   unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 struct request_queue {
 	/*
 	 * Together with queue_head for cacheline sharing
@@ -302,6 +389,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	struct rq_wb		*rq_wb;
+
 	/*
 	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
 	 * is used, root blkg allocates from @q->root_rl and all other
@@ -327,6 +416,8 @@ struct request_queue {
 	struct blk_mq_ctx __percpu	*queue_ctx;
 	unsigned int		nr_queues;
 
+	unsigned int		queue_depth;
+
 	/* hw dispatch queues */
 	struct blk_mq_hw_ctx	**queue_hw_ctx;
 	unsigned int		nr_hw_queues;
@@ -412,6 +503,9 @@ struct request_queue {
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
+
+	struct blk_rq_stat	rq_stats[2];
+
 	/*
 	 * Number of active block driver functions for which blk_drain_queue()
 	 * must wait. Must be incremented around functions that unlock the
@@ -420,6 +514,7 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
+	int			poll_nsec;
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 	struct list_head	timeout_list;
@@ -505,6 +600,7 @@ struct request_queue {
 #define QUEUE_FLAG_FUA	       24	/* device supports FUA writes */
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
+#define QUEUE_FLAG_STATS       27	/* track rq completion times */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -601,7 +697,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 			     REQ_FAILFAST_DRIVER))
 
 #define blk_account_rq(rq) \
-	(((rq)->cmd_flags & REQ_STARTED) && \
+	(((rq)->rq_flags & RQF_STARTED) && \
 	 ((rq)->cmd_type == REQ_TYPE_FS))
 
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
@@ -627,17 +723,31 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q)
 	return q->limits.cluster;
 }
 
-/*
- * We regard a request as sync, if either a read or a sync write
- */
-static inline bool rw_is_sync(int op, unsigned int rw_flags)
+static inline enum blk_zoned_model
+blk_queue_zoned_model(struct request_queue *q)
 {
-	return op == REQ_OP_READ || (rw_flags & REQ_SYNC);
+	return q->limits.zoned;
+}
+
+static inline bool blk_queue_is_zoned(struct request_queue *q)
+{
+	switch (blk_queue_zoned_model(q)) {
+	case BLK_ZONED_HA:
+	case BLK_ZONED_HM:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+{
+	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
 static inline bool rq_is_sync(struct request *rq)
 {
-	return rw_is_sync(req_op(rq), rq->cmd_flags);
+	return op_is_sync(rq->cmd_flags);
 }
 
 static inline bool blk_rl_full(struct request_list *rl, bool sync)
@@ -669,8 +779,13 @@ static inline bool rq_mergeable(struct request *rq)
 	if (req_op(rq) == REQ_OP_FLUSH)
 		return false;
 
+	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
+		return false;
+
 	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
 		return false;
+	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
+		return false;
 
 	return true;
 }
@@ -683,6 +798,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 	return false;
 }
 
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+	if (q->queue_depth)
+		return q->queue_depth;
+
+	return q->nr_requests;
+}
+
 /*
  * q->prep_rq_fn return values
  */
@@ -790,8 +913,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
-extern void blk_add_request_payload(struct request *rq, struct page *page,
-		int offset, unsigned int len);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
@@ -824,6 +945,7 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
+extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
@@ -837,7 +959,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
@@ -888,6 +1010,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	if (unlikely(op == REQ_OP_WRITE_SAME))
 		return q->limits.max_write_same_sectors;
 
+	if (unlikely(op == REQ_OP_WRITE_ZEROES))
+		return q->limits.max_write_zeroes_sectors;
+
 	return q->limits.max_sectors;
 }
 
@@ -991,6 +1116,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
 extern void blk_queue_max_write_same_sectors(struct request_queue *q,
 		unsigned int max_write_same_sectors);
+extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+		unsigned int max_write_same_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
 extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -999,6 +1126,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
@@ -1027,6 +1155,13 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return 1;
+	return rq->nr_phys_segments;
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
@@ -1057,7 +1192,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q)
 static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
 static inline void blk_pre_runtime_resume(struct request_queue *q) {}
 static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-extern inline void blk_set_runtime_active(struct request_queue *q) {}
+static inline void blk_set_runtime_active(struct request_queue *q) {}
 #endif
 
 /*
@@ -1078,6 +1213,7 @@ struct blk_plug {
 	struct list_head cb_list; /* md requires an unplug callback */
 };
 #define BLK_MAX_REQUEST_COUNT 16
+#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1151,6 +1287,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		struct bio **biop);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
+extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+		bool discard);
 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, bool discard);
 static inline int sb_issue_discard(struct super_block *sb, sector_t block,
@@ -1354,6 +1493,46 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
 	return 0;
 }
 
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return q->limits.max_write_zeroes_sectors;
+
+	return 0;
+}
+
+static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_zoned_model(q);
+
+	return BLK_ZONED_NONE;
+}
+
+static inline bool bdev_is_zoned(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_is_zoned(q);
+
+	return false;
+}
+
+static inline unsigned int bdev_zone_size(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_zone_size(q);
+
+	return 0;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cceb72f9e29f..e417f080219a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq)
 }
 
 extern void blk_dump_cmd(char *buf, struct request *rq);
-extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes);
+extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
 
 #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
 
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 8dbd7879fdc6..67bcef2ecddb 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,7 +1,7 @@
 #ifndef __FS_CEPH_MESSENGER_H
 #define __FS_CEPH_MESSENGER_H
 
-#include <linux/blk_types.h>
+#include <linux/bvec.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index b91b023deffb..a52c6580cc9a 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -58,7 +58,7 @@ struct dm_io_notify {
 struct dm_io_client;
 struct dm_io_request {
 	int bi_op;			/* REQ_OP */
-	int bi_op_flags;		/* rq_flag_bits */
+	int bi_op_flags;		/* req_flag_bits */
 	struct dm_io_memory mem;	/* Memory to use for io */
 	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
 	struct dm_io_client *client;	/* Client memory handler */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e7f358d2e5fc..b276e9ef0e0b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -30,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int);
 typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
 typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, int, int);
+typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int);
 
 typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
@@ -108,6 +108,11 @@ struct elevator_type
 
 #define ELV_HASH_BITS 6
 
+void elv_rqhash_del(struct request_queue *q, struct request *rq);
+void elv_rqhash_add(struct request_queue *q, struct request *rq);
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
+
 /*
  * each queue has an elevator_queue associated with it
  */
@@ -139,7 +144,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
-extern int elv_may_queue(struct request_queue *, int, int);
+extern int elv_may_queue(struct request_queue *, unsigned int);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
 			   struct bio *bio, gfp_t gfp_mask);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..b84230e070be 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -28,7 +28,6 @@
 #include <linux/uidgid.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
-#include <linux/blk_types.h>
 #include <linux/workqueue.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/delayed_call.h>
@@ -38,6 +37,7 @@
 
 struct backing_dev_info;
 struct bdi_writeback;
+struct bio;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
@@ -152,58 +152,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define CHECK_IOVEC_ONLY -1
 
 /*
- * The below are the various read and write flags that we support. Some of
- * them include behavioral modifiers that send information down to the
- * block layer and IO scheduler. They should be used along with a req_op.
- * Terminology:
- *
- *	The block layer uses device plugging to defer IO a little bit, in
- *	the hope that we will see more IO very shortly. This increases
- *	coalescing of adjacent IO and thus reduces the number of IOs we
- *	have to send to the device. It also allows for better queuing,
- *	if the IO isn't mergeable. If the caller is going to be waiting
- *	for the IO, then he must ensure that the device is unplugged so
- *	that the IO is dispatched to the driver.
- *
- *	All IO is handled async in Linux. This is fine for background
- *	writes, but for reads or writes that someone waits for completion
- *	on, we want to notify the block layer and IO scheduler so that they
- *	know about it. That allows them to make better scheduling
- *	decisions. So when the below references 'sync' and 'async', it
- *	is referencing this priority hint.
- *
- * With that in mind, the available types are:
- *
- * READ			A normal read operation. Device will be plugged.
- * READ_SYNC		A synchronous read. Device is not plugged, caller can
- *			immediately wait on this read without caring about
- *			unplugging.
- * WRITE		A normal async write. Device will be plugged.
- * WRITE_SYNC		Synchronous write. Identical to WRITE, but passes down
- *			the hint that someone will be waiting on this IO
- *			shortly. The write equivalent of READ_SYNC.
- * WRITE_ODIRECT	Special case write for O_DIRECT only.
- * WRITE_FLUSH		Like WRITE_SYNC but with preceding cache flush.
- * WRITE_FUA		Like WRITE_SYNC but data is guaranteed to be on
- *			non-volatile media on completion.
- * WRITE_FLUSH_FUA	Combination of WRITE_FLUSH and FUA. The IO is preceded
- *			by a cache flush and data is guaranteed to be on
- *			non-volatile media on completion.
- *
- */
-#define RW_MASK			REQ_OP_WRITE
-
-#define READ			REQ_OP_READ
-#define WRITE			REQ_OP_WRITE
-
-#define READ_SYNC		REQ_SYNC
-#define WRITE_SYNC		(REQ_SYNC | REQ_NOIDLE)
-#define WRITE_ODIRECT		REQ_SYNC
-#define WRITE_FLUSH		(REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
-#define WRITE_FUA		(REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA		(REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
-
-/*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
  */
@@ -2499,19 +2447,6 @@ extern void make_bad_inode(struct inode *);
 extern bool is_bad_inode(struct inode *);
 
 #ifdef CONFIG_BLOCK
-static inline bool op_is_write(unsigned int op)
-{
-	return op == REQ_OP_READ ? false : true;
-}
-
-/*
- * return data direction, READ or WRITE
- */
-static inline int bio_data_dir(struct bio *bio)
-{
-	return op_is_write(bio_op(bio)) ? WRITE : READ;
-}
-
 extern void check_disk_size_change(struct gendisk *disk,
 				   struct block_device *bdev);
 extern int revalidate_disk(struct gendisk *);
@@ -2782,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode)
 extern void inode_sb_list_add(struct inode *inode);
 
 #ifdef CONFIG_BLOCK
-extern blk_qc_t submit_bio(struct bio *);
 extern int bdev_read_only(struct block_device *);
 #endif
 extern int set_blocksize(struct block_device *, int);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc6ed52a39b9..01b6b460c34d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -50,6 +50,10 @@
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
 #define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
 
+/* generic data direction definitions */
+#define READ			0
+#define WRITE			1
+
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
 #define u64_to_user_ptr(x) (		\
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d190786e4ad8..7c273bbc5351 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -47,6 +47,7 @@ struct ppa_addr {
 struct nvm_rq;
 struct nvm_id;
 struct nvm_dev;
+struct nvm_tgt_dev;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
 typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
@@ -107,6 +108,8 @@ enum {
 	NVM_RSP_NOT_CHANGEABLE	= 0x1,
 	NVM_RSP_ERR_FAILWRITE	= 0x40ff,
 	NVM_RSP_ERR_EMPTYPAGE	= 0x42ff,
+	NVM_RSP_ERR_FAILECC	= 0x4281,
+	NVM_RSP_WARN_HIGHECC	= 0x4700,
 
 	/* Device opcodes */
 	NVM_OP_HBREAD		= 0x02,
@@ -208,7 +211,7 @@ struct nvm_id {
 
 struct nvm_target {
 	struct list_head list;
-	struct nvm_dev *dev;
+	struct nvm_tgt_dev *dev;
 	struct nvm_tgt_type *type;
 	struct gendisk *disk;
 };
@@ -228,7 +231,7 @@ typedef void (nvm_end_io_fn)(struct nvm_rq *);
 
 struct nvm_rq {
 	struct nvm_tgt_instance *ins;
-	struct nvm_dev *dev;
+	struct nvm_tgt_dev *dev;
 
 	struct bio *bio;
 
@@ -263,35 +266,12 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 	return rqdata + 1;
 }
 
-struct nvm_block;
-
-struct nvm_lun {
-	int id;
-
-	int lun_id;
-	int chnl_id;
-
-	spinlock_t lock;
-
-	unsigned int nr_free_blocks;	/* Number of unused blocks */
-	struct nvm_block *blocks;
-};
-
 enum {
 	NVM_BLK_ST_FREE =	0x1,	/* Free block */
 	NVM_BLK_ST_TGT =	0x2,	/* Block in use by target */
 	NVM_BLK_ST_BAD =	0x8,	/* Bad block */
 };
 
-struct nvm_block {
-	struct list_head list;
-	struct nvm_lun *lun;
-	unsigned long id;
-
-	void *priv;
-	int state;
-};
-
 /* system block cpu representation */
 struct nvm_sb_info {
 	unsigned long		seqnr;
@@ -301,22 +281,12 @@ struct nvm_sb_info {
 	struct ppa_addr		fs_ppa;
 };
 
-struct nvm_dev {
-	struct nvm_dev_ops *ops;
-
-	struct list_head devices;
-
-	/* Media manager */
-	struct nvmm_type *mt;
-	void *mp;
-
-	/* System blocks */
-	struct nvm_sb_info sb;
-
-	/* Device information */
+/* Device generic information */
+struct nvm_geo {
 	int nr_chnls;
+	int nr_luns;
+	int luns_per_chnl; /* -1 if channels are not symmetric */
 	int nr_planes;
-	int luns_per_chnl;
 	int sec_per_pg; /* only sectors for a single page */
 	int pgs_per_blk;
 	int blks_per_lun;
@@ -336,14 +306,44 @@ struct nvm_dev {
 	int sec_per_pl; /* all sectors across planes */
 	int sec_per_blk;
 	int sec_per_lun;
+};
+
+struct nvm_tgt_dev {
+	/* Device information */
+	struct nvm_geo geo;
+
+	/* Base ppas for target LUNs */
+	struct ppa_addr *luns;
+
+	sector_t total_secs;
+
+	struct nvm_id identity;
+	struct request_queue *q;
+
+	struct nvm_dev *parent;
+	void *map;
+};
+
+struct nvm_dev {
+	struct nvm_dev_ops *ops;
+
+	struct list_head devices;
+
+	/* Media manager */
+	struct nvmm_type *mt;
+	void *mp;
+
+	/* System blocks */
+	struct nvm_sb_info sb;
+
+	/* Device information */
+	struct nvm_geo geo;
 
 	/* lower page table */
 	int lps_per_blk;
 	int *lptbl;
 
-	unsigned long total_blocks;
 	unsigned long total_secs;
-	int nr_luns;
 
 	unsigned long *lun_map;
 	void *dma_pool;
@@ -352,26 +352,57 @@ struct nvm_dev {
 
 	/* Backend device */
 	struct request_queue *q;
-	struct device dev;
-	struct device *parent_dev;
 	char name[DISK_NAME_LEN];
 	void *private_data;
 
+	void *rmap;
+
 	struct mutex mlock;
 	spinlock_t lock;
 };
 
+static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
+						     u64 pba)
+{
+	struct ppa_addr l;
+	int secs, pgs, blks, luns;
+	sector_t ppa = pba;
+
+	l.ppa = 0;
+
+	div_u64_rem(ppa, geo->sec_per_pg, &secs);
+	l.g.sec = secs;
+
+	sector_div(ppa, geo->sec_per_pg);
+	div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
+	l.g.pg = pgs;
+
+	sector_div(ppa, geo->pgs_per_blk);
+	div_u64_rem(ppa, geo->blks_per_lun, &blks);
+	l.g.blk = blks;
+
+	sector_div(ppa, geo->blks_per_lun);
+	div_u64_rem(ppa, geo->luns_per_chnl, &luns);
+	l.g.lun = luns;
+
+	sector_div(ppa, geo->luns_per_chnl);
+	l.g.ch = ppa;
+
+	return l;
+}
+
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 						struct ppa_addr r)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
-	l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
-	l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
-	l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
-	l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
-	l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
-	l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
+	l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
+	l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset;
+	l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset;
+	l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset;
+	l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset;
+	l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset;
 
 	return l;
 }
@@ -379,24 +410,25 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
 						struct ppa_addr r)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
 	l.ppa = 0;
 	/*
 	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
 	 */
-	l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
-					(((1 << dev->ppaf.blk_len) - 1));
-	l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
-					(((1 << dev->ppaf.pg_len) - 1));
-	l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
-					(((1 << dev->ppaf.sect_len) - 1));
-	l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
-					(((1 << dev->ppaf.pln_len) - 1));
-	l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
-					(((1 << dev->ppaf.lun_len) - 1));
-	l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
-					(((1 << dev->ppaf.ch_len) - 1));
+	l.g.blk = (r.ppa >> geo->ppaf.blk_offset) &
+					(((1 << geo->ppaf.blk_len) - 1));
+	l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) &
+					(((1 << geo->ppaf.pg_len) - 1));
+	l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) &
+					(((1 << geo->ppaf.sect_len) - 1));
+	l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) &
+					(((1 << geo->ppaf.pln_len) - 1));
+	l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) &
+					(((1 << geo->ppaf.lun_len) - 1));
+	l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) &
+					(((1 << geo->ppaf.ch_len) - 1));
 
 	return l;
 }
@@ -411,18 +443,13 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
 	ppa_addr->ppa = ADDR_EMPTY;
 }
 
-static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
-							struct nvm_block *blk)
+static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
 {
-	struct ppa_addr ppa;
-	struct nvm_lun *lun = blk->lun;
-
-	ppa.ppa = 0;
-	ppa.g.blk = blk->id % dev->blks_per_lun;
-	ppa.g.lun = lun->lun_id;
-	ppa.g.ch = lun->chnl_id;
+	if (ppa_empty(ppa1) || ppa_empty(ppa2))
+		return 0;
 
-	return ppa;
+	return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) &&
+					(ppa1.g.blk == ppa2.g.blk));
 }
 
 static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
@@ -432,7 +459,7 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
 typedef void (nvm_tgt_exit_fn)(void *);
 
 struct nvm_tgt_type {
@@ -465,23 +492,18 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *);
 
 typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
 typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
-typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
-					      struct nvm_lun *, unsigned long);
-typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
-								unsigned long);
-typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
-typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
-typedef void (nvmm_release_lun)(struct nvm_dev *, int);
-typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
-
+typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
+typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
 typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
+					    struct ppa_addr, int);
+typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
+
+enum {
+	TRANS_TGT_TO_DEV =	0x0,
+	TRANS_DEV_TO_TGT =	0x1,
+};
 
 struct nvmm_type {
 	const char *name;
@@ -493,54 +515,41 @@ struct nvmm_type {
 	nvmm_create_tgt_fn *create_tgt;
 	nvmm_remove_tgt_fn *remove_tgt;
 
-	/* Block administration callbacks */
-	nvmm_get_blk_fn *get_blk;
-	nvmm_put_blk_fn *put_blk;
-	nvmm_open_blk_fn *open_blk;
-	nvmm_close_blk_fn *close_blk;
-	nvmm_flush_blk_fn *flush_blk;
-
 	nvmm_submit_io_fn *submit_io;
 	nvmm_erase_blk_fn *erase_blk;
 
-	/* Bad block mgmt */
-	nvmm_mark_blk_fn *mark_blk;
-
-	/* Configuration management */
-	nvmm_get_lun_fn *get_lun;
-	nvmm_reserve_lun *reserve_lun;
-	nvmm_release_lun *release_lun;
-
-	/* Statistics */
-	nvmm_lun_info_print_fn *lun_info_print;
-
 	nvmm_get_area_fn *get_area;
 	nvmm_put_area_fn *put_area;
 
+	nvmm_trans_ppa_fn *trans_ppa;
+	nvmm_part_to_tgt_fn *part_to_tgt;
+
 	struct list_head list;
 };
 
 extern int nvm_register_mgr(struct nvmm_type *);
 extern void nvm_unregister_mgr(struct nvmm_type *);
 
-extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
-								unsigned long);
-extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
-
 extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type);
-
-extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
+			      int, int);
+extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
+extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
 extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int);
-extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
+extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
+extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
+			   void *);
+extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
+extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
 								void *, int);
@@ -548,6 +557,7 @@ extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
 							int, void *, int);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
+extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
 /* sysblk.c */
 #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
@@ -569,10 +579,10 @@ extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
 
 extern int nvm_dev_factory(struct nvm_dev *, int flags);
 
-#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid)			\
-	for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls;	\
+#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid)			\
+	for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls;	\
 					(chid)++, (ppa).g.ch = (chid))	\
-		for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl;	\
+		for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl;	\
 					(lunid)++, (ppa).g.lun = (lunid))
 
 #else /* CONFIG_NVM */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
new file mode 100644
index 000000000000..f21471f7ee40
--- /dev/null
+++ b/include/linux/nvme-fc-driver.h
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2016, Avago Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_FC_DRIVER_H
+#define _NVME_FC_DRIVER_H 1
+
+
+/*
+ * **********************  LLDD FC-NVME Host API ********************
+ *
+ *  For FC LLDD's that are the NVME Host role.
+ *
+ * ******************************************************************
+ */
+
+
+
+/* FC Port role bitmask - can merge with FC Port Roles in fc transport */
+#define FC_PORT_ROLE_NVME_INITIATOR	0x10
+#define FC_PORT_ROLE_NVME_TARGET	0x11
+#define FC_PORT_ROLE_NVME_DISCOVERY	0x12
+
+
+/**
+ * struct nvme_fc_port_info - port-specific ids and FC connection-specific
+ *                            data element used during NVME Host role
+ *                            registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ */
+struct nvme_fc_port_info {
+	u64			node_name;
+	u64			port_name;
+	u32			port_role;
+	u32			port_id;
+};
+
+
+/**
+ * struct nvmefc_ls_req - Request structure passed from NVME-FC transport
+ *                        to LLDD in order to perform a NVME FC-4 LS
+ *                        request and obtain a response.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD ls_req
+ * entrypoint.
+ * @rqstaddr: pointer to request buffer
+ * @rqstdma:  PCI DMA address of request buffer
+ * @rqstlen:  Length, in bytes, of request buffer
+ * @rspaddr:  pointer to response buffer
+ * @rspdma:   PCI DMA address of response buffer
+ * @rsplen:   Length, in bytes, of response buffer
+ * @timeout:  Maximum amount of time, in seconds, to wait for the LS response.
+ *            If timeout exceeded, LLDD to abort LS exchange and complete
+ *            LS request with error status.
+ * @private:  pointer to memory allocated alongside the ls request structure
+ *            that is specifically for the LLDD to use while processing the
+ *            request. The length of the buffer corresponds to the
+ *            lsrqst_priv_sz value specified in the nvme_fc_port_template
+ *            supplied by the LLDD.
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            the LS request. req argument is the pointer to the original LS
+ *            request structure. Status argument must be 0 upon success, a
+ *            negative errno on failure (example: -ENXIO).
+ */
+struct nvmefc_ls_req {
+	void			*rqstaddr;
+	dma_addr_t		rqstdma;
+	u32			rqstlen;
+	void			*rspaddr;
+	dma_addr_t		rspdma;
+	u32			rsplen;
+	u32			timeout;
+
+	void			*private;
+
+	void (*done)(struct nvmefc_ls_req *req, int status);
+
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+enum nvmefc_fcp_datadir {
+	NVMEFC_FCP_NODATA,	/* payload_length and sg_cnt will be zero */
+	NVMEFC_FCP_WRITE,
+	NVMEFC_FCP_READ,
+};
+
+
+#define NVME_FC_MAX_SEGMENTS		256
+
+/**
+ * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport
+ *                         to LLDD in order to perform a NVME FCP IO operation.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD fcp_io
+ * entrypoint.
+ * @cmdaddr:   pointer to the FCP CMD IU buffer
+ * @rspaddr:   pointer to the FCP RSP IU buffer
+ * @cmddma:    PCI DMA address of the FCP CMD IU buffer
+ * @rspdma:    PCI DMA address of the FCP RSP IU buffer
+ * @cmdlen:    Length, in bytes, of the FCP CMD IU buffer
+ * @rsplen:    Length, in bytes, of the FCP RSP IU buffer
+ * @payload_length: Length of DATA_IN or DATA_OUT payload data to transfer
+ * @sg_table:  scatter/gather structure for payload data
+ * @first_sgl: memory for 1st scatter/gather list segment for payload data
+ * @sg_cnt:    number of elements in the scatter/gather list
+ * @io_dir:    direction of the FCP request (see NVMEFC_FCP_xxx)
+ * @sqid:      The nvme SQID the command is being issued on
+ * @done:      The callback routine the LLDD is to invoke upon completion of
+ *             the FCP operation. req argument is the pointer to the original
+ *             FCP IO operation.
+ * @private:   pointer to memory allocated alongside the FCP operation
+ *             request structure that is specifically for the LLDD to use
+ *             while processing the operation. The length of the buffer
+ *             corresponds to the fcprqst_priv_sz value specified in the
+ *             nvme_fc_port_template supplied by the LLDD.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of payload data, in bytes, that were
+ *             transferred. Should equal payload_length on success.
+ * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
+ * @status:    Completion status of the FCP operation. must be 0 upon success,
+ *             NVME_SC_FC_xxx value upon failure. Note: this is NOT a
+ *             reflection of the NVME CQE completion status. Only the status
+ *             of the FCP operation at the NVME-FC level.
+ */
+struct nvmefc_fcp_req {
+	void			*cmdaddr;
+	void			*rspaddr;
+	dma_addr_t		cmddma;
+	dma_addr_t		rspdma;
+	u16			cmdlen;
+	u16			rsplen;
+
+	u32			payload_length;
+	struct sg_table		sg_table;
+	struct scatterlist	*first_sgl;
+	int			sg_cnt;
+	enum nvmefc_fcp_datadir	io_dir;
+
+	__le16			sqid;
+
+	void (*done)(struct nvmefc_fcp_req *req);
+
+	void			*private;
+
+	u32			transferred_length;
+	u16			rcv_rsplen;
+	u32			status;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/*
+ * Direct copy of fc_port_state enum. For later merging
+ */
+enum nvme_fc_obj_state {
+	FC_OBJSTATE_UNKNOWN,
+	FC_OBJSTATE_NOTPRESENT,
+	FC_OBJSTATE_ONLINE,
+	FC_OBJSTATE_OFFLINE,		/* User has taken Port Offline */
+	FC_OBJSTATE_BLOCKED,
+	FC_OBJSTATE_BYPASSED,
+	FC_OBJSTATE_DIAGNOSTICS,
+	FC_OBJSTATE_LINKDOWN,
+	FC_OBJSTATE_ERROR,
+	FC_OBJSTATE_LOOPBACK,
+	FC_OBJSTATE_DELETED,
+};
+
+
+/**
+ * struct nvme_fc_local_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a local NVME host port.
+ *                 Allocated/created by the nvme_fc_register_localport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport host port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private:   pointer to memory allocated alongside the local port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the local_priv_sz
+ *             value specified in the nvme_fc_port_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the port.
+ */
+struct nvme_fc_local_port {
+	/* static/read-only fields */
+	u32 port_num;
+	u32 port_role;
+	u64 node_name;
+	u64 port_name;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_remote_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a remote NVME subsystem port.
+ *                 Allocated/created by the nvme_fc_register_remoteport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport remote subsystem port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @localport: pointer to the NVME-FC local host port the subsystem is
+ *             connected to.
+ * @private:   pointer to memory allocated alongside the remote port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the remote_priv_sz
+ *             value specified in the nvme_fc_port_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link or login
+ * state. LLDD may reference fields directly to change them. Initialized by
+ * the port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the remote port. Valid values are
+ *                ONLINE or UNKNOWN.
+ */
+struct nvme_fc_remote_port {
+	/* static fields */
+	u32 port_num;
+	u32 port_role;
+	u64 node_name;
+	u64 port_name;
+
+	struct nvme_fc_local_port *localport;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_port_template - structure containing static entrypoints and
+ *                 operational parameters for an LLDD that supports NVME host
+ *                 behavior. Passed by reference in port registrations.
+ *                 NVME-FC transport remembers template reference and may
+ *                 access it during runtime operation.
+ *
+ * Host/Initiator Transport Entrypoints/Parameters:
+ *
+ * @localport_delete:  The LLDD initiates deletion of a localport via
+ *       nvme_fc_deregister_localport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the localport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @remoteport_delete:  The LLDD initiates deletion of a remoteport via
+ *       nvme_fc_deregister_remoteport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the remoteport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @create_queue:  Upon creating a host<->controller association, queues are
+ *       created such that they can be affinitized to cpus/cores. This
+ *       callback into the LLDD to notify that a controller queue is being
+ *       created.  The LLDD may choose to allocate an associated hw queue
+ *       or map it onto a shared hw queue. Upon return from the call, the
+ *       LLDD specifies a handle that will be given back to it for any
+ *       command that is posted to the controller queue.  The handle can
+ *       be used by the LLDD to map quickly to the proper hw queue for
+ *       command execution.  The mask of cpu's that will map to this queue
+ *       at the block-level is also passed in. The LLDD should use the
+ *       queue id and/or cpu masks to ensure proper affinitization of the
+ *       controller queue to the hw queue.
+ *       Entrypoint is Optional.
+ *
+ * @delete_queue:  This is the inverse of the crete_queue. During
+ *       host<->controller association teardown, this routine is called
+ *       when a controller queue is being terminated. Any association with
+ *       a hw queue should be termined. If there is a unique hw queue, the
+ *       hw queue should be torn down.
+ *       Entrypoint is Optional.
+ *
+ * @poll_queue:  Called to poll for the completion of an io on a blk queue.
+ *       Entrypoint is Optional.
+ *
+ * @ls_req:  Called to issue a FC-NVME FC-4 LS service request.
+ *       The nvme_fc_ls_req structure will fully describe the buffers for
+ *       the request payload and where to place the response payload. The
+ *       LLDD is to allocate an exchange, issue the LS request, obtain the
+ *       LS response, and call the "done" routine specified in the request
+ *       structure (argument to done is the ls request structure itself).
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_io:  called to issue a FC-NVME I/O request.  The I/O may be for
+ *       an admin queue or an i/o queue.  The nvmefc_fcp_req structure will
+ *       fully describe the io: the buffer containing the FC-NVME CMD IU
+ *       (which contains the SQE), the sg list for the payload if applicable,
+ *       and the buffer to place the FC-NVME RSP IU into.  The LLDD will
+ *       complete the i/o, indicating the amount of data transferred or
+ *       any transport error, and call the "done" routine specified in the
+ *       request structure (argument to done is the fcp request structure
+ *       itself).
+ *       Entrypoint is Mandatory.
+ *
+ * @ls_abort: called to request the LLDD to abort the indicated ls request.
+ *       The call may return before the abort has completed. After aborting
+ *       the request, the LLDD must still call the ls request done routine
+ *       indicating an FC transport Aborted status.
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_abort: called to request the LLDD to abort the indicated fcp request.
+ *       The call may return before the abort has completed. After aborting
+ *       the request, the LLDD must still call the fcp request done routine
+ *       indicating an FC transport Aborted status.
+ *       Entrypoint is Mandatory.
+ *
+ * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
+ *       supports for cpu affinitization.
+ *       Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments:  indicates the maximum number of sgl segments supported
+ *       by the LLDD
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments:  indicates the maximum number of sgl segments
+ *       supported by the LLDD for DIF operations.
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary:  indicates the dma address boundary where dma mappings
+ *       will be split across.
+ *       Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ *       4Gig address boundarys
+ *
+ * @local_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a localport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the localport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @remote_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a remoteport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the remoteport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a ls request structure is allocated. The additional
+ *       memory area solely for the of the LLDD and its location is
+ *       specified by the ls_request->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @fcprqst_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a fcp request structure is allocated. The additional
+ *       memory area solely for the of the LLDD and its location is
+ *       specified by the fcp_request->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ */
+struct nvme_fc_port_template {
+	/* initiator-based functions */
+	void	(*localport_delete)(struct nvme_fc_local_port *);
+	void	(*remoteport_delete)(struct nvme_fc_remote_port *);
+	int	(*create_queue)(struct nvme_fc_local_port *,
+				unsigned int qidx, u16 qsize,
+				void **handle);
+	void	(*delete_queue)(struct nvme_fc_local_port *,
+				unsigned int qidx, void *handle);
+	void	(*poll_queue)(struct nvme_fc_local_port *, void *handle);
+	int	(*ls_req)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				struct nvmefc_ls_req *);
+	int	(*fcp_io)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				void *hw_queue_handle,
+				struct nvmefc_fcp_req *);
+	void	(*ls_abort)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				struct nvmefc_ls_req *);
+	void	(*fcp_abort)(struct nvme_fc_local_port *,
+				struct nvme_fc_remote_port *,
+				void *hw_queue_handle,
+				struct nvmefc_fcp_req *);
+
+	u32	max_hw_queues;
+	u16	max_sgl_segments;
+	u16	max_dif_sgl_segments;
+	u64	dma_boundary;
+
+	/* sizes of additional private data for data structures */
+	u32	local_priv_sz;
+	u32	remote_priv_sz;
+	u32	lsrqst_priv_sz;
+	u32	fcprqst_priv_sz;
+};
+
+
+/*
+ * Initiator/Host functions
+ */
+
+int nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
+			struct nvme_fc_port_template *template,
+			struct device *dev,
+			struct nvme_fc_local_port **lport_p);
+
+int nvme_fc_unregister_localport(struct nvme_fc_local_port *localport);
+
+int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
+			struct nvme_fc_port_info *pinfo,
+			struct nvme_fc_remote_port **rport_p);
+
+int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport);
+
+
+
+/*
+ * ***************  LLDD FC-NVME Target/Subsystem API ***************
+ *
+ *  For FC LLDD's that are the NVME Subsystem role
+ *
+ * ******************************************************************
+ */
+
+/**
+ * struct nvmet_fc_port_info - port-specific ids and FC connection-specific
+ *                             data element used during NVME Subsystem role
+ *                             registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ */
+struct nvmet_fc_port_info {
+	u64			node_name;
+	u64			port_name;
+	u32			port_id;
+};
+
+
+/**
+ * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC
+ *                            layer to represent the exchange context for
+ *                            a FC-NVME Link Service (LS).
+ *
+ * The structure is allocated by the LLDD whenever a LS Request is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure
+ * will be passed back to the LLDD when the response is to be transmit.
+ * The LLDD is to use the address to map back to the LLDD exchange structure
+ * which maintains information such as the targetport the LS was received
+ * on, the remote FC NVME initiator that sent the LS, and any FC exchange
+ * context.  Upon completion of the LS response transmit, the address of the
+ * structure will be passed back to the LS rsp done() routine, allowing the
+ * nvmet-fc layer to release dma resources. Upon completion of the done()
+ * routine, no further access will be made by the nvmet-fc layer and the
+ * LLDD can de-allocate the structure.
+ *
+ * Field initialization:
+ *   At the time of the nvmet_fc_rcv_ls_req() call, there is no content that
+ *     is valid in the structure.
+ *
+ *   When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc
+ *     layer will fully set the fields in order to specify the response
+ *     payload buffer and its length as well as the done routine to be called
+ *     upon compeletion of the transmit.  The nvmet-fc layer will also set a
+ *     private pointer for its own use in the done routine.
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp
+ * entrypoint.
+ * @rspbuf:   pointer to the LS response buffer
+ * @rspdma:   PCI DMA address of the LS response buffer
+ * @rsplen:   Length, in bytes, of the LS response buffer
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            transmitting the LS response. req argument is the pointer to
+ *            the original ls request.
+ * @nvmet_fc_private:  pointer to an internal NVMET-FC layer structure used
+ *            as part of the NVMET-FC processing. The LLDD is not to access
+ *            this pointer.
+ */
+struct nvmefc_tgt_ls_req {
+	void		*rspbuf;
+	dma_addr_t	rspdma;
+	u16		rsplen;
+
+	void (*done)(struct nvmefc_tgt_ls_req *req);
+	void *nvmet_fc_private;		/* LLDD is not to access !! */
+};
+
+/* Operations that NVME-FC layer may request the LLDD to perform for FCP */
+enum {
+	NVMET_FCOP_READDATA	= 1,	/* xmt data to initiator */
+	NVMET_FCOP_WRITEDATA	= 2,	/* xmt data from initiator */
+	NVMET_FCOP_READDATA_RSP	= 3,	/* xmt data to initiator and send
+					 * rsp as well
+					 */
+	NVMET_FCOP_RSP		= 4,	/* send rsp frame */
+	NVMET_FCOP_ABORT	= 5,	/* abort exchange via ABTS */
+	NVMET_FCOP_BA_ACC	= 6,	/* send BA_ACC */
+	NVMET_FCOP_BA_RJT	= 7,	/* send BA_RJT */
+};
+
+/**
+ * struct nvmefc_tgt_fcp_req - Structure used between LLDD and NVMET-FC
+ *                            layer to represent the exchange context and
+ *                            the specific FC-NVME IU operation(s) to perform
+ *                            for a FC-NVME FCP IO.
+ *
+ * Structure used between LLDD and nvmet-fc layer to represent the exchange
+ * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related
+ * memory transfers, and its assocated cqe transfer).
+ *
+ * The structure is allocated by the LLDD whenever a FCP CMD IU is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_fcp_req() call. The address of the structure
+ * will be passed back to the LLDD for the data operations and transmit of
+ * the response. The LLDD is to use the address to map back to the LLDD
+ * exchange structure which maintains information such as the targetport
+ * the FCP I/O was received on, the remote FC NVME initiator that sent the
+ * FCP I/O, and any FC exchange context.  Upon completion of the FCP target
+ * operation, the address of the structure will be passed back to the FCP
+ * op done() routine, allowing the nvmet-fc layer to release dma resources.
+ * Upon completion of the done() routine for either RSP or ABORT ops, no
+ * further access will be made by the nvmet-fc layer and the LLDD can
+ * de-allocate the structure.
+ *
+ * Field initialization:
+ *   At the time of the nvmet_fc_rcv_fcp_req() call, there is no content that
+ *     is valid in the structure.
+ *
+ *   When the structure is used for an FCP target operation, the nvmet-fc
+ *     layer will fully set the fields in order to specify the scattergather
+ *     list, the transfer length, as well as the done routine to be called
+ *     upon compeletion of the operation.  The nvmet-fc layer will also set a
+ *     private pointer for its own use in the done routine.
+ *
+ * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !!
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op
+ * entrypoint.
+ * @op:       Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx)
+ * @hwqid:    Specifies the hw queue index (0..N-1, where N is the
+ *            max_hw_queues value from the LLD's nvmet_fc_target_template)
+ *            that the operation is to use.
+ * @offset:   Indicates the DATA_OUT/DATA_IN payload offset to be tranferred.
+ *            Field is only valid on WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @timeout:  amount of time, in seconds, to wait for a response from the NVME
+ *            host. A value of 0 is an infinite wait.
+ *            Valid only for the following ops:
+ *              WRITEDATA: caps the wait for data reception
+ *              READDATA_RSP & RSP: caps wait for FCP_CONF reception (if used)
+ * @transfer_length: the length, in bytes, of the DATA_OUT or DATA_IN payload
+ *            that is to be transferred.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @ba_rjt:   Contains the BA_RJT payload that is to be transferred.
+ *            Valid only for the NVMET_FCOP_BA_RJT op.
+ * @sg:       Scatter/gather list for the DATA_OUT/DATA_IN payload data.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @sg_cnt:   Number of valid entries in the scatter/gather list.
+ *            Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @rspaddr:  pointer to the FCP RSP IU buffer to be transmit
+ *            Used by RSP and READDATA_RSP ops
+ * @rspdma:   PCI DMA address of the FCP RSP IU buffer
+ *            Used by RSP and READDATA_RSP ops
+ * @rsplen:   Length, in bytes, of the FCP RSP IU buffer
+ *            Used by RSP and READDATA_RSP ops
+ * @done:     The callback routine the LLDD is to invoke upon completion of
+ *            the operation. req argument is the pointer to the original
+ *            FCP subsystem op request.
+ * @nvmet_fc_private:  pointer to an internal NVMET-FC layer structure used
+ *            as part of the NVMET-FC processing. The LLDD is not to
+ *            reference this field.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of DATA_OUT payload data received by a
+ *            a WRITEDATA operation. If not a WRITEDATA operation, value must
+ *            be set to 0. Should equal transfer_length on success.
+ * @fcp_error: status of the FCP operation. Must be 0 on success; on failure
+ *            must be a NVME_SC_FC_xxxx value.
+ */
+struct nvmefc_tgt_fcp_req {
+	u8			op;
+	u16			hwqid;
+	u32			offset;
+	u32			timeout;
+	u32			transfer_length;
+	struct fc_ba_rjt	ba_rjt;
+	struct scatterlist	sg[NVME_FC_MAX_SEGMENTS];
+	int			sg_cnt;
+	void			*rspaddr;
+	dma_addr_t		rspdma;
+	u16			rsplen;
+
+	void (*done)(struct nvmefc_tgt_fcp_req *);
+
+	void *nvmet_fc_private;		/* LLDD is not to access !! */
+
+	u32			transferred_length;
+	int			fcp_error;
+};
+
+
+/* Target Features (Bit fields) LLDD supports */
+enum {
+	NVMET_FCTGTFEAT_READDATA_RSP = (1 << 0),
+		/* Bit 0: supports the NVMET_FCPOP_READDATA_RSP op, which
+		 * sends (the last) Read Data sequence followed by the RSP
+		 * sequence in one LLDD operation. Errors during Data
+		 * sequence transmit must not allow RSP sequence to be sent.
+		 */
+	NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1),
+		/* Bit 1: When 0, the LLDD will deliver FCP CMD
+		 * on the CPU it should be affinitized to. Thus work will
+		 * be scheduled on the cpu received on. When 1, the LLDD
+		 * may not deliver the CMD on the CPU it should be worked
+		 * on. The transport should pick a cpu to schedule the work
+		 * on.
+		 */
+};
+
+
+/**
+ * struct nvmet_fc_target_port - structure used between NVME-FC transport and
+ *                 a LLDD to reference a local NVME subsystem port.
+ *                 Allocated/created by the nvme_fc_register_targetport()
+ *                 transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num:  NVME-FC transport subsytem port number
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private:   pointer to memory allocated alongside the local port
+ *             structure that is specifically for the LLDD to use.
+ *             The length of the buffer corresponds to the target_priv_sz
+ *             value specified in the nvme_fc_target_template supplied by
+ *             the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id:      FC N_Port_ID currently assigned the port. Upper 8 bits must
+ *                be set to 0.
+ * @port_state:   Operational state of the port.
+ */
+struct nvmet_fc_target_port {
+	/* static/read-only fields */
+	u32 port_num;
+	u64 node_name;
+	u64 port_name;
+
+	void *private;
+
+	/* dynamic fields */
+	u32 port_id;
+	enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvmet_fc_target_template - structure containing static entrypoints
+ *                 and operational parameters for an LLDD that supports NVME
+ *                 subsystem behavior. Passed by reference in port
+ *                 registrations. NVME-FC transport remembers template
+ *                 reference and may access it during runtime operation.
+ *
+ * Subsystem/Target Transport Entrypoints/Parameters:
+ *
+ * @targetport_delete:  The LLDD initiates deletion of a targetport via
+ *       nvmet_fc_unregister_targetport(). However, the teardown is
+ *       asynchronous. This routine is called upon the completion of the
+ *       teardown to inform the LLDD that the targetport has been deleted.
+ *       Entrypoint is Mandatory.
+ *
+ * @xmt_ls_rsp:  Called to transmit the response to a FC-NVME FC-4 LS service.
+ *       The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
+ *       structure specified in the nvmet_fc_rcv_ls_req() call made when
+ *       the LS request was received.  The structure will fully describe
+ *       the buffers for the response payload and the dma address of the
+ *       payload. The LLDD is to transmit the response (or return a non-zero
+ *       errno status), and upon completion of the transmit, call the
+ *       "done" routine specified in the nvmefc_tgt_ls_req structure
+ *       (argument to done is the ls reqwuest structure itself).
+ *       After calling the done routine, the LLDD shall consider the
+ *       LS handling complete and the nvmefc_tgt_ls_req structure may
+ *       be freed/released.
+ *       Entrypoint is Mandatory.
+ *
+ * @fcp_op:  Called to perform a data transfer, transmit a response, or
+ *       abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same
+ *       LLDD-supplied exchange structure specified in the
+ *       nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received.
+ *       The op field in the structure shall indicate the operation for
+ *       the LLDD to perform relative to the io.
+ *         NVMET_FCOP_READDATA operation: the LLDD is to send the
+ *           payload data (described by sglist) to the host in 1 or
+ *           more FC sequences (preferrably 1).  Note: the fc-nvme layer
+ *           may call the READDATA operation multiple times for longer
+ *           payloads.
+ *         NVMET_FCOP_WRITEDATA operation: the LLDD is to receive the
+ *           payload data (described by sglist) from the host via 1 or
+ *           more FC sequences (preferrably 1). The LLDD is to generate
+ *           the XFER_RDY IU(s) corresponding to the data being requested.
+ *           Note: the FC-NVME layer may call the WRITEDATA operation
+ *           multiple times for longer payloads.
+ *         NVMET_FCOP_READDATA_RSP operation: the LLDD is to send the
+ *           payload data (described by sglist) to the host in 1 or
+ *           more FC sequences (preferrably 1). If an error occurs during
+ *           payload data transmission, the LLDD is to set the
+ *           nvmefc_tgt_fcp_req fcp_error and transferred_length field, then
+ *           consider the operation complete. On error, the LLDD is to not
+ *           transmit the FCP_RSP iu. If all payload data is transferred
+ *           successfully, the LLDD is to update the nvmefc_tgt_fcp_req
+ *           transferred_length field and may subsequently transmit the
+ *           FCP_RSP iu payload (described by rspbuf, rspdma, rsplen).
+ *           The LLDD is to await FCP_CONF reception to confirm the RSP
+ *           reception by the host. The LLDD may retramsit the FCP_RSP iu
+ *           if necessary per FC-NVME. Upon reception of FCP_CONF, or upon
+ *           FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req
+ *           fcp_error field and consider the operation complete..
+ *         NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload
+ *           (described by rspbuf, rspdma, rsplen).  The LLDD is to await
+ *           FCP_CONF reception to confirm the RSP reception by the host.
+ *           The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME.
+ *           Upon reception of FCP_CONF, or upon FCP_CONF failure, the
+ *           LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
+ *           consider the operation complete..
+ *         NVMET_FCOP_ABORT: the LLDD is to terminate the exchange
+ *           corresponding to the fcp operation. The LLDD shall send
+ *           ABTS and follow FC exchange abort-multi rules, including
+ *           ABTS retries and possible logout.
+ *       Upon completing the indicated operation, the LLDD is to set the
+ *       status fields for the operation (tranferred_length and fcp_error
+ *       status) in the request, then all the "done" routine
+ *       indicated in the fcp request.  Upon return from the "done"
+ *       routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation
+ *       the fc-nvme layer will not longer reference the fcp request,
+ *       allowing the LLDD to free/release the fcp request.
+ *       Note: when calling the done routine for READDATA or WRITEDATA
+ *       operations, the fc-nvme layer may immediate convert, in the same
+ *       thread and before returning to the LLDD, the fcp operation to
+ *       the next operation for the fcp io and call the LLDDs fcp_op
+ *       call again. If fields in the fcp request are to be accessed post
+ *       the done call, the LLDD should save their values prior to calling
+ *       the done routine, and inspect the save values after the done
+ *       routine.
+ *       Returns 0 on success, -<errno> on failure (Ex: -EIO)
+ *       Entrypoint is Mandatory.
+ *
+ * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
+ *       supports for cpu affinitization.
+ *       Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments:  indicates the maximum number of sgl segments supported
+ *       by the LLDD
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments:  indicates the maximum number of sgl segments
+ *       supported by the LLDD for DIF operations.
+ *       Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary:  indicates the dma address boundary where dma mappings
+ *       will be split across.
+ *       Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ *       4Gig address boundarys
+ *
+ * @target_features: The LLDD sets bits in this field to correspond to
+ *       optional features that are supported by the LLDD.
+ *       Refer to the NVMET_FCTGTFEAT_xxx values.
+ *       Value is Mandatory. Allowed to be zero.
+ *
+ * @target_priv_sz: The LLDD sets this field to the amount of additional
+ *       memory that it would like fc nvme layer to allocate on the LLDD's
+ *       behalf whenever a targetport is allocated.  The additional memory
+ *       area solely for the of the LLDD and its location is specified by
+ *       the targetport->private pointer.
+ *       Value is Mandatory. Allowed to be zero.
+ */
+struct nvmet_fc_target_template {
+	void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
+	int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_ls_req *tls_req);
+	int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_fcp_req *);
+
+	u32	max_hw_queues;
+	u16	max_sgl_segments;
+	u16	max_dif_sgl_segments;
+	u64	dma_boundary;
+
+	u32	target_features;
+
+	u32	target_priv_sz;
+};
+
+
+int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
+			struct nvmet_fc_target_template *template,
+			struct device *dev,
+			struct nvmet_fc_target_port **tgtport_p);
+
+int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
+
+int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_ls_req *lsreq,
+			void *lsreqbuf, u32 lsreqbuf_len);
+
+int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_fcp_req *fcpreq,
+			void *cmdiubuf, u32 cmdiubuf_len);
+
+#endif /* _NVME_FC_DRIVER_H */
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
new file mode 100644
index 000000000000..4b45226bd604
--- /dev/null
+++ b/include/linux/nvme-fc.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016 Avago Technologies.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful.
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
+ * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
+ * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
+ * See the GNU General Public License for more details, a copy of which
+ * can be found in the file COPYING included with this package
+ *
+ */
+
+/*
+ * This file contains definitions relative to FC-NVME r1.11 and a few
+ * newer items
+ */
+
+#ifndef _NVME_FC_H
+#define _NVME_FC_H 1
+
+
+#define NVME_CMD_SCSI_ID		0xFD
+#define NVME_CMD_FC_ID			FC_TYPE_NVME
+
+/* FC-NVME Cmd IU Flags */
+#define FCNVME_CMD_FLAGS_DIRMASK	0x03
+#define FCNVME_CMD_FLAGS_WRITE		0x01
+#define FCNVME_CMD_FLAGS_READ		0x02
+
+struct nvme_fc_cmd_iu {
+	__u8			scsi_id;
+	__u8			fc_id;
+	__be16			iu_len;
+	__u8			rsvd4[3];
+	__u8			flags;
+	__be64			connection_id;
+	__be32			csn;
+	__be32			data_len;
+	struct nvme_command	sqe;
+	__be32			rsvd88[2];
+};
+
+#define NVME_FC_SIZEOF_ZEROS_RSP	12
+
+struct nvme_fc_ersp_iu {
+	__u8			rsvd0[2];
+	__be16			iu_len;
+	__be32			rsn;
+	__be32			xfrd_len;
+	__be32			rsvd12;
+	struct nvme_completion	cqe;
+	/* for now - no additional payload */
+};
+
+
+/* FC-NVME r1.03/16-119v0 NVME Link Services */
+enum {
+	FCNVME_LS_RSVD			= 0,
+	FCNVME_LS_RJT			= 1,
+	FCNVME_LS_ACC			= 2,
+	FCNVME_LS_CREATE_ASSOCIATION	= 3,
+	FCNVME_LS_CREATE_CONNECTION	= 4,
+	FCNVME_LS_DISCONNECT		= 5,
+};
+
+/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */
+enum {
+	FCNVME_LSDESC_RSVD		= 0x0,
+	FCNVME_LSDESC_RQST		= 0x1,
+	FCNVME_LSDESC_RJT		= 0x2,
+	FCNVME_LSDESC_CREATE_ASSOC_CMD	= 0x3,
+	FCNVME_LSDESC_CREATE_CONN_CMD	= 0x4,
+	FCNVME_LSDESC_DISCONN_CMD	= 0x5,
+	FCNVME_LSDESC_CONN_ID		= 0x6,
+	FCNVME_LSDESC_ASSOC_ID		= 0x7,
+};
+
+
+/* ********** start of Link Service Descriptors ********** */
+
+
+/*
+ * fills in length of a descriptor. Struture minus descriptor header
+ */
+static inline __be32 fcnvme_lsdesc_len(size_t sz)
+{
+	return cpu_to_be32(sz - (2 * sizeof(u32)));
+}
+
+
+struct fcnvme_ls_rqst_w0 {
+	u8	ls_cmd;			/* FCNVME_LS_xxx */
+	u8	zeros[3];
+};
+
+/* FCNVME_LSDESC_RQST */
+struct fcnvme_lsdesc_rqst {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	struct fcnvme_ls_rqst_w0	w0;
+	__be32	rsvd12;
+};
+
+
+
+
+/* FCNVME_LSDESC_RJT */
+struct fcnvme_lsdesc_rjt {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	u8	rsvd8;
+
+	/*
+	 * Reject reason and explanaction codes are generic
+	 * to ELs's from LS-3.
+	 */
+	u8	reason_code;
+	u8	reason_explanation;
+
+	u8	vendor;
+	__be32	rsvd12;
+};
+
+
+#define FCNVME_ASSOC_HOSTID_LEN		64
+#define FCNVME_ASSOC_HOSTNQN_LEN	256
+#define FCNVME_ASSOC_SUBNQN_LEN		256
+
+/* FCNVME_LSDESC_CREATE_ASSOC_CMD */
+struct fcnvme_lsdesc_cr_assoc_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be16	ersp_ratio;
+	__be16	rsvd10;
+	__be32	rsvd12[9];
+	__be16	cntlid;
+	__be16	sqsize;
+	__be32	rsvd52;
+	u8	hostid[FCNVME_ASSOC_HOSTID_LEN];
+	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
+	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
+	u8	rsvd632[384];
+};
+
+/* FCNVME_LSDESC_CREATE_CONN_CMD */
+struct fcnvme_lsdesc_cr_conn_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be16	ersp_ratio;
+	__be16	rsvd10;
+	__be32	rsvd12[9];
+	__be16	qid;
+	__be16	sqsize;
+	__be32  rsvd52;
+};
+
+/* Disconnect Scope Values */
+enum {
+	FCNVME_DISCONN_ASSOCIATION	= 0,
+	FCNVME_DISCONN_CONNECTION	= 1,
+};
+
+/* FCNVME_LSDESC_DISCONN_CMD */
+struct fcnvme_lsdesc_disconn_cmd {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	u8	rsvd8[3];
+	/* note: scope is really a 1 bit field */
+	u8	scope;			/* FCNVME_DISCONN_xxx */
+	__be32	rsvd12;
+	__be64	id;
+};
+
+/* FCNVME_LSDESC_CONN_ID */
+struct fcnvme_lsdesc_conn_id {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be64	connection_id;
+};
+
+/* FCNVME_LSDESC_ASSOC_ID */
+struct fcnvme_lsdesc_assoc_id {
+	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
+	__be32	desc_len;
+	__be64	association_id;
+};
+
+/* r_ctl values */
+enum {
+	FCNVME_RS_RCTL_DATA		= 1,
+	FCNVME_RS_RCTL_XFER_RDY		= 5,
+	FCNVME_RS_RCTL_RSP		= 8,
+};
+
+
+/* ********** start of Link Services ********** */
+
+
+/* FCNVME_LS_RJT */
+struct fcnvme_ls_rjt {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_rqst		rqst;
+	struct fcnvme_lsdesc_rjt		rjt;
+};
+
+/* FCNVME_LS_ACC */
+struct fcnvme_ls_acc_hdr {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_rqst		rqst;
+	/* Followed by cmd-specific ACC descriptors, see next definitions */
+};
+
+/* FCNVME_LS_CREATE_ASSOCIATION */
+struct fcnvme_ls_cr_assoc_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_cr_assoc_cmd	assoc_cmd;
+};
+
+struct fcnvme_ls_cr_assoc_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_conn_id		connectid;
+};
+
+
+/* FCNVME_LS_CREATE_CONNECTION */
+struct fcnvme_ls_cr_conn_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_cr_conn_cmd	connect_cmd;
+};
+
+struct fcnvme_ls_cr_conn_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+	struct fcnvme_lsdesc_conn_id		connectid;
+};
+
+/* FCNVME_LS_DISCONNECT */
+struct fcnvme_ls_disconnect_rqst {
+	struct fcnvme_ls_rqst_w0		w0;
+	__be32					desc_list_len;
+	struct fcnvme_lsdesc_assoc_id		associd;
+	struct fcnvme_lsdesc_disconn_cmd	discon_cmd;
+};
+
+struct fcnvme_ls_disconnect_acc {
+	struct fcnvme_ls_acc_hdr		hdr;
+};
+
+
+/*
+ * Yet to be defined in FC-NVME:
+ */
+#define NVME_FC_CONNECT_TIMEOUT_SEC	2		/* 2 seconds */
+#define NVME_FC_LS_TIMEOUT_SEC		2		/* 2 seconds */
+#define NVME_FC_TGTOP_TIMEOUT_SEC	2		/* 2 seconds */
+
+
+#endif /* _NVME_FC_H */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fc3c24206593..3d1c6f1b15c9 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,6 +242,7 @@ enum {
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
@@ -558,6 +559,23 @@ struct nvme_dsm_range {
 	__le64			slba;
 };
 
+struct nvme_write_zeroes_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			slba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -857,6 +875,7 @@ struct nvme_command {
 		struct nvme_download_firmware dlfw;
 		struct nvme_format_cmd format;
 		struct nvme_dsm_cmd dsm;
+		struct nvme_write_zeroes_cmd write_zeroes;
 		struct nvme_abort_cmd abort;
 		struct nvme_get_log_page_command get_log_page;
 		struct nvmf_common_command fabrics;
@@ -947,6 +966,7 @@ enum {
 	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_INVALID_PI		= 0x181,
 	NVME_SC_READ_ONLY		= 0x182,
+	NVME_SC_ONCS_NOT_SUPPORTED	= 0x183,
 
 	/*
 	 * I/O Command Set Specific - Fabrics commands:
@@ -973,17 +993,30 @@ enum {
 	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
 
 	NVME_SC_DNR			= 0x4000,
+
+
+	/*
+	 * FC Transport-specific error status values for NVME commands
+	 *
+	 * Transport-specific status code values must be in the range 0xB0..0xBF
+	 */
+
+	/* Generic FC failure - catchall */
+	NVME_SC_FC_TRANSPORT_ERROR	= 0x00B0,
+
+	/* I/O failure due to FC ABTS'd */
+	NVME_SC_FC_TRANSPORT_ABORTED	= 0x00B1,
 };
 
 struct nvme_completion {
 	/*
 	 * Used by Admin and Fabrics commands to return data:
 	 */
-	union {
-		__le16	result16;
-		__le32	result;
-		__le64	result64;
-	};
+	union nvme_result {
+		__le16	u16;
+		__le32	u32;
+		__le64	u64;
+	} result;
 	__le16	sq_head;	/* how much of this queue may be reclaimed */
 	__le16	sq_id;		/* submission queue that generated this entry */
 	__u16	command_id;	/* of the command which completed */
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 39d5b7955b23..884c1e6eb3fe 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -27,6 +27,7 @@ typedef struct {
 
 int match_token(char *, const match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
+int match_u64(substring_t *, u64 *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
 bool match_wildcard(const char *pattern, const char *str);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 09b212d37f1d..09f4be179ff3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -319,6 +319,9 @@ extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 
 #ifdef CONFIG_SWAP
+
+#include <linux/blk_types.h> /* for bio_end_io_t */
+
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..d5aba1512b8b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i)
  *
  * The ?: is just for type safety.
  */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK)
+#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
 
 /*
  * Cap the iov_iter by given limit; note that the second argument is
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 797100e10010..c78f9f0920b5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,6 +9,9 @@
 #include <linux/fs.h>
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
+#include <linux/blk_types.h>
+
+struct bio;
 
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
@@ -100,6 +103,16 @@ struct writeback_control {
 #endif
 };
 
+static inline int wbc_to_write_flags(struct writeback_control *wbc)
+{
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		return REQ_SYNC;
+	else if (wbc->for_kupdate || wbc->for_background)
+		return REQ_BACKGROUND;
+
+	return 0;
+}
+
 /*
  * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
  * and are measured against each other in.  There always is one global
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 8a9563144890..8990e580b278 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -414,14 +414,14 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 extern int scsi_execute_req_flags(struct scsi_device *sdev,
 	const unsigned char *cmd, int data_direction, void *buffer,
 	unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
-	int retries, int *resid, u64 flags);
+	int retries, int *resid, u64 flags, req_flags_t rq_flags);
 static inline int scsi_execute_req(struct scsi_device *sdev,
 	const unsigned char *cmd, int data_direction, void *buffer,
 	unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
 	int retries, int *resid)
 {
 	return scsi_execute_req_flags(sdev, cmd, data_direction, buffer,
-		bufflen, sshdr, timeout, retries, resid, 0);
+		bufflen, sshdr, timeout, retries, resid, 0, 0);
 }
 extern void sdev_disable_disk_events(struct scsi_device *sdev);
 extern void sdev_enable_disk_events(struct scsi_device *sdev);
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index d1defd1ebd95..6ba66e01f6df 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -299,4 +299,21 @@ struct scsi_lun {
 #define SCSI_ACCESS_STATE_MASK        0x0f
 #define SCSI_ACCESS_STATE_PREFERRED   0x80
 
+/* Reporting options for REPORT ZONES */
+enum zbc_zone_reporting_options {
+	ZBC_ZONE_REPORTING_OPTION_ALL = 0,
+	ZBC_ZONE_REPORTING_OPTION_EMPTY,
+	ZBC_ZONE_REPORTING_OPTION_IMPLICIT_OPEN,
+	ZBC_ZONE_REPORTING_OPTION_EXPLICIT_OPEN,
+	ZBC_ZONE_REPORTING_OPTION_CLOSED,
+	ZBC_ZONE_REPORTING_OPTION_FULL,
+	ZBC_ZONE_REPORTING_OPTION_READONLY,
+	ZBC_ZONE_REPORTING_OPTION_OFFLINE,
+	ZBC_ZONE_REPORTING_OPTION_NEED_RESET_WP = 0x10,
+	ZBC_ZONE_REPORTING_OPTION_NON_SEQWRITE,
+	ZBC_ZONE_REPORTING_OPTION_NON_WP = 0x3f,
+};
+
+#define ZBC_REPORT_ZONE_PARTIAL 0x80
+
 #endif /* _SCSI_PROTO_H_ */
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index d336b890e31f..df3e9ae5ad8d 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -27,8 +27,7 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->orig_sector	= bio->bi_iter.bi_sector - 16;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -102,8 +101,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d  %s %llu + %u",
@@ -138,8 +136,7 @@ TRACE_EVENT(bcache_read,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		__entry->cache_hit = hit;
 		__entry->bypass = bypass;
 	),
@@ -170,8 +167,7 @@ TRACE_EVENT(bcache_write,
 		__entry->inode		= inode;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		__entry->writeback = writeback;
 		__entry->bypass = bypass;
 	),
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 8f3a163b8166..3e02e3a25413 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -84,8 +84,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
 					0 : blk_rq_sectors(rq);
 		__entry->errors    = rq->errors;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 		blk_dump_cmd(__get_str(cmd), rq);
 	),
 
@@ -163,7 +162,7 @@ TRACE_EVENT(block_rq_complete,
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->errors    = rq->errors;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, nr_bytes);
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
 		blk_dump_cmd(__get_str(cmd), rq);
 	),
 
@@ -199,8 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->bytes     = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
 					blk_rq_bytes(rq) : 0;
 
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 		blk_dump_cmd(__get_str(cmd), rq);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
@@ -274,8 +272,7 @@ TRACE_EVENT(block_bio_bounce,
 					  bio->bi_bdev->bd_dev : 0;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -313,8 +310,7 @@ TRACE_EVENT(block_bio_complete,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->error		= error;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
@@ -341,8 +337,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -409,8 +404,7 @@ TRACE_EVENT(block_bio_queue,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -438,7 +432,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
 		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
 		__entry->sector		= bio ? bio->bi_iter.bi_sector : 0;
 		__entry->nr_sector	= bio ? bio_sectors(bio) : 0;
-		blk_fill_rwbs(__entry->rwbs, bio ? bio_op(bio) : 0,
+		blk_fill_rwbs(__entry->rwbs,
 			      bio ? bio->bi_opf : 0, __entry->nr_sector);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
         ),
@@ -573,8 +567,7 @@ TRACE_EVENT(block_split,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->new_sector	= new_sector;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -617,8 +610,7 @@ TRACE_EVENT(block_bio_remap,
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
@@ -664,8 +656,7 @@ TRACE_EVENT(block_rq_remap,
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
 		__entry->nr_bios	= blk_rq_count_bios(rq);
-		blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags,
-			      blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u",
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 903a09165bb1..5da2c829a718 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -32,7 +32,7 @@ TRACE_DEFINE_ENUM(LFS);
 TRACE_DEFINE_ENUM(SSR);
 TRACE_DEFINE_ENUM(__REQ_RAHEAD);
 TRACE_DEFINE_ENUM(__REQ_SYNC);
-TRACE_DEFINE_ENUM(__REQ_NOIDLE);
+TRACE_DEFINE_ENUM(__REQ_IDLE);
 TRACE_DEFINE_ENUM(__REQ_PREFLUSH);
 TRACE_DEFINE_ENUM(__REQ_FUA);
 TRACE_DEFINE_ENUM(__REQ_PRIO);
@@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
-#define F2FS_BIO_FLAG_MASK(t)	(t & (REQ_RAHEAD | WRITE_FLUSH_FUA))
+#define F2FS_BIO_FLAG_MASK(t)	(t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA))
 #define F2FS_BIO_EXTRA_MASK(t)	(t & (REQ_META | REQ_PRIO))
 
 #define show_bio_type(op_flags)	show_bio_op_flags(op_flags), 		\
@@ -65,11 +65,9 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 	__print_symbolic(F2FS_BIO_FLAG_MASK(flags),			\
 		{ 0,			"WRITE" },			\
 		{ REQ_RAHEAD, 		"READAHEAD" },			\
-		{ READ_SYNC, 		"READ_SYNC" },			\
-		{ WRITE_SYNC, 		"WRITE_SYNC" },			\
-		{ WRITE_FLUSH,		"WRITE_FLUSH" },		\
-		{ WRITE_FUA, 		"WRITE_FUA" },			\
-		{ WRITE_FLUSH_FUA,	"WRITE_FLUSH_FUA" })
+		{ REQ_SYNC, 		"REQ_SYNC" },			\
+		{ REQ_PREFLUSH,		"REQ_PREFLUSH" },		\
+		{ REQ_FUA,		"REQ_FUA" })
 
 #define show_bio_extra(type)						\
 	__print_symbolic(F2FS_BIO_EXTRA_MASK(type),			\
diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h
new file mode 100644
index 000000000000..3c518e455680
--- /dev/null
+++ b/include/trace/events/wbt.h
@@ -0,0 +1,153 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM wbt
+
+#if !defined(_TRACE_WBT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WBT_H
+
+#include <linux/tracepoint.h>
+#include "../../../block/blk-wbt.h"
+
+/**
+ * wbt_stat - trace stats for blk_wb
+ * @stat: array of read/write stats
+ */
+TRACE_EVENT(wbt_stat,
+
+	TP_PROTO(struct backing_dev_info *bdi, struct blk_rq_stat *stat),
+
+	TP_ARGS(bdi, stat),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(s64, rmean)
+		__field(u64, rmin)
+		__field(u64, rmax)
+		__field(s64, rnr_samples)
+		__field(s64, rtime)
+		__field(s64, wmean)
+		__field(u64, wmin)
+		__field(u64, wmax)
+		__field(s64, wnr_samples)
+		__field(s64, wtime)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->rmean		= stat[0].mean;
+		__entry->rmin		= stat[0].min;
+		__entry->rmax		= stat[0].max;
+		__entry->rnr_samples	= stat[0].nr_samples;
+		__entry->wmean		= stat[1].mean;
+		__entry->wmin		= stat[1].min;
+		__entry->wmax		= stat[1].max;
+		__entry->wnr_samples	= stat[1].nr_samples;
+	),
+
+	TP_printk("%s: rmean=%llu, rmin=%llu, rmax=%llu, rsamples=%llu, "
+		  "wmean=%llu, wmin=%llu, wmax=%llu, wsamples=%llu\n",
+		  __entry->name, __entry->rmean, __entry->rmin, __entry->rmax,
+		  __entry->rnr_samples, __entry->wmean, __entry->wmin,
+		  __entry->wmax, __entry->wnr_samples)
+);
+
+/**
+ * wbt_lat - trace latency event
+ * @lat: latency trigger
+ */
+TRACE_EVENT(wbt_lat,
+
+	TP_PROTO(struct backing_dev_info *bdi, unsigned long lat),
+
+	TP_ARGS(bdi, lat),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(unsigned long, lat)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->lat = div_u64(lat, 1000);
+	),
+
+	TP_printk("%s: latency %lluus\n", __entry->name,
+			(unsigned long long) __entry->lat)
+);
+
+/**
+ * wbt_step - trace wb event step
+ * @msg: context message
+ * @step: the current scale step count
+ * @window: the current monitoring window
+ * @bg: the current background queue limit
+ * @normal: the current normal writeback limit
+ * @max: the current max throughput writeback limit
+ */
+TRACE_EVENT(wbt_step,
+
+	TP_PROTO(struct backing_dev_info *bdi, const char *msg,
+		 int step, unsigned long window, unsigned int bg,
+		 unsigned int normal, unsigned int max),
+
+	TP_ARGS(bdi, msg, step, window, bg, normal, max),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(const char *, msg)
+		__field(int, step)
+		__field(unsigned long, window)
+		__field(unsigned int, bg)
+		__field(unsigned int, normal)
+		__field(unsigned int, max)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->msg	= msg;
+		__entry->step	= step;
+		__entry->window	= div_u64(window, 1000);
+		__entry->bg	= bg;
+		__entry->normal	= normal;
+		__entry->max	= max;
+	),
+
+	TP_printk("%s: %s: step=%d, window=%luus, background=%u, normal=%u, max=%u\n",
+		  __entry->name, __entry->msg, __entry->step, __entry->window,
+		  __entry->bg, __entry->normal, __entry->max)
+);
+
+/**
+ * wbt_timer - trace wb timer event
+ * @status: timer state status
+ * @step: the current scale step count
+ * @inflight: tracked writes inflight
+ */
+TRACE_EVENT(wbt_timer,
+
+	TP_PROTO(struct backing_dev_info *bdi, unsigned int status,
+		 int step, unsigned int inflight),
+
+	TP_ARGS(bdi, status, step, inflight),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(unsigned int, status)
+		__field(int, step)
+		__field(unsigned int, inflight)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		__entry->status		= status;
+		__entry->step		= step;
+		__entry->inflight	= inflight;
+	),
+
+	TP_printk("%s: status=%u, step=%d, inflight=%u\n", __entry->name,
+		  __entry->status, __entry->step, __entry->inflight)
+);
+
+#endif /* _TRACE_WBT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 9838a5c7b3e7..bc2ef9fef7c8 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -70,6 +70,7 @@ header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += blkzoned.h
 header-y += bpf_common.h
 header-y += bpf_perf_event.h
 header-y += bpf.h
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
new file mode 100644
index 000000000000..40d1d7bff537
--- /dev/null
+++ b/include/uapi/linux/blkzoned.h
@@ -0,0 +1,143 @@
+/*
+ * Zoned block devices handling.
+ *
+ * Copyright (C) 2015 Seagate Technology PLC
+ *
+ * Written by: Shaun Tancheff <shaun.tancheff@seagate.com>
+ *
+ * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
+ * Copyright (C) 2016 Western Digital
+ *
+ * This file is licensed under  the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#ifndef _UAPI_BLKZONED_H
+#define _UAPI_BLKZONED_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/**
+ * enum blk_zone_type - Types of zones allowed in a zoned device.
+ *
+ * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be writen
+ *                              randomly. Zone reset has no effect on the zone.
+ * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially
+ * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially
+ *
+ * Any other value not defined is reserved and must be considered as invalid.
+ */
+enum blk_zone_type {
+	BLK_ZONE_TYPE_CONVENTIONAL	= 0x1,
+	BLK_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
+	BLK_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
+};
+
+/**
+ * enum blk_zone_cond - Condition [state] of a zone in a zoned device.
+ *
+ * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional.
+ * @BLK_ZONE_COND_EMPTY: The zone is empty.
+ * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened.
+ * @BLK_ZONE_COND_EXP_OPEN: The zones was explicitly opened by an
+ *                          OPEN ZONE command.
+ * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing.
+ * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a zone
+ *                      FINISH ZONE command.
+ * @BLK_ZONE_COND_READONLY: The zone is read-only.
+ * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
+ *
+ * The Zone Condition state machine in the ZBC/ZAC standards maps the above
+ * deinitions as:
+ *   - ZC1: Empty         | BLK_ZONE_EMPTY
+ *   - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
+ *   - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
+ *   - ZC4: Closed        | BLK_ZONE_CLOSED
+ *   - ZC5: Full          | BLK_ZONE_FULL
+ *   - ZC6: Read Only     | BLK_ZONE_READONLY
+ *   - ZC7: Offline       | BLK_ZONE_OFFLINE
+ *
+ * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
+ * be considered invalid.
+ */
+enum blk_zone_cond {
+	BLK_ZONE_COND_NOT_WP	= 0x0,
+	BLK_ZONE_COND_EMPTY	= 0x1,
+	BLK_ZONE_COND_IMP_OPEN	= 0x2,
+	BLK_ZONE_COND_EXP_OPEN	= 0x3,
+	BLK_ZONE_COND_CLOSED	= 0x4,
+	BLK_ZONE_COND_READONLY	= 0xD,
+	BLK_ZONE_COND_FULL	= 0xE,
+	BLK_ZONE_COND_OFFLINE	= 0xF,
+};
+
+/**
+ * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
+ *
+ * @start: Zone start in 512 B sector units
+ * @len: Zone length in 512 B sector units
+ * @wp: Zone write pointer location in 512 B sector units
+ * @type: see enum blk_zone_type for possible values
+ * @cond: see enum blk_zone_cond for possible values
+ * @non_seq: Flag indicating that the zone is using non-sequential resources
+ *           (for host-aware zoned block devices only).
+ * @reset: Flag indicating that a zone reset is recommended.
+ * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size.
+ *
+ * start, len and wp use the regular 512 B sector unit, regardless of the
+ * device logical block size. The overall structure size is 64 B to match the
+ * ZBC/ZAC defined zone descriptor and allow support for future additional
+ * zone information.
+ */
+struct blk_zone {
+	__u64	start;		/* Zone start sector */
+	__u64	len;		/* Zone length in number of sectors */
+	__u64	wp;		/* Zone write pointer position */
+	__u8	type;		/* Zone type */
+	__u8	cond;		/* Zone condition */
+	__u8	non_seq;	/* Non-sequential write resources active */
+	__u8	reset;		/* Reset write pointer recommended */
+	__u8	reserved[36];
+};
+
+/**
+ * struct blk_zone_report - BLKREPORTZONE ioctl request/reply
+ *
+ * @sector: starting sector of report
+ * @nr_zones: IN maximum / OUT actual
+ * @reserved: padding to 16 byte alignment
+ * @zones: Space to hold @nr_zones @zones entries on reply.
+ *
+ * The array of at most @nr_zones must follow this structure in memory.
+ */
+struct blk_zone_report {
+	__u64		sector;
+	__u32		nr_zones;
+	__u8		reserved[4];
+	struct blk_zone zones[0];
+} __packed;
+
+/**
+ * struct blk_zone_range - BLKRESETZONE ioctl request
+ * @sector: starting sector of the first zone to issue reset write pointer
+ * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ */
+struct blk_zone_range {
+	__u64		sector;
+	__u64		nr_sectors;
+};
+
+/**
+ * Zoned block device ioctl's:
+ *
+ * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
+ *                 The zone report will start from the zone containing the
+ *                 sector specified in the report request structure.
+ * @BLKRESETZONE: Reset the write pointer of the zones in the specified
+ *                sector range. The sector range must be zone aligned.
+ */
+#define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
+#define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
+
+#endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index acb2b6152ba0..c1d11df07b28 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -225,6 +225,10 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index e08e413d5f71..c91c642ea900 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -38,11 +38,12 @@ enum {
 };
 
 /* values for flags field */
-#define NBD_FLAG_HAS_FLAGS    (1 << 0) /* nbd-server supports flags */
-#define NBD_FLAG_READ_ONLY    (1 << 1) /* device is read-only */
-#define NBD_FLAG_SEND_FLUSH   (1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_HAS_FLAGS	(1 << 0) /* nbd-server supports flags */
+#define NBD_FLAG_READ_ONLY	(1 << 1) /* device is read-only */
+#define NBD_FLAG_SEND_FLUSH	(1 << 2) /* can flush writeback cache */
 /* there is a gap here to match userspace */
-#define NBD_FLAG_SEND_TRIM    (1 << 5) /* send trim/discard */
+#define NBD_FLAG_SEND_TRIM	(1 << 5) /* send trim/discard */
+#define NBD_FLAG_CAN_MULTI_CONN	(1 << 8)	/* Server supports multiple connections per export. */
 
 /* userspace doesn't need the nbd_device structure */
 
diff --git a/include/uapi/scsi/fc/fc_fs.h b/include/uapi/scsi/fc/fc_fs.h
index 50f28b143451..dcf314dc2a27 100644
--- a/include/uapi/scsi/fc/fc_fs.h
+++ b/include/uapi/scsi/fc/fc_fs.h
@@ -190,6 +190,7 @@ enum fc_fh_type {
 	FC_TYPE_FCP =	0x08,	/* SCSI FCP */
 	FC_TYPE_CT =	0x20,	/* Fibre Channel Services (FC-CT) */
 	FC_TYPE_ILS =	0x22,	/* internal link service */
+	FC_TYPE_NVME =	0x28,	/* FC-NVME */
 };
 
 /*
@@ -203,6 +204,7 @@ enum fc_fh_type {
 	[FC_TYPE_FCP] =		"FCP",			\
 	[FC_TYPE_CT] =		"CT",			\
 	[FC_TYPE_ILS] =		"ILS",			\
+	[FC_TYPE_NVME] =	"NVME",			\
 }
 
 /*