Merge tag 'for-5.9/drivers-20200803' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe: - NVMe: - ZNS support (Aravind, Keith, Matias, Niklas) - Misc cleanups, optimizations, fixes (Baolin, Chaitanya, David, Dongli, Max, Sagi) - null_blk zone capacity support (Aravind) - MD: - raid5/6 fixes (ChangSyun) - Warning fixes (Damien) - raid5 stripe fixes (Guoqing, Song, Yufen) - sysfs deadlock fix (Junxiao) - raid10 deadlock fix (Vitaly) - struct_size conversions (Gustavo) - Set of bcache updates/fixes (Coly) * tag 'for-5.9/drivers-20200803' of git://git.kernel.dk/linux-block: (117 commits) md/raid5: Allow degraded raid6 to do rmw md/raid5: Fix Force reconstruct-write io stuck in degraded raid5 raid5: don't duplicate code for different paths in handle_stripe raid5-cache: hold spinlock instead of mutex in r5c_journal_mode_show md: print errno in super_written md/raid5: remove the redundant setting of STRIPE_HANDLE md: register new md sysfs file 'uuid' read-only md: fix max sectors calculation for super 1.0 nvme-loop: remove extra variable in create ctrl nvme-loop: set ctrl state connecting after init nvme-multipath: do not fall back to __nvme_find_path() for non-optimized paths nvme-multipath: fix logic for non-optimized paths nvme-rdma: fix controller reset hang during traffic nvme-tcp: fix controller reset hang during traffic nvmet: introduce the passthru Kconfig option nvmet: introduce the passthru configfs interface nvmet: Add passthru enable/disable helpers nvmet: add passthru code to process commands nvme: export nvme_find_get_ns() and nvme_put_ns() nvme: introduce nvme_ctrl_get_by_path() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-08-05 10:51:40 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-08-05 10:51:40 -0700
commit: e0fc99e21e6e299673f1640105ac0c1d829c2d93 (patch)
tree: 4637b171164c3e653e9002b2bc962b32f05b5a07 /drivers/nvme/target
parent: 4834ce9d8e074bb7ae197632e0708219b9f389b5 (diff)
parent: f59589fc89665102923725e80e12f782d5f74f67 (diff)
13 files changed, 815 insertions, 129 deletions
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 4474952d64c6..8056955e652c 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -16,6 +16,18 @@ config NVME_TARGET
 	  To configure the NVMe target you probably want to use the nvmetcli
 	  tool from http://git.infradead.org/users/hch/nvmetcli.git.
 
+config NVME_TARGET_PASSTHRU
+	bool "NVMe Target Passthrough support"
+	depends on NVME_TARGET
+	depends on NVME_CORE=y || NVME_CORE=NVME_TARGET
+	help
+	  This enables target side NVMe passthru controller support for the
+	  NVMe Over Fabrics protocol. It allows for hosts to manage and
+	  directly access an actual NVMe controller residing on the target
+	  side, incuding executing Vendor Unique Commands.
+
+	  If unsure, say N.
+
 config NVME_TARGET_LOOP
 	tristate "NVMe loopback device support"
 	depends on NVME_TARGET
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 2b33836f3d3e..ebf91fc4c72e 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_NVME_TARGET_TCP)		+= nvmet-tcp.o
 
 nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
 			discovery.o io-cmd-file.o io-cmd-bdev.o
+nvmet-$(CONFIG_NVME_TARGET_PASSTHRU)	+= passthru.o
 nvme-loop-y	+= loop.o
 nvmet-rdma-y	+= rdma.o
 nvmet-fc-y	+= fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 1db8c0498668..e9fe91786bbb 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -113,11 +113,10 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
 	u64 data_units_read = 0, data_units_written = 0;
 	struct nvmet_ns *ns;
 	struct nvmet_ctrl *ctrl;
+	unsigned long idx;
 
 	ctrl = req->sq->ctrl;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
 		/* we don't have the right data for file backed ns */
 		if (!ns->bdev)
 			continue;
@@ -127,9 +126,7 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
 		host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
 		data_units_written += DIV_ROUND_UP(
 			part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000);
-
 	}
-	rcu_read_unlock();
 
 	put_unaligned_le64(host_reads, &slog->host_reads[0]);
 	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
@@ -230,14 +227,13 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmet_ns *ns;
+	unsigned long idx;
 	u32 count = 0;
 
 	if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
-		rcu_read_lock();
-		list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+		xa_for_each(&ctrl->subsys->namespaces, idx, ns)
 			if (ns->anagrpid == grpid)
 				desc->nsids[count++] = cpu_to_le32(ns->nsid);
-		rcu_read_unlock();
 	}
 
 	desc->grpid = cpu_to_le32(grpid);
@@ -427,7 +423,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->awupf = 0;
 
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
-	if (ctrl->ops->has_keyed_sgls)
+	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
 		id->sgls |= cpu_to_le32(1 << 2);
 	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
@@ -556,6 +552,7 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmet_ns *ns;
+	unsigned long idx;
 	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
 	__le32 *list;
 	u16 status = 0;
@@ -567,15 +564,13 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 		goto out;
 	}
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
 		if (ns->nsid <= min_nsid)
 			continue;
 		list[i++] = cpu_to_le32(ns->nsid);
 		if (i == buf_size / sizeof(__le32))
 			break;
 	}
-	rcu_read_unlock();
 
 	status = nvmet_copy_to_sgl(req, 0, list, buf_size);
 
@@ -754,7 +749,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
 	return 0;
 }
 
-static void nvmet_execute_set_features(struct nvmet_req *req)
+void nvmet_execute_set_features(struct nvmet_req *req)
 {
 	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
 	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
@@ -829,7 +824,7 @@ void nvmet_get_feat_async_event(struct nvmet_req *req)
 	nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
 }
 
-static void nvmet_execute_get_features(struct nvmet_req *req)
+void nvmet_execute_get_features(struct nvmet_req *req)
 {
 	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
 	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
@@ -945,6 +940,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	if (unlikely(ret))
 		return ret;
 
+	if (nvmet_req_passthru_ctrl(req))
+		return nvmet_parse_passthru_admin_cmd(req);
+
 	switch (cmd->common.opcode) {
 	case nvme_admin_get_log_page:
 		req->execute = nvmet_execute_get_log_page;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 419e0d4ce79b..74b2b61c773b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -666,6 +666,103 @@ static const struct config_item_type nvmet_namespaces_type = {
 	.ct_owner		= THIS_MODULE,
 };
 
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+
+static ssize_t nvmet_passthru_device_path_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", subsys->passthru_ctrl_path);
+}
+
+static ssize_t nvmet_passthru_device_path_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+	size_t len;
+	int ret;
+
+	mutex_lock(&subsys->lock);
+
+	ret = -EBUSY;
+	if (subsys->passthru_ctrl)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	len = strcspn(page, "\n");
+	if (!len)
+		goto out_unlock;
+
+	kfree(subsys->passthru_ctrl_path);
+	ret = -ENOMEM;
+	subsys->passthru_ctrl_path = kstrndup(page, len, GFP_KERNEL);
+	if (!subsys->passthru_ctrl_path)
+		goto out_unlock;
+
+	mutex_unlock(&subsys->lock);
+
+	return count;
+out_unlock:
+	mutex_unlock(&subsys->lock);
+	return ret;
+}
+CONFIGFS_ATTR(nvmet_passthru_, device_path);
+
+static ssize_t nvmet_passthru_enable_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+
+	return sprintf(page, "%d\n", subsys->passthru_ctrl ? 1 : 0);
+}
+
+static ssize_t nvmet_passthru_enable_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+	bool enable;
+	int ret = 0;
+
+	if (strtobool(page, &enable))
+		return -EINVAL;
+
+	if (enable)
+		ret = nvmet_passthru_ctrl_enable(subsys);
+	else
+		nvmet_passthru_ctrl_disable(subsys);
+
+	return ret ? ret : count;
+}
+CONFIGFS_ATTR(nvmet_passthru_, enable);
+
+static struct configfs_attribute *nvmet_passthru_attrs[] = {
+	&nvmet_passthru_attr_device_path,
+	&nvmet_passthru_attr_enable,
+	NULL,
+};
+
+static const struct config_item_type nvmet_passthru_type = {
+	.ct_attrs		= nvmet_passthru_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+static void nvmet_add_passthru_group(struct nvmet_subsys *subsys)
+{
+	config_group_init_type_name(&subsys->passthru_group,
+				    "passthru", &nvmet_passthru_type);
+	configfs_add_default_group(&subsys->passthru_group,
+				   &subsys->group);
+}
+
+#else /* CONFIG_NVME_TARGET_PASSTHRU */
+
+static void nvmet_add_passthru_group(struct nvmet_subsys *subsys)
+{
+}
+
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
+
 static int nvmet_port_subsys_allow_link(struct config_item *parent,
 		struct config_item *target)
 {
@@ -862,14 +959,14 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
 	struct nvmet_subsys *subsys = to_subsys(item);
 
 	if (NVME_TERTIARY(subsys->ver))
-		return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
-				(int)NVME_MAJOR(subsys->ver),
-				(int)NVME_MINOR(subsys->ver),
-				(int)NVME_TERTIARY(subsys->ver));
+		return snprintf(page, PAGE_SIZE, "%llu.%llu.%llu\n",
+				NVME_MAJOR(subsys->ver),
+				NVME_MINOR(subsys->ver),
+				NVME_TERTIARY(subsys->ver));
 
-	return snprintf(page, PAGE_SIZE, "%d.%d\n",
-			(int)NVME_MAJOR(subsys->ver),
-			(int)NVME_MINOR(subsys->ver));
+	return snprintf(page, PAGE_SIZE, "%llu.%llu\n",
+			NVME_MAJOR(subsys->ver),
+			NVME_MINOR(subsys->ver));
 }
 
 static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
@@ -879,6 +976,10 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
 	int major, minor, tertiary = 0;
 	int ret;
 
+	/* passthru subsystems use the underlying controller's version */
+	if (nvmet_passthru_ctrl(subsys))
+		return -EINVAL;
+
 	ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
 	if (ret != 2 && ret != 3)
 		return -EINVAL;
@@ -1121,6 +1222,8 @@ static struct config_group *nvmet_subsys_make(struct config_group *group,
 	configfs_add_default_group(&subsys->allowed_hosts_group,
 			&subsys->group);
 
+	nvmet_add_passthru_group(subsys);
+
 	return &subsys->group;
 }
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6816507fba58..b92f45f5cd5b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -115,13 +115,14 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
 
 static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
 {
-	struct nvmet_ns *ns;
+	unsigned long nsid = 0;
+	struct nvmet_ns *cur;
+	unsigned long idx;
 
-	if (list_empty(&subsys->namespaces))
-		return 0;
+	xa_for_each(&subsys->namespaces, idx, cur)
+		nsid = cur->nsid;
 
-	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
-	return ns->nsid;
+	return nsid;
 }
 
 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
@@ -336,7 +337,7 @@ int nvmet_enable_port(struct nvmet_port *port)
 	 * If the user requested PI support and the transport isn't pi capable,
 	 * don't enable the port.
 	 */
-	if (port->pi_enable && !ops->metadata_support) {
+	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
 		pr_err("T10-PI is not supported by transport type %d\n",
 		       port->disc_addr.trtype);
 		ret = -EINVAL;
@@ -410,28 +411,13 @@ static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	cancel_delayed_work_sync(&ctrl->ka_work);
 }
 
-static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
-		__le32 nsid)
-{
-	struct nvmet_ns *ns;
-
-	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
-		if (ns->nsid == le32_to_cpu(nsid))
-			return ns;
-	}
-
-	return NULL;
-}
-
 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
 {
 	struct nvmet_ns *ns;
 
-	rcu_read_lock();
-	ns = __nvmet_find_namespace(ctrl, nsid);
+	ns = xa_load(&ctrl->subsys->namespaces, le32_to_cpu(nsid));
 	if (ns)
 		percpu_ref_get(&ns->ref);
-	rcu_read_unlock();
 
 	return ns;
 }
@@ -558,6 +544,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 
 	mutex_lock(&subsys->lock);
 	ret = 0;
+
+	if (nvmet_passthru_ctrl(subsys)) {
+		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
+		goto out_unlock;
+	}
+
 	if (ns->enabled)
 		goto out_unlock;
 
@@ -586,24 +578,10 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	if (ns->nsid > subsys->max_nsid)
 		subsys->max_nsid = ns->nsid;
 
-	/*
-	 * The namespaces list needs to be sorted to simplify the implementation
-	 * of the Identify Namepace List subcommand.
-	 */
-	if (list_empty(&subsys->namespaces)) {
-		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
-	} else {
-		struct nvmet_ns *old;
-
-		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link,
-					lockdep_is_held(&subsys->lock)) {
-			BUG_ON(ns->nsid == old->nsid);
-			if (ns->nsid < old->nsid)
-				break;
-		}
+	ret = xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL);
+	if (ret)
+		goto out_restore_subsys_maxnsid;
 
-		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
-	}
 	subsys->nr_namespaces++;
 
 	nvmet_ns_changed(subsys, ns->nsid);
@@ -612,6 +590,10 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 out_unlock:
 	mutex_unlock(&subsys->lock);
 	return ret;
+
+out_restore_subsys_maxnsid:
+	subsys->max_nsid = nvmet_max_nsid(subsys);
+	percpu_ref_exit(&ns->ref);
 out_dev_put:
 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
@@ -630,7 +612,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
 		goto out_unlock;
 
 	ns->enabled = false;
-	list_del_rcu(&ns->dev_link);
+	xa_erase(&ns->subsys->namespaces, ns->nsid);
 	if (ns->nsid == subsys->max_nsid)
 		subsys->max_nsid = nvmet_max_nsid(subsys);
 
@@ -681,7 +663,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 	if (!ns)
 		return NULL;
 
-	INIT_LIST_HEAD(&ns->dev_link);
 	init_completion(&ns->disable_done);
 
 	ns->nsid = nsid;
@@ -874,6 +855,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	if (unlikely(ret))
 		return ret;
 
+	if (nvmet_req_passthru_ctrl(req))
+		return nvmet_parse_passthru_io_cmd(req);
+
 	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
 	if (unlikely(!req->ns)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
@@ -1263,14 +1247,14 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
 		struct nvmet_req *req)
 {
 	struct nvmet_ns *ns;
+	unsigned long idx;
 
 	if (!req->p2p_client)
 		return;
 
 	ctrl->p2p_client = get_device(req->p2p_client);
 
-	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link,
-				lockdep_is_held(&ctrl->subsys->lock))
+	xa_for_each(&ctrl->subsys->namespaces, idx, ns)
 		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
 }
 
@@ -1495,7 +1479,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	if (!subsys)
 		return ERR_PTR(-ENOMEM);
 
-	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
+	subsys->ver = NVMET_DEFAULT_VS;
 	/* generate a random serial number as our controllers are ephemeral: */
 	get_random_bytes(&subsys->serial, sizeof(subsys->serial));
 
@@ -1523,7 +1507,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	kref_init(&subsys->ref);
 
 	mutex_init(&subsys->lock);
-	INIT_LIST_HEAD(&subsys->namespaces);
+	xa_init(&subsys->namespaces);
 	INIT_LIST_HEAD(&subsys->ctrls);
 	INIT_LIST_HEAD(&subsys->hosts);
 
@@ -1535,7 +1519,10 @@ static void nvmet_subsys_free(struct kref *ref)
 	struct nvmet_subsys *subsys =
 		container_of(ref, struct nvmet_subsys, ref);
 
-	WARN_ON_ONCE(!list_empty(&subsys->namespaces));
+	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
+
+	xa_destroy(&subsys->namespaces);
+	nvmet_passthru_subsys_free(subsys);
 
 	kfree(subsys->subsysnqn);
 	kfree_rcu(subsys->model, rcuhead);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 40cf0b6e6c9d..f40c05c33c3a 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -277,7 +277,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
 	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
 
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
-	if (ctrl->ops->has_keyed_sgls)
+	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
 		id->sgls |= cpu_to_le32(1 << 2);
 	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 27fd3b5aa621..55bafd56166a 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -167,7 +167,6 @@ struct nvmet_fc_tgt_assoc {
 	struct nvmet_fc_tgt_queue	*queues[NVMET_NR_QUEUES + 1];
 	struct kref			ref;
 	struct work_struct		del_work;
-	atomic_t			del_work_active;
 };
 
 
@@ -1090,7 +1089,6 @@ nvmet_fc_delete_assoc(struct work_struct *work)
 		container_of(work, struct nvmet_fc_tgt_assoc, del_work);
 
 	nvmet_fc_delete_target_assoc(assoc);
-	atomic_set(&assoc->del_work_active, 0);
 	nvmet_fc_tgt_a_put(assoc);
 }
 
@@ -1123,7 +1121,6 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
 	INIT_LIST_HEAD(&assoc->a_list);
 	kref_init(&assoc->ref);
 	INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
-	atomic_set(&assoc->del_work_active, 0);
 	atomic_set(&assoc->terminating, 0);
 
 	while (needrandom) {
@@ -1243,7 +1240,8 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport,
 	list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
 		if (association_id == assoc->association_id) {
 			ret = assoc;
-			nvmet_fc_tgt_a_get(assoc);
+			if (!nvmet_fc_tgt_a_get(assoc))
+				ret = NULL;
 			break;
 		}
 	}
@@ -1477,21 +1475,15 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
 {
 	struct nvmet_fc_tgt_assoc *assoc, *next;
 	unsigned long flags;
-	int ret;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
 	list_for_each_entry_safe(assoc, next,
 				&tgtport->assoc_list, a_list) {
 		if (!nvmet_fc_tgt_a_get(assoc))
 			continue;
-		ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
-		if (ret == 0) {
-			if (!schedule_work(&assoc->del_work))
-				nvmet_fc_tgt_a_put(assoc);
-		} else {
+		if (!schedule_work(&assoc->del_work))
 			/* already deleting - release local reference */
 			nvmet_fc_tgt_a_put(assoc);
-		}
 	}
 	spin_unlock_irqrestore(&tgtport->lock, flags);
 }
@@ -1533,7 +1525,6 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
 	struct nvmet_fc_tgt_assoc *assoc, *next;
 	unsigned long flags;
 	bool noassoc = true;
-	int ret;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
 	list_for_each_entry_safe(assoc, next,
@@ -1545,14 +1536,9 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
 			continue;
 		assoc->hostport->invalid = 1;
 		noassoc = false;
-		ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
-		if (ret == 0) {
-			if (!schedule_work(&assoc->del_work))
-				nvmet_fc_tgt_a_put(assoc);
-		} else {
+		if (!schedule_work(&assoc->del_work))
 			/* already deleting - release local reference */
 			nvmet_fc_tgt_a_put(assoc);
-		}
 	}
 	spin_unlock_irqrestore(&tgtport->lock, flags);
 
@@ -1573,7 +1559,6 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
 	struct nvmet_fc_tgt_queue *queue;
 	unsigned long flags;
 	bool found_ctrl = false;
-	int ret;
 
 	/* this is a bit ugly, but don't want to make locks layered */
 	spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
@@ -1597,14 +1582,9 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
 		nvmet_fc_tgtport_put(tgtport);
 
 		if (found_ctrl) {
-			ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
-			if (ret == 0) {
-				if (!schedule_work(&assoc->del_work))
-					nvmet_fc_tgt_a_put(assoc);
-			} else {
+			if (!schedule_work(&assoc->del_work))
 				/* already deleting - release local reference */
 				nvmet_fc_tgt_a_put(assoc);
-			}
 			return;
 		}
 
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 2ff1d1334a03..c97e60b71bbc 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -43,6 +43,17 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_ERR,		NULL		}
 };
 
+static int fcloop_verify_addr(substring_t *s)
+{
+	size_t blen = s->to - s->from + 1;
+
+	if (strnlen(s->from, blen) != NVME_FC_TRADDR_HEXNAMELEN + 2 ||
+	    strncmp(s->from, "0x", 2))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int
 fcloop_parse_options(struct fcloop_ctrl_options *opts,
 		const char *buf)
@@ -64,14 +75,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
 		opts->mask |= token;
 		switch (token) {
 		case NVMF_OPT_WWNN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
 			opts->wwnn = token64;
 			break;
 		case NVMF_OPT_WWPN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
@@ -92,14 +105,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
 			opts->fcaddr = token;
 			break;
 		case NVMF_OPT_LPWWNN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
 			opts->lpwwnn = token64;
 			break;
 		case NVMF_OPT_LPWWPN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
@@ -141,14 +156,16 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname,
 		token = match_token(p, opt_tokens, args);
 		switch (token) {
 		case NVMF_OPT_WWNN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
 			*nname = token64;
 			break;
 		case NVMF_OPT_WWPN:
-			if (match_u64(args, &token64)) {
+			if (fcloop_verify_addr(args) ||
+			    match_u64(args, &token64)) {
 				ret = -EINVAL;
 				goto out_free_options;
 			}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 8a0d4fe7bc18..4884ef1e46a2 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -36,7 +36,6 @@ struct nvme_loop_ctrl {
 	struct nvme_loop_iod	async_event_iod;
 	struct nvme_ctrl	ctrl;
 
-	struct nvmet_ctrl	*target_ctrl;
 	struct nvmet_port	*port;
 };
 
@@ -445,7 +444,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_loop_ctrl *ctrl =
 		container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
-	bool changed;
 	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
@@ -472,8 +470,8 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+		WARN_ON_ONCE(1);
 
 	nvme_start_ctrl(&ctrl->ctrl);
 
@@ -568,7 +566,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_loop_ctrl *ctrl;
-	bool changed;
 	int ret;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -584,6 +581,9 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_put_ctrl;
 
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
+		WARN_ON_ONCE(1);
+
 	ret = -ENOMEM;
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -618,8 +618,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	dev_info(ctrl->ctrl.device,
 		 "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
 
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+		WARN_ON_ONCE(1);
 
 	mutex_lock(&nvme_loop_ctrl_mutex);
 	list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 809691291e73..47ee3fb193bd 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -21,6 +21,8 @@
 #include <linux/radix-tree.h>
 #include <linux/t10-pi.h>
 
+#define NVMET_DEFAULT_VS		NVME_VS(1, 3, 0)
+
 #define NVMET_ASYNC_EVENTS		4
 #define NVMET_ERROR_LOG_SLOTS		128
 #define NVMET_NO_ERROR_LOC		((u16)-1)
@@ -52,7 +54,6 @@
 	(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
 
 struct nvmet_ns {
-	struct list_head	dev_link;
 	struct percpu_ref	ref;
 	struct block_device	*bdev;
 	struct file		*file;
@@ -219,7 +220,7 @@ struct nvmet_subsys {
 	struct mutex		lock;
 	struct kref		ref;
 
-	struct list_head	namespaces;
+	struct xarray		namespaces;
 	unsigned int		nr_namespaces;
 	unsigned int		max_nsid;
 	u16			cntlid_min;
@@ -243,6 +244,12 @@ struct nvmet_subsys {
 	struct config_group	allowed_hosts_group;
 
 	struct nvmet_subsys_model	__rcu *model;
+
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+	struct nvme_ctrl	*passthru_ctrl;
+	char			*passthru_ctrl_path;
+	struct config_group	passthru_group;
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
 };
 
 static inline struct nvmet_subsys *to_subsys(struct config_item *item)
@@ -286,8 +293,9 @@ struct nvmet_fabrics_ops {
 	struct module *owner;
 	unsigned int type;
 	unsigned int msdbd;
-	bool has_keyed_sgls : 1;
-	bool metadata_support : 1;
+	unsigned int flags;
+#define NVMF_KEYED_SGLS			(1 << 0)
+#define NVMF_METADATA_SUPPORTED		(1 << 1)
 	void (*queue_response)(struct nvmet_req *req);
 	int (*add_port)(struct nvmet_port *port);
 	void (*remove_port)(struct nvmet_port *port);
@@ -321,6 +329,11 @@ struct nvmet_req {
 			struct bio_vec          *bvec;
 			struct work_struct      work;
 		} f;
+		struct {
+			struct request		*rq;
+			struct work_struct      work;
+			bool			use_workqueue;
+		} p;
 	};
 	int			sg_cnt;
 	int			metadata_sg_cnt;
@@ -400,6 +413,8 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status);
 int nvmet_req_alloc_sgls(struct nvmet_req *req);
 void nvmet_req_free_sgls(struct nvmet_req *req);
 
+void nvmet_execute_set_features(struct nvmet_req *req);
+void nvmet_execute_get_features(struct nvmet_req *req);
 void nvmet_execute_keep_alive(struct nvmet_req *req);
 
 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
@@ -532,6 +547,43 @@ static inline u32 nvmet_dsm_len(struct nvmet_req *req)
 		sizeof(struct nvme_dsm_range);
 }
 
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys);
+int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys);
+void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys);
+u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req);
+u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req);
+static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+{
+	return subsys->passthru_ctrl;
+}
+#else /* CONFIG_NVME_TARGET_PASSTHRU */
+static inline void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
+{
+}
+static inline void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+}
+static inline u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
+{
+	return 0;
+}
+static inline u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
+{
+	return 0;
+}
+static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+{
+	return NULL;
+}
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
+
+static inline struct nvme_ctrl *
+nvmet_req_passthru_ctrl(struct nvmet_req *req)
+{
+	return nvmet_passthru_ctrl(req->sq->ctrl->subsys);
+}
+
 u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
 
 /* Convert a 32-bit number to a 16-bit 0's based number */
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
new file mode 100644
index 000000000000..89d91dc999a6
--- /dev/null
+++ b/drivers/nvme/target/passthru.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target Passthrough command implementation.
+ *
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ * Copyright (c) 2019-2020, Eideticom Inc.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+
+#include "../host/nvme.h"
+#include "nvmet.h"
+
+MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);
+
+/*
+ * xarray to maintain one passthru subsystem per nvme controller.
+ */
+static DEFINE_XARRAY(passthru_subsystems);
+
+static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
+	u16 status = NVME_SC_SUCCESS;
+	struct nvme_id_ctrl *id;
+	u32 max_hw_sectors;
+	int page_shift;
+
+	id = kzalloc(sizeof(*id), GFP_KERNEL);
+	if (!id)
+		return NVME_SC_INTERNAL;
+
+	status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
+	if (status)
+		goto out_free;
+
+	id->cntlid = cpu_to_le16(ctrl->cntlid);
+	id->ver = cpu_to_le32(ctrl->subsys->ver);
+
+	/*
+	 * The passthru NVMe driver may have a limit on the number of segments
+	 * which depends on the host's memory fragementation. To solve this,
+	 * ensure mdts is limited to the pages equal to the number of segments.
+	 */
+	max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
+				      pctrl->max_hw_sectors);
+
+	page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+
+	id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
+
+	id->acl = 3;
+	/*
+	 * We export aerl limit for the fabrics controller, update this when
+	 * passthru based aerl support is added.
+	 */
+	id->aerl = NVMET_ASYNC_EVENTS - 1;
+
+	/* emulate kas as most of the PCIe ctrl don't have a support for kas */
+	id->kas = cpu_to_le16(NVMET_KAS);
+
+	/* don't support host memory buffer */
+	id->hmpre = 0;
+	id->hmmin = 0;
+
+	id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
+	id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
+	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
+
+	/* don't support fuse commands */
+	id->fuses = 0;
+
+	id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
+	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
+		id->sgls |= cpu_to_le32(1 << 2);
+	if (req->port->inline_data_size)
+		id->sgls |= cpu_to_le32(1 << 20);
+
+	/*
+	 * When passsthru controller is setup using nvme-loop transport it will
+	 * export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will fail in
+	 * the nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl()
+	 * code path with duplicate ctr subsynqn. In order to prevent that we
+	 * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn.
+	 */
+	memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));
+
+	/* use fabric id-ctrl values */
+	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
+				req->port->inline_data_size) / 16);
+	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
+
+	id->msdbd = ctrl->ops->msdbd;
+
+	/* Support multipath connections with fabrics */
+	id->cmic |= 1 << 1;
+
+	/* Disable reservations, see nvmet_parse_passthru_io_cmd() */
+	id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);
+
+	status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));
+
+out_free:
+	kfree(id);
+	return status;
+}
+
+static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
+{
+	u16 status = NVME_SC_SUCCESS;
+	struct nvme_id_ns *id;
+	int i;
+
+	id = kzalloc(sizeof(*id), GFP_KERNEL);
+	if (!id)
+		return NVME_SC_INTERNAL;
+
+	status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
+	if (status)
+		goto out_free;
+
+	for (i = 0; i < (id->nlbaf + 1); i++)
+		if (id->lbaf[i].ms)
+			memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));
+
+	id->flbas = id->flbas & ~(1 << 4);
+
+	/*
+	 * Presently the NVMEof target code does not support sending
+	 * metadata, so we must disable it here. This should be updated
+	 * once target starts supporting metadata.
+	 */
+	id->mc = 0;
+
+	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
+out_free:
+	kfree(id);
+	return status;
+}
+
+static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
+	struct request *rq = req->p.rq;
+	u16 status;
+
+	nvme_execute_passthru_rq(rq);
+
+	status = nvme_req(rq)->status;
+	if (status == NVME_SC_SUCCESS &&
+	    req->cmd->common.opcode == nvme_admin_identify) {
+		switch (req->cmd->identify.cns) {
+		case NVME_ID_CNS_CTRL:
+			nvmet_passthru_override_id_ctrl(req);
+			break;
+		case NVME_ID_CNS_NS:
+			nvmet_passthru_override_id_ns(req);
+			break;
+		}
+	}
+
+	req->cqe->result = nvme_req(rq)->result;
+	nvmet_req_complete(req, status);
+	blk_put_request(rq);
+}
+
+static void nvmet_passthru_req_done(struct request *rq,
+				    blk_status_t blk_status)
+{
+	struct nvmet_req *req = rq->end_io_data;
+
+	req->cqe->result = nvme_req(rq)->result;
+	nvmet_req_complete(req, nvme_req(rq)->status);
+	blk_put_request(rq);
+}
+
+static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
+{
+	int sg_cnt = req->sg_cnt;
+	struct scatterlist *sg;
+	int op_flags = 0;
+	struct bio *bio;
+	int i, ret;
+
+	if (req->cmd->common.opcode == nvme_cmd_flush)
+		op_flags = REQ_FUA;
+	else if (nvme_is_write(req->cmd))
+		op_flags = REQ_SYNC | REQ_IDLE;
+
+	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+	bio->bi_end_io = bio_put;
+	bio->bi_opf = req_op(rq) | op_flags;
+
+	for_each_sg(req->sg, sg, req->sg_cnt, i) {
+		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
+				    sg->offset) < sg->length) {
+			bio_put(bio);
+			return -EINVAL;
+		}
+		sg_cnt--;
+	}
+
+	ret = blk_rq_append_bio(rq, &bio);
+	if (unlikely(ret)) {
+		bio_put(bio);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
+{
+	struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+	struct request_queue *q = ctrl->admin_q;
+	struct nvme_ns *ns = NULL;
+	struct request *rq = NULL;
+	u32 effects;
+	u16 status;
+	int ret;
+
+	if (likely(req->sq->qid != 0)) {
+		u32 nsid = le32_to_cpu(req->cmd->common.nsid);
+
+		ns = nvme_find_get_ns(ctrl, nsid);
+		if (unlikely(!ns)) {
+			pr_err("failed to get passthru ns nsid:%u\n", nsid);
+			status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+			goto fail_out;
+		}
+
+		q = ns->queue;
+	}
+
+	rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+	if (IS_ERR(rq)) {
+		rq = NULL;
+		status = NVME_SC_INTERNAL;
+		goto fail_out;
+	}
+
+	if (req->sg_cnt) {
+		ret = nvmet_passthru_map_sg(req, rq);
+		if (unlikely(ret)) {
+			status = NVME_SC_INTERNAL;
+			goto fail_out;
+		}
+	}
+
+	/*
+	 * If there are effects for the command we are about to execute, or
+	 * an end_req function we need to use nvme_execute_passthru_rq()
+	 * synchronously in a work item seeing the end_req function and
+	 * nvme_passthru_end() can't be called in the request done callback
+	 * which is typically in interrupt context.
+	 */
+	effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
+	if (req->p.use_workqueue || effects) {
+		INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
+		req->p.rq = rq;
+		schedule_work(&req->p.work);
+	} else {
+		rq->end_io_data = req;
+		blk_execute_rq_nowait(rq->q, ns ? ns->disk : NULL, rq, 0,
+				      nvmet_passthru_req_done);
+	}
+
+	if (ns)
+		nvme_put_ns(ns);
+
+	return;
+
+fail_out:
+	if (ns)
+		nvme_put_ns(ns);
+	nvmet_req_complete(req, status);
+	blk_put_request(rq);
+}
+
+/*
+ * We need to emulate set host behaviour to ensure that any requested
+ * behaviour of the target's host matches the requested behaviour
+ * of the device's host and fail otherwise.
+ */
+static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
+{
+	struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+	struct nvme_feat_host_behavior *host;
+	u16 status = NVME_SC_INTERNAL;
+	int ret;
+
+	host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
+	if (!host)
+		goto out_complete_req;
+
+	ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
+				host, sizeof(*host), NULL);
+	if (ret)
+		goto out_free_host;
+
+	status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
+	if (status)
+		goto out_free_host;
+
+	if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
+		pr_warn("target host has requested different behaviour from the local host\n");
+		status = NVME_SC_INTERNAL;
+	}
+
+out_free_host:
+	kfree(host);
+out_complete_req:
+	nvmet_req_complete(req, status);
+}
+
+static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
+{
+	req->p.use_workqueue = false;
+	req->execute = nvmet_passthru_execute_cmd;
+	return NVME_SC_SUCCESS;
+}
+
+u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
+{
+	switch (req->cmd->common.opcode) {
+	case nvme_cmd_resv_register:
+	case nvme_cmd_resv_report:
+	case nvme_cmd_resv_acquire:
+	case nvme_cmd_resv_release:
+		/*
+		 * Reservations cannot be supported properly because the
+		 * underlying device has no way of differentiating different
+		 * hosts that connect via fabrics. This could potentially be
+		 * emulated in the future if regular targets grow support for
+		 * this feature.
+		 */
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+
+	return nvmet_setup_passthru_command(req);
+}
+
+/*
+ * Only features that are emulated or specifically allowed in the list  are
+ * passed down to the controller. This function implements the allow list for
+ * both get and set features.
+ */
+static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
+{
+	switch (le32_to_cpu(req->cmd->features.fid)) {
+	case NVME_FEAT_ARBITRATION:
+	case NVME_FEAT_POWER_MGMT:
+	case NVME_FEAT_LBA_RANGE:
+	case NVME_FEAT_TEMP_THRESH:
+	case NVME_FEAT_ERR_RECOVERY:
+	case NVME_FEAT_VOLATILE_WC:
+	case NVME_FEAT_WRITE_ATOMIC:
+	case NVME_FEAT_AUTO_PST:
+	case NVME_FEAT_TIMESTAMP:
+	case NVME_FEAT_HCTM:
+	case NVME_FEAT_NOPSC:
+	case NVME_FEAT_RRL:
+	case NVME_FEAT_PLM_CONFIG:
+	case NVME_FEAT_PLM_WINDOW:
+	case NVME_FEAT_HOST_BEHAVIOR:
+	case NVME_FEAT_SANITIZE:
+	case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
+		return nvmet_setup_passthru_command(req);
+
+	case NVME_FEAT_ASYNC_EVENT:
+		/* There is no support for forwarding ASYNC events */
+	case NVME_FEAT_IRQ_COALESCE:
+	case NVME_FEAT_IRQ_CONFIG:
+		/* The IRQ settings will not apply to the target controller */
+	case NVME_FEAT_HOST_MEM_BUF:
+		/*
+		 * Any HMB that's set will not be passed through and will
+		 * not work as expected
+		 */
+	case NVME_FEAT_SW_PROGRESS:
+		/*
+		 * The Pre-Boot Software Load Count doesn't make much
+		 * sense for a target to export
+		 */
+	case NVME_FEAT_RESV_MASK:
+	case NVME_FEAT_RESV_PERSIST:
+		/* No reservations, see nvmet_parse_passthru_io_cmd() */
+	default:
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
+
+u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
+{
+	/*
+	 * Passthru all vendor specific commands
+	 */
+	if (req->cmd->common.opcode >= nvme_admin_vendor_start)
+		return nvmet_setup_passthru_command(req);
+
+	switch (req->cmd->common.opcode) {
+	case nvme_admin_async_event:
+		req->execute = nvmet_execute_async_event;
+		return NVME_SC_SUCCESS;
+	case nvme_admin_keep_alive:
+		/*
+		 * Most PCIe ctrls don't support keep alive cmd, we route keep
+		 * alive to the non-passthru mode. In future please change this
+		 * code when PCIe ctrls with keep alive support available.
+		 */
+		req->execute = nvmet_execute_keep_alive;
+		return NVME_SC_SUCCESS;
+	case nvme_admin_set_features:
+		switch (le32_to_cpu(req->cmd->features.fid)) {
+		case NVME_FEAT_ASYNC_EVENT:
+		case NVME_FEAT_KATO:
+		case NVME_FEAT_NUM_QUEUES:
+		case NVME_FEAT_HOST_ID:
+			req->execute = nvmet_execute_set_features;
+			return NVME_SC_SUCCESS;
+		case NVME_FEAT_HOST_BEHAVIOR:
+			req->execute = nvmet_passthru_set_host_behaviour;
+			return NVME_SC_SUCCESS;
+		default:
+			return nvmet_passthru_get_set_features(req);
+		}
+		break;
+	case nvme_admin_get_features:
+		switch (le32_to_cpu(req->cmd->features.fid)) {
+		case NVME_FEAT_ASYNC_EVENT:
+		case NVME_FEAT_KATO:
+		case NVME_FEAT_NUM_QUEUES:
+		case NVME_FEAT_HOST_ID:
+			req->execute = nvmet_execute_get_features;
+			return NVME_SC_SUCCESS;
+		default:
+			return nvmet_passthru_get_set_features(req);
+		}
+		break;
+	case nvme_admin_identify:
+		switch (req->cmd->identify.cns) {
+		case NVME_ID_CNS_CTRL:
+			req->execute = nvmet_passthru_execute_cmd;
+			req->p.use_workqueue = true;
+			return NVME_SC_SUCCESS;
+		case NVME_ID_CNS_NS:
+			req->execute = nvmet_passthru_execute_cmd;
+			req->p.use_workqueue = true;
+			return NVME_SC_SUCCESS;
+		default:
+			return nvmet_setup_passthru_command(req);
+		}
+	case nvme_admin_get_log_page:
+		return nvmet_setup_passthru_command(req);
+	default:
+		/* Reject commands not in the allowlist above */
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
+
+int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
+{
+	struct nvme_ctrl *ctrl;
+	int ret = -EINVAL;
+	void *old;
+
+	mutex_lock(&subsys->lock);
+	if (!subsys->passthru_ctrl_path)
+		goto out_unlock;
+	if (subsys->passthru_ctrl)
+		goto out_unlock;
+
+	if (subsys->nr_namespaces) {
+		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
+		goto out_unlock;
+	}
+
+	ctrl = nvme_ctrl_get_by_path(subsys->passthru_ctrl_path);
+	if (IS_ERR(ctrl)) {
+		ret = PTR_ERR(ctrl);
+		pr_err("failed to open nvme controller %s\n",
+		       subsys->passthru_ctrl_path);
+
+		goto out_unlock;
+	}
+
+	old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
+			 subsys, GFP_KERNEL);
+	if (xa_is_err(old)) {
+		ret = xa_err(old);
+		goto out_put_ctrl;
+	}
+
+	if (old)
+		goto out_put_ctrl;
+
+	subsys->passthru_ctrl = ctrl;
+	subsys->ver = ctrl->vs;
+
+	if (subsys->ver < NVME_VS(1, 2, 1)) {
+		pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
+			NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
+			NVME_TERTIARY(subsys->ver));
+		subsys->ver = NVME_VS(1, 2, 1);
+	}
+
+	mutex_unlock(&subsys->lock);
+	return 0;
+
+out_put_ctrl:
+	nvme_put_ctrl(ctrl);
+out_unlock:
+	mutex_unlock(&subsys->lock);
+	return ret;
+}
+
+static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+	if (subsys->passthru_ctrl) {
+		xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
+		nvme_put_ctrl(subsys->passthru_ctrl);
+	}
+	subsys->passthru_ctrl = NULL;
+	subsys->ver = NVMET_DEFAULT_VS;
+}
+
+void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+	mutex_lock(&subsys->lock);
+	__nvmet_passthru_ctrl_disable(subsys);
+	mutex_unlock(&subsys->lock);
+}
+
+void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
+{
+	mutex_lock(&subsys->lock);
+	__nvmet_passthru_ctrl_disable(subsys);
+	mutex_unlock(&subsys->lock);
+	kfree(subsys->passthru_ctrl_path);
+}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 76ea23a2c2be..3ccb59260b4a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -752,7 +752,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 	u16 status = 0;
 
 	WARN_ON(rsp->n_rdma <= 0);
@@ -1008,7 +1008,7 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_cmd *cmd =
 		container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 	struct nvmet_rdma_rsp *rsp;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
@@ -1258,9 +1258,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	 */
 	nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
 
-	queue->cq = ib_alloc_cq(ndev->device, queue,
-			nr_cqe + 1, queue->comp_vector,
-			IB_POLL_WORKQUEUE);
+	queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1,
+				   queue->comp_vector, IB_POLL_WORKQUEUE);
 	if (IS_ERR(queue->cq)) {
 		ret = PTR_ERR(queue->cq);
 		pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -1322,7 +1321,7 @@ out:
 err_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 err_destroy_cq:
-	ib_free_cq(queue->cq);
+	ib_cq_pool_put(queue->cq, nr_cqe + 1);
 	goto out;
 }
 
@@ -1332,7 +1331,8 @@ static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 	if (queue->cm_id)
 		rdma_destroy_id(queue->cm_id);
 	ib_destroy_qp(queue->qp);
-	ib_free_cq(queue->cq);
+	ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 *
+		       queue->send_queue_size + 1);
 }
 
 static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
@@ -1970,8 +1970,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
 	.msdbd			= 1,
-	.has_keyed_sgls		= 1,
-	.metadata_support	= 1,
+	.flags			= NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED,
 	.add_port		= nvmet_rdma_add_port,
 	.remove_port		= nvmet_rdma_remove_port,
 	.queue_response		= nvmet_rdma_queue_response,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index de9217cfd22d..9eda91162fe4 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -459,17 +459,11 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
 static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
 {
 	struct llist_node *node;
+	struct nvmet_tcp_cmd *cmd;
 
-	node = llist_del_all(&queue->resp_list);
-	if (!node)
-		return;
-
-	while (node) {
-		struct nvmet_tcp_cmd *cmd = llist_entry(node,
-					struct nvmet_tcp_cmd, lentry);
-
+	for (node = llist_del_all(&queue->resp_list); node; node = node->next) {
+		cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry);
 		list_add(&cmd->entry, &queue->resp_send_list);
-		node = node->next;
 		queue->send_list_len++;
 	}
 }
@@ -1717,7 +1711,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_TCP,
 	.msdbd			= 1,
-	.has_keyed_sgls		= 0,
 	.add_port		= nvmet_tcp_add_port,
 	.remove_port		= nvmet_tcp_remove_port,
 	.queue_response		= nvmet_tcp_queue_response,
author	Linus Torvalds <torvalds@linux-foundation.org>	2020-08-05 10:51:40 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-08-05 10:51:40 -0700
commit	e0fc99e21e6e299673f1640105ac0c1d829c2d93 (patch)
tree	4637b171164c3e653e9002b2bc962b32f05b5a07 /drivers/nvme/target
parent	4834ce9d8e074bb7ae197632e0708219b9f389b5 (diff)
parent	f59589fc89665102923725e80e12f782d5f74f67 (diff)