Diffstat (limited to 'drivers/nvme')
-rw-r--r--  drivers/nvme/host/core.c         | 150
-rw-r--r--  drivers/nvme/host/fabrics.c      |  25
-rw-r--r--  drivers/nvme/host/fabrics.h      |   5
-rw-r--r--  drivers/nvme/host/fc.c           |   2
-rw-r--r--  drivers/nvme/host/lightnvm.c     |   8
-rw-r--r--  drivers/nvme/host/multipath.c    |   2
-rw-r--r--  drivers/nvme/host/nvme.h         |  11
-rw-r--r--  drivers/nvme/host/pci.c          |  27
-rw-r--r--  drivers/nvme/host/rdma.c         |   2
-rw-r--r--  drivers/nvme/host/tcp.c          |   2
-rw-r--r--  drivers/nvme/host/zns.c          |  13
-rw-r--r--  drivers/nvme/target/Kconfig      |   2
-rw-r--r--  drivers/nvme/target/configfs.c   |  40
-rw-r--r--  drivers/nvme/target/core.c       |  15
-rw-r--r--  drivers/nvme/target/discovery.c  |   1
-rw-r--r--  drivers/nvme/target/fcloop.c     |  81
-rw-r--r--  drivers/nvme/target/loop.c       |   2
-rw-r--r--  drivers/nvme/target/nvmet.h      |   4
-rw-r--r--  drivers/nvme/target/passthru.c   |  37
19 files changed, 324 insertions(+), 105 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9b6ebeb29cca..ce1b61519441 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -85,7 +85,7 @@ static LIST_HEAD(nvme_subsystems);
 static DEFINE_MUTEX(nvme_subsystems_lock);
 
 static DEFINE_IDA(nvme_instance_ida);
-static dev_t nvme_chr_devt;
+static dev_t nvme_ctrl_base_chr_devt;
 static struct class *nvme_class;
 static struct class *nvme_subsys_class;
 
@@ -137,6 +137,38 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
 
+static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	if (ctrl->state != NVME_CTRL_CONNECTING)
+		return;
+
+	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+	dev_info(ctrl->device, "failfast expired\n");
+	nvme_kick_requeue_lists(ctrl);
+}
+
+static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo == -1)
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work,
+			      ctrl->opts->fast_io_fail_tmo * HZ);
+}
+
+static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts)
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -422,8 +454,17 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	}
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	if (changed && ctrl->state == NVME_CTRL_LIVE)
+	if (!changed)
+		return false;
+
+	if (ctrl->state == NVME_CTRL_LIVE) {
+		if (old_state == NVME_CTRL_CONNECTING)
+			nvme_stop_failfast_work(ctrl);
 		nvme_kick_requeue_lists(ctrl);
+	} else if (ctrl->state == NVME_CTRL_CONNECTING &&
+		old_state == NVME_CTRL_RESETTING) {
+		nvme_start_failfast_work(ctrl);
+	}
 	return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -507,29 +548,49 @@ static inline void nvme_clear_nvme_request(struct request *req)
 	}
 }
 
-struct request *nvme_alloc_request(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+static inline unsigned int nvme_req_op(struct nvme_command *cmd)
 {
-	unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
-	struct request *req;
+	return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
+}
 
-	if (qid == NVME_QID_ANY) {
-		req = blk_mq_alloc_request(q, op, flags);
-	} else {
-		req = blk_mq_alloc_request_hctx(q, op, flags,
-				qid ? qid - 1 : 0);
-	}
-	if (IS_ERR(req))
-		return req;
+static inline void nvme_init_request(struct request *req,
+		struct nvme_command *cmd)
+{
+	if (req->q->queuedata)
+		req->timeout = NVME_IO_TIMEOUT;
+	else /* no queuedata implies admin queue */
+		req->timeout = NVME_ADMIN_TIMEOUT;
 
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
 	nvme_clear_nvme_request(req);
 	nvme_req(req)->cmd = cmd;
+}
 
+struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, blk_mq_req_flags_t flags)
+{
+	struct request *req;
+
+	req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
+	if (!IS_ERR(req))
+		nvme_init_request(req, cmd);
 	return req;
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+struct request *nvme_alloc_request_qid(struct request_queue *q,
+		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+{
+	struct request *req;
+
+	req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+			qid ? qid - 1 : 0);
+	if (!IS_ERR(req))
+		nvme_init_request(req, cmd);
+	return req;
+}
+EXPORT_SYMBOL_GPL(nvme_alloc_request_qid);
+
 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
 {
 	struct nvme_command c;
@@ -886,11 +947,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	struct request *req;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, flags, qid);
+	if (qid == NVME_QID_ANY)
+		req = nvme_alloc_request(q, cmd, flags);
+	else
+		req = nvme_alloc_request_qid(q, cmd, flags, qid);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	if (timeout)
+		req->timeout = timeout;
 
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -1056,11 +1121,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	void *meta = NULL;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY);
+	req = nvme_alloc_request(q, cmd, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	if (timeout)
+		req->timeout = timeout;
 	nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
 	if (ubuffer && bufflen) {
@@ -1130,8 +1196,8 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl)
 {
 	struct request *rq;
 
-	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
-			NVME_QID_ANY);
+	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
+			BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -1291,7 +1357,8 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 			NVME_IDENTIFY_DATA_SIZE);
 	if (status) {
 		dev_warn(ctrl->device,
-			"Identify Descriptors failed (%d)\n", status);
+			"Identify Descriptors failed (nsid=%u, status=0x%x)\n",
+			nsid, status);
 		goto free_data;
 	}
 
@@ -2047,7 +2114,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	nvme_config_discard(disk, ns);
 	nvme_config_write_zeroes(disk, ns);
 
-	if (id->nsattr & NVME_NS_ATTR_RO)
+	if ((id->nsattr & NVME_NS_ATTR_RO) ||
+	    test_bit(NVME_NS_FORCE_RO, &ns->flags))
 		set_disk_ro(disk, true);
 }
 
@@ -2249,13 +2317,13 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 	cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
 	cmd.common.cdw11 = cpu_to_le32(len);
 
-	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
-			ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false);
+	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0,
+			NVME_QID_ANY, 1, 0, false);
 }
 EXPORT_SYMBOL_GPL(nvme_sec_submit);
 #endif /* CONFIG_BLK_SED_OPAL */
 
-static const struct block_device_operations nvme_fops = {
+static const struct block_device_operations nvme_bdev_ops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= nvme_ioctl,
 	.compat_ioctl	= nvme_compat_ioctl,
@@ -3262,7 +3330,7 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	if (disk->fops == &nvme_fops)
+	if (disk->fops == &nvme_bdev_ops)
 		return nvme_get_ns_from_dev(dev)->head;
 	else
 		return disk->private_data;
@@ -3371,7 +3439,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 	}
 #ifdef CONFIG_NVME_MULTIPATH
 	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
-		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+		if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
 			return 0;
 		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
 			return 0;
@@ -3792,7 +3860,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	struct gendisk *disk;
 	struct nvme_id_ns *id;
 	char disk_name[DISK_NAME_LEN];
-	int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret;
+	int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;
 
 	if (nvme_identify_ns(ctrl, nsid, ids, &id))
 		return;
@@ -3816,8 +3884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	ns->ctrl = ctrl;
 	kref_init(&ns->kref);
 
-	ret = nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED);
-	if (ret)
+	if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
 		goto out_free_queue;
 	nvme_set_disk_name(disk_name, ns, ctrl, &flags);
 
@@ -3825,7 +3892,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	if (!disk)
 		goto out_unlink_ns;
 
-	disk->fops = &nvme_fops;
+	disk->fops = &nvme_bdev_ops;
 	disk->private_data = ns;
 	disk->queue = ns->queue;
 	disk->flags = flags;
@@ -3836,8 +3903,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		goto out_put_disk;
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
-		ret = nvme_nvm_register(ns, disk_name, node);
-		if (ret) {
+		if (nvme_nvm_register(ns, disk_name, node)) {
 			dev_warn(ctrl->device, "LightNVM init failure\n");
 			goto out_put_disk;
 		}
@@ -4028,8 +4094,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 
 		ret = nvme_submit_sync_cmd(ctrl->admin_q, &cmd, ns_list,
 				NVME_IDENTIFY_DATA_SIZE);
-		if (ret)
+		if (ret) {
+			dev_warn(ctrl->device,
+				"Identify NS List failed (status=0x%x)\n", ret);
 			goto free;
+		}
 
 		for (i = 0; i < nr_entries; i++) {
 			u32 nsid = le32_to_cpu(ns_list[i]);
@@ -4332,6 +4401,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 }
@@ -4409,6 +4479,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	int ret;
 
 	ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4425,6 +4496,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	init_waitqueue_head(&ctrl->state_wq);
 
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
 
@@ -4443,7 +4515,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
 	device_initialize(&ctrl->ctrl_device);
 	ctrl->device = &ctrl->ctrl_device;
-	ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+	ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
+			ctrl->instance);
 	ctrl->device->class = nvme_class;
 	ctrl->device->parent = ctrl->dev;
 	ctrl->device->groups = nvme_dev_attr_groups;
@@ -4652,7 +4725,8 @@ static int __init nvme_core_init(void)
 	if (!nvme_delete_wq)
 		goto destroy_reset_wq;
 
-	result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
+	result = alloc_chrdev_region(&nvme_ctrl_base_chr_devt, 0,
+			NVME_MINORS, "nvme");
 	if (result < 0)
 		goto destroy_delete_wq;
 
@@ -4673,7 +4747,7 @@ static int __init nvme_core_init(void)
 destroy_class:
 	class_destroy(nvme_class);
 unregister_chrdev:
-	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 destroy_delete_wq:
 	destroy_workqueue(nvme_delete_wq);
 destroy_reset_wq:
@@ -4688,7 +4762,7 @@ static void __exit nvme_core_exit(void)
 {
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
-	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
 	destroy_workqueue(nvme_wq);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 8575724734e0..72ac00173500 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
 	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
 
@@ -615,6 +616,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };
 
@@ -634,6 +636,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
 	opts->kato = NVME_DEFAULT_KATO;
 	opts->duplicate_connect = false;
+	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	opts->hdr_digest = false;
 	opts->data_digest = false;
 	opts->tos = -1; /* < 0 == use transport default */
@@ -754,6 +757,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token >= 0)
+				pr_warn("I/O fail on reconnect controller after %d sec\n",
+					token);
+			opts->fast_io_fail_tmo = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -884,11 +898,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
-	if (ctrl_loss_tmo < 0)
+	if (ctrl_loss_tmo < 0) {
 		opts->max_reconnects = -1;
-	else
+	} else {
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);
+		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
+			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
+				opts->fast_io_fail_tmo, ctrl_loss_tmo);
+	}
 
 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
@@ -988,7 +1006,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW |\
+				 NVMF_OPT_FAIL_FAST_TMO)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
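The interplay of the three fabrics timeouts above is easiest to see in isolation. Below is a minimal user-space sketch (not kernel code) of how nvmf_parse_options() derives max_reconnects and when it warns; the defaults come from fabrics.h, and the fast_io_fail_tmo value is a hypothetical user setting:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		int reconnect_delay = 10;	/* NVMF_DEF_RECONNECT_DELAY */
		int ctrl_loss_tmo = 600;	/* NVMF_DEF_CTRL_LOSS_TMO */
		int fast_io_fail_tmo = 5;	/* user-supplied; -1 disables failfast */

		int max_reconnects = (ctrl_loss_tmo < 0) ? -1 :
				DIV_ROUND_UP(ctrl_loss_tmo, reconnect_delay);

		printf("max_reconnects = %d\n", max_reconnects);	/* 60 */
		if (ctrl_loss_tmo >= 0 && ctrl_loss_tmo < fast_io_fail_tmo)
			printf("warning: failfast tmo (%d) larger than controller loss tmo (%d)\n",
			       fast_io_fail_tmo, ctrl_loss_tmo);
		return 0;
	}

So with the defaults, I/O keeps being requeued across up to 60 reconnect attempts, but once fast_io_fail_tmo seconds elapse in the CONNECTING state the failfast flag expires pending I/O instead.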
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a9c1e3b4585e..733010d2eafd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default is -1: the fail fast mechanism is disabled  */
+#define NVMF_DEF_FAIL_FAST_TMO	-1
 
 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
 
 /**
@@ -89,6 +92,7 @@ enum {
 * @nr_write_queues: number of queues for write I/O
 * @nr_poll_queues: number of queues for polling I/O
 * @tos: type of service
+ * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
 */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	int			fast_io_fail_tmo;
 };
 
 /*
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f4c246462658..38373a0e86ef 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3479,7 +3479,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 			ctrl->lport->ops->fcprqst_priv_sz);
 	ctrl->admin_tag_set.driver_data = ctrl;
 	ctrl->admin_tag_set.nr_hw_queues = 1;
-	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+	ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT;
 	ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8e562d0f2c30..470cef3abec3 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -653,7 +653,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,
 
 	nvme_nvm_rqtocmd(rqd, ns, cmd);
 
-	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
+	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0);
 	if (IS_ERR(rq))
 		return rq;
 
@@ -767,14 +767,14 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	DECLARE_COMPLETION_ONSTACK(wait);
 	int ret = 0;
 
-	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
-			NVME_QID_ANY);
+	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0);
 	if (IS_ERR(rq)) {
 		ret = -ENOMEM;
 		goto err_cmd;
 	}
 
-	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	if (timeout)
+		rq->timeout = timeout;
 
 	if (ppa_buf && ppa_len) {
 		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 106cf5c44ee7..9ac762b28811 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -279,6 +279,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 	struct nvme_ns *ns;
 
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+			continue;
 		switch (ns->ctrl->state) {
 		case NVME_CTRL_LIVE:
 		case NVME_CTRL_RESETTING:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 567f7ad18a91..7e49f61f81df 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -24,7 +24,7 @@ extern unsigned int nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
 extern unsigned int admin_timeout;
-#define ADMIN_TIMEOUT	(admin_timeout * HZ)
+#define NVME_ADMIN_TIMEOUT	(admin_timeout * HZ)
 
 #define NVME_DEFAULT_KATO	5
 #define NVME_KATO_GRACE		10
@@ -178,7 +178,8 @@ static inline u16 nvme_req_qid(struct request *req)
 {
 	if (!req->q->queuedata)
 		return 0;
-	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
+
+	return req->mq_hctx->queue_num + 1;
 }
 
 /* The below value is the specific amount of delay needed before checking
@@ -298,6 +299,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct delayed_work failfast_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 	unsigned long events;
@@ -331,6 +333,8 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED	0
 	struct nvmf_ctrl_options *opts;
 
 	struct page *discard_page;
@@ -442,6 +446,7 @@ struct nvme_ns {
 #define NVME_NS_REMOVING	0
 #define NVME_NS_DEAD		1
 #define NVME_NS_ANA_PENDING	2
+#define NVME_NS_FORCE_RO	3
 
 	struct nvme_fault_inject fault_inject;
 
@@ -604,6 +609,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, blk_mq_req_flags_t flags);
+struct request *nvme_alloc_request_qid(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
 void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
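Note that NVME_CTRL_FAILFAST_EXPIRED above is a bit number (0), not a mask: the new ctrl->flags word is manipulated with the kernel's atomic set_bit()/test_bit()/clear_bit() helpers, which take bit indices. A user-space sketch of the idiom, with simple non-atomic stand-ins for those helpers:

	#include <stdio.h>

	#define NVME_CTRL_FAILFAST_EXPIRED	0	/* bit number, as in nvme.h */

	static void set_bit(int nr, unsigned long *addr)
	{
		*addr |= 1UL << nr;	/* kernel version is atomic */
	}

	static int test_bit(int nr, const unsigned long *addr)
	{
		return (*addr >> nr) & 1;
	}

	int main(void)
	{
		unsigned long flags = 0;

		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &flags);
		printf("failfast expired: %d\n",
		       test_bit(NVME_CTRL_FAILFAST_EXPIRED, &flags));
		return 0;
	}

The same convention applies to the per-namespace ns->flags word, where the new NVME_NS_FORCE_RO occupies bit 3.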
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3be352403839..b4385cb0ff60 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1319,13 +1319,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		req->tag, nvmeq->qid);
 
 	abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
-			BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req)) {
 		atomic_inc(&dev->ctrl.abort_limit);
 		return BLK_EH_RESET_TIMER;
 	}
 
-	abort_req->timeout = ADMIN_TIMEOUT;
 	abort_req->end_io_data = NULL;
 	blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
 
@@ -1622,7 +1621,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 
 	dev->admin_tagset.nr_hw_queues = 1;
 	dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-	dev->admin_tagset.timeout = ADMIN_TIMEOUT;
+	dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
 	dev->admin_tagset.numa_node = dev->ctrl.numa_node;
 	dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
 	dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
@@ -2104,6 +2103,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
 
 static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
 {
+	/*
+	 * If tags are shared with admin queue (Apple bug), then
+	 * make sure we only use one IO queue.
+	 */
+	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+		return 1;
 	return num_possible_cpus() + dev->nr_write_queues +
 		dev->nr_poll_queues;
 }
@@ -2122,16 +2127,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	dev->nr_write_queues = write_queues;
 	dev->nr_poll_queues = poll_queues;
 
-	/*
-	 * If tags are shared with admin queue (Apple bug), then
-	 * make sure we only use one IO queue.
-	 */
-	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
-		nr_io_queues = 1;
-	else
-		nr_io_queues = min(nvme_max_io_queues(dev),
-				   dev->nr_allocated_queues - 1);
-
+	nr_io_queues = dev->nr_allocated_queues - 1;
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;
@@ -2234,11 +2230,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 	cmd.delete_queue.opcode = opcode;
 	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 
-	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->timeout = ADMIN_TIMEOUT;
 	req->end_io_data = nvmeq;
 
 	init_completion(&nvmeq->delete_done);
@@ -2254,7 +2249,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
 	unsigned long timeout;
 
  retry:
-	timeout = ADMIN_TIMEOUT;
+	timeout = NVME_ADMIN_TIMEOUT;
 	while (nr_queues > 0) {
 		if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
 			break;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 65e3d0ef36e1..df9f6f4549f1 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -797,7 +797,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 				NVME_RDMA_DATA_SGL_SIZE;
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
-		set->timeout = ADMIN_TIMEOUT;
+		set->timeout = NVME_ADMIN_TIMEOUT;
 		set->flags = BLK_MQ_F_NO_SCHED;
 	} else {
 		set = &ctrl->tag_set;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c0c33320fe65..1ba659927442 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1568,7 +1568,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
-		set->timeout = ADMIN_TIMEOUT;
+		set->timeout = NVME_ADMIN_TIMEOUT;
 	} else {
 		set = &ctrl->tag_set;
 		memset(set, 0, sizeof(*set));
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 67e87e9f306f..1dfe9a3500e3 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -55,12 +55,17 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 	int status;
 
 	/* Driver requires zone append support */
-	if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
+	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
 			NVME_CMD_EFFECTS_CSUPP)) {
+		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
+			dev_warn(ns->ctrl->device,
+				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
+				 ns->head->ns_id);
+	} else {
+		set_bit(NVME_NS_FORCE_RO, &ns->flags);
 		dev_warn(ns->ctrl->device,
-			"append not supported for zoned namespace:%d\n",
-			ns->head->ns_id);
-		return -EINVAL;
+			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
+			 ns->head->ns_id);
 	}
 
 	/* Lazily query controller append limit for the first zoned namespace */
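The zns.c change keys off the CSUPP bit (bit 0) of the Zone Append entry in the NVMe Commands Supported and Effects log page (0x05): instead of rejecting the namespace outright, a missing bit now forces it read-only via NVME_NS_FORCE_RO. A user-space sketch of that check, with made-up log data:

	#include <stdint.h>
	#include <stdio.h>

	#define NVME_CMD_EFFECTS_CSUPP	(1 << 0)
	#define nvme_cmd_zone_append	0x7d	/* NVMe ZNS Zone Append opcode */

	int main(void)
	{
		uint32_t iocs[256] = {0};	/* I/O command effects entries */

		iocs[nvme_cmd_zone_append] = NVME_CMD_EFFECTS_CSUPP;	/* pretend supported */

		if (iocs[nvme_cmd_zone_append] & NVME_CMD_EFFECTS_CSUPP)
			printf("Zone Append supported: namespace stays writable\n");
		else
			printf("Zone Append missing: namespace forced read-only\n");
		return 0;
	}

Because nvme_update_disk_info() now also consults NVME_NS_FORCE_RO, the read-only state follows the capability across rescans in both directions.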
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 8056955e652c..4be2ececbc45 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -24,7 +24,7 @@ config NVME_TARGET_PASSTHRU
 	  This enables target side NVMe passthru controller support for the
 	  NVMe Over Fabrics protocol. It allows for hosts to manage and
 	  directly access an actual NVMe controller residing on the target
-	  side, incuding executing Vendor Unique Commands.
+	  side, including executing Vendor Unique Commands.
 
 	  If unsure, say N.
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 37e1d7784e17..c61ffd767062 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -736,9 +736,49 @@ static ssize_t nvmet_passthru_enable_store(struct config_item *item,
 }
 CONFIGFS_ATTR(nvmet_passthru_, enable);
 
+static ssize_t nvmet_passthru_admin_timeout_show(struct config_item *item,
+		char *page)
+{
+	return sprintf(page, "%u\n", to_subsys(item->ci_parent)->admin_timeout);
+}
+
+static ssize_t nvmet_passthru_admin_timeout_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+	unsigned int timeout;
+
+	if (kstrtouint(page, 0, &timeout))
+		return -EINVAL;
+	subsys->admin_timeout = timeout;
+	return count;
+}
+CONFIGFS_ATTR(nvmet_passthru_, admin_timeout);
+
+static ssize_t nvmet_passthru_io_timeout_show(struct config_item *item,
+		char *page)
+{
+	return sprintf(page, "%u\n", to_subsys(item->ci_parent)->io_timeout);
+}
+
+static ssize_t nvmet_passthru_io_timeout_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+	unsigned int timeout;
+
+	if (kstrtouint(page, 0, &timeout))
+		return -EINVAL;
+	subsys->io_timeout = timeout;
+	return count;
+}
+CONFIGFS_ATTR(nvmet_passthru_, io_timeout);
+
 static struct configfs_attribute *nvmet_passthru_attrs[] = {
 	&nvmet_passthru_attr_device_path,
 	&nvmet_passthru_attr_enable,
+	&nvmet_passthru_attr_admin_timeout,
+	&nvmet_passthru_attr_io_timeout,
 	NULL,
 };
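The two new configfs attributes take a timeout in seconds (0 leaves the driver default in place) and apply to passthru admin and I/O commands respectively. A user-space sketch of setting one; the subsystem NQN in the path is hypothetical, while the surrounding configfs layout matches the existing passthru/ group:

	#include <stdio.h>

	int main(void)
	{
		const char *attr =
			"/sys/kernel/config/nvmet/subsystems/testnqn/passthru/io_timeout";
		FILE *f = fopen(attr, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "30\n");	/* cap passthru I/O commands at 30 seconds */
		fclose(f);
		return 0;
	}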
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 957b39a82431..8ce4d59cc9e7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -757,8 +757,6 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 {
 	cq->qid = qid;
 	cq->size = size;
-
-	ctrl->cqs[qid] = cq;
 }
 
 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
@@ -1344,20 +1342,14 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!ctrl->changed_ns_list)
 		goto out_free_ctrl;
 
-	ctrl->cqs = kcalloc(subsys->max_qid + 1,
-			sizeof(struct nvmet_cq *),
-			GFP_KERNEL);
-	if (!ctrl->cqs)
-		goto out_free_changed_ns_list;
-
 	ctrl->sqs = kcalloc(subsys->max_qid + 1,
 			sizeof(struct nvmet_sq *),
 			GFP_KERNEL);
 	if (!ctrl->sqs)
-		goto out_free_cqs;
+		goto out_free_changed_ns_list;
 
 	if (subsys->cntlid_min > subsys->cntlid_max)
-		goto out_free_cqs;
+		goto out_free_changed_ns_list;
 
 	ret = ida_simple_get(&cntlid_ida,
 			     subsys->cntlid_min, subsys->cntlid_max,
@@ -1395,8 +1387,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 
 out_free_sqs:
 	kfree(ctrl->sqs);
-out_free_cqs:
-	kfree(ctrl->cqs);
 out_free_changed_ns_list:
 	kfree(ctrl->changed_ns_list);
 out_free_ctrl:
@@ -1426,7 +1416,6 @@ static void nvmet_ctrl_free(struct kref *ref)
 	nvmet_async_events_free(ctrl);
 	kfree(ctrl->sqs);
-	kfree(ctrl->cqs);
 	kfree(ctrl->changed_ns_list);
 	kfree(ctrl);
 
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index f40c05c33c3a..682854e0e079 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -69,6 +69,7 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
 	struct nvmet_port *port;
 	struct nvmet_subsys_link *s;
 
+	lockdep_assert_held(&nvmet_config_sem);
 	nvmet_genctr++;
 
 	list_for_each_entry(port, nvmet_ports, global_entry)
" fabric" : "", + drop_opcode, drop_amount); + + return count; +} + static DEVICE_ATTR(add_local_port, 0200, NULL, fcloop_create_local_port); static DEVICE_ATTR(del_local_port, 0200, NULL, fcloop_delete_local_port); @@ -1456,6 +1531,7 @@ static DEVICE_ATTR(add_remote_port, 0200, NULL, fcloop_create_remote_port); static DEVICE_ATTR(del_remote_port, 0200, NULL, fcloop_delete_remote_port); static DEVICE_ATTR(add_target_port, 0200, NULL, fcloop_create_target_port); static DEVICE_ATTR(del_target_port, 0200, NULL, fcloop_delete_target_port); +static DEVICE_ATTR(set_cmd_drop, 0200, NULL, fcloop_set_cmd_drop); static struct attribute *fcloop_dev_attrs[] = { &dev_attr_add_local_port.attr, @@ -1464,6 +1540,7 @@ static struct attribute *fcloop_dev_attrs[] = { &dev_attr_del_remote_port.attr, &dev_attr_add_target_port.attr, &dev_attr_del_target_port.attr, + &dev_attr_set_cmd_drop.attr, NULL }; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 07806016c09d..cb6f86572b24 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -355,7 +355,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; ctrl->queues[0].ctrl = ctrl; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 559a15ccc322..592763732065 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -164,7 +164,6 @@ static inline struct nvmet_port *ana_groups_to_port( struct nvmet_ctrl { struct nvmet_subsys *subsys; - struct nvmet_cq **cqs; struct nvmet_sq **sqs; bool cmd_seen; @@ -249,6 +248,8 @@ struct nvmet_subsys { struct nvme_ctrl *passthru_ctrl; char *passthru_ctrl_path; struct config_group passthru_group; + unsigned int admin_timeout; + unsigned int io_timeout; #endif /* CONFIG_NVME_TARGET_PASSTHRU */ }; @@ -330,6 +331,7 @@ struct nvmet_req { struct work_struct work; } f; struct { + struct bio inline_bio; struct request *rq; struct work_struct work; bool use_workqueue; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 8ee94f056898..b9776fc8f08f 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -188,35 +188,31 @@ static void nvmet_passthru_req_done(struct request *rq, static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) { struct scatterlist *sg; - int op_flags = 0; struct bio *bio; - int i, ret; + int i; if (req->sg_cnt > BIO_MAX_PAGES) return -EINVAL; - if (req->cmd->common.opcode == nvme_cmd_flush) - op_flags = REQ_FUA; - else if (nvme_is_write(req->cmd)) - op_flags = REQ_SYNC | REQ_IDLE; - - bio = bio_alloc(GFP_KERNEL, req->sg_cnt); - bio->bi_end_io = bio_put; - bio->bi_opf = req_op(rq) | op_flags; + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->p.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES)); + bio->bi_end_io = bio_put; + } + bio->bi_opf = req_op(rq); for_each_sg(req->sg, sg, req->sg_cnt, i) { if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length, sg->offset) < sg->length) { - bio_put(bio); + if (bio != &req->p.inline_bio) + bio_put(bio); return -EINVAL; } } - ret = blk_rq_append_bio(rq, &bio); - if (unlikely(ret)) { - bio_put(bio); - return ret; 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 07806016c09d..cb6f86572b24 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -355,7 +355,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
 	ctrl->admin_tag_set.driver_data = ctrl;
 	ctrl->admin_tag_set.nr_hw_queues = 1;
-	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+	ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT;
 	ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
 
 	ctrl->queues[0].ctrl = ctrl;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 559a15ccc322..592763732065 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -164,7 +164,6 @@ static inline struct nvmet_port *ana_groups_to_port(
 
 struct nvmet_ctrl {
 	struct nvmet_subsys	*subsys;
-	struct nvmet_cq		**cqs;
 	struct nvmet_sq		**sqs;
 
 	bool			cmd_seen;
@@ -249,6 +248,8 @@ struct nvmet_subsys {
 	struct nvme_ctrl	*passthru_ctrl;
 	char			*passthru_ctrl_path;
 	struct config_group	passthru_group;
+	unsigned int		admin_timeout;
+	unsigned int		io_timeout;
 #endif /* CONFIG_NVME_TARGET_PASSTHRU */
 };
 
@@ -330,6 +331,7 @@ struct nvmet_req {
 			struct work_struct	work;
 		} f;
 		struct {
+			struct bio		inline_bio;
 			struct request		*rq;
 			struct work_struct	work;
 			bool			use_workqueue;
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 8ee94f056898..b9776fc8f08f 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -188,35 +188,31 @@ static void nvmet_passthru_req_done(struct request *rq,
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 {
 	struct scatterlist *sg;
-	int op_flags = 0;
 	struct bio *bio;
-	int i, ret;
+	int i;
 
 	if (req->sg_cnt > BIO_MAX_PAGES)
 		return -EINVAL;
 
-	if (req->cmd->common.opcode == nvme_cmd_flush)
-		op_flags = REQ_FUA;
-	else if (nvme_is_write(req->cmd))
-		op_flags = REQ_SYNC | REQ_IDLE;
-
-	bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
-	bio->bi_end_io = bio_put;
-	bio->bi_opf = req_op(rq) | op_flags;
+	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+		bio = &req->p.inline_bio;
+		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	} else {
+		bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES));
+		bio->bi_end_io = bio_put;
+	}
+	bio->bi_opf = req_op(rq);
 
 	for_each_sg(req->sg, sg, req->sg_cnt, i) {
 		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
 				    sg->offset) < sg->length) {
-			bio_put(bio);
+			if (bio != &req->p.inline_bio)
+				bio_put(bio);
 			return -EINVAL;
 		}
 	}
 
-	ret = blk_rq_append_bio(rq, &bio);
-	if (unlikely(ret)) {
-		bio_put(bio);
-		return ret;
-	}
+	blk_rq_bio_prep(rq, bio, req->sg_cnt);
 
 	return 0;
 }
@@ -227,6 +223,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 	struct request_queue *q = ctrl->admin_q;
 	struct nvme_ns *ns = NULL;
 	struct request *rq = NULL;
+	unsigned int timeout;
 	u32 effects;
 	u16 status;
 	int ret;
@@ -242,14 +239,20 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 		}
 
 		q = ns->queue;
+		timeout = req->sq->ctrl->subsys->io_timeout;
+	} else {
+		timeout = req->sq->ctrl->subsys->admin_timeout;
 	}
 
-	rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
+	rq = nvme_alloc_request(q, req->cmd, 0);
 	if (IS_ERR(rq)) {
 		status = NVME_SC_INTERNAL;
 		goto out_put_ns;
 	}
 
+	if (timeout)
+		rq->timeout = timeout;
+
 	if (req->sg_cnt) {
 		ret = nvmet_passthru_map_sg(req, rq);
 		if (unlikely(ret)) {