From 213cbada7b07bf66409604e0d0dcd66a6a14891a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 30 Apr 2024 15:19:24 +0200 Subject: nvmet: lock config semaphore when accessing DH-HMAC-CHAP key When the DH-HMAC-CHAP key is accessed via configfs we need to take the config semaphore as a reconnect might be running at the same time. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 2 ++ drivers/nvme/target/configfs.c | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 3ddbc3880cac..9afc28f1ffac 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -44,6 +44,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, dhchap_secret = kstrdup(secret, GFP_KERNEL); if (!dhchap_secret) return -ENOMEM; + down_write(&nvmet_config_sem); if (set_ctrl) { kfree(host->dhchap_ctrl_secret); host->dhchap_ctrl_secret = strim(dhchap_secret); @@ -53,6 +54,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, host->dhchap_secret = strim(dhchap_secret); host->dhchap_key_hash = key_hash; } + up_write(&nvmet_config_sem); return 0; } diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 77a6e817b315..7c28b9c0ee57 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1990,11 +1990,17 @@ static struct config_group nvmet_ports_group; static ssize_t nvmet_host_dhchap_key_show(struct config_item *item, char *page) { - u8 *dhchap_secret = to_host(item)->dhchap_secret; + u8 *dhchap_secret; + ssize_t ret; + down_read(&nvmet_config_sem); + dhchap_secret = to_host(item)->dhchap_secret; if (!dhchap_secret) - return sprintf(page, "\n"); - return sprintf(page, "%s\n", dhchap_secret); + ret = sprintf(page, "\n"); + else + ret = sprintf(page, "%s\n", dhchap_secret); + up_read(&nvmet_config_sem); + return ret; } static ssize_t nvmet_host_dhchap_key_store(struct config_item *item, @@ -2018,10 +2024,16 @@ static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item, char *page) { u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret; + ssize_t ret; + down_read(&nvmet_config_sem); + dhchap_secret = to_host(item)->dhchap_ctrl_secret; if (!dhchap_secret) - return sprintf(page, "\n"); - return sprintf(page, "%s\n", dhchap_secret); + ret = sprintf(page, "\n"); + else + ret = sprintf(page, "%s\n", dhchap_secret); + up_read(&nvmet_config_sem); + return ret; } static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item, -- cgit v1.2.3-58-ga151 From 44e3c25efae8575e06f1c5d1dc40058a991e3cb2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 30 Apr 2024 15:19:25 +0200 Subject: nvmet: return DHCHAP status codes from nvmet_setup_auth() A failure in nvmet_setup_auth() does not mean that the NVMe authentication command failed, so we should rather return a protocol error with a 'failure1' response than an NVMe status. Also update the type used for dhchap_step and dhchap_status to u8 to avoid confusions with nvme status. Furthermore, split dhchap_status and nvme status so we don't accidentally mix these return values. Reviewed-by: Christoph Hellwig Signed-off-by: Hannes Reinecke [dwagner: - use u8 as type for dhchap_{step|status} - separate nvme status from dhcap_status] Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 20 ++++++-------- drivers/nvme/target/fabrics-cmd-auth.c | 49 +++++++++++++++++----------------- drivers/nvme/target/fabrics-cmd.c | 11 ++++---- drivers/nvme/target/nvmet.h | 8 +++--- 4 files changed, 43 insertions(+), 45 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 9afc28f1ffac..53bf1a084469 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -126,12 +126,11 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) return ret; } -int nvmet_setup_auth(struct nvmet_ctrl *ctrl) +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) { int ret = 0; struct nvmet_host_link *p; struct nvmet_host *host = NULL; - const char *hash_name; down_read(&nvmet_config_sem); if (nvmet_is_disc_subsys(ctrl->subsys)) @@ -149,13 +148,16 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl) } if (!host) { pr_debug("host %s not found\n", ctrl->hostnqn); - ret = -EPERM; + ret = NVME_AUTH_DHCHAP_FAILURE_FAILED; goto out_unlock; } ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id); - if (ret < 0) + if (ret < 0) { pr_warn("Failed to setup DH group"); + ret = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; + goto out_unlock; + } if (!host->dhchap_secret) { pr_debug("No authentication provided\n"); @@ -166,12 +168,6 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl) pr_debug("Re-use existing hash ID %d\n", ctrl->shash_id); } else { - hash_name = nvme_auth_hmac_name(host->dhchap_hash_id); - if (!hash_name) { - pr_warn("Hash ID %d invalid\n", host->dhchap_hash_id); - ret = -EINVAL; - goto out_unlock; - } ctrl->shash_id = host->dhchap_hash_id; } @@ -180,7 +176,7 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl) ctrl->host_key = nvme_auth_extract_key(host->dhchap_secret + 10, host->dhchap_key_hash); if (IS_ERR(ctrl->host_key)) { - ret = PTR_ERR(ctrl->host_key); + ret = NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE; ctrl->host_key = NULL; goto out_free_hash; } @@ -198,7 +194,7 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl) ctrl->ctrl_key = nvme_auth_extract_key(host->dhchap_ctrl_secret + 10, host->dhchap_ctrl_key_hash); if (IS_ERR(ctrl->ctrl_key)) { - ret = PTR_ERR(ctrl->ctrl_key); + ret = NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE; ctrl->ctrl_key = NULL; goto out_free_hash; } diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index eb7785be0ca7..d61b8c6ff3b2 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -31,7 +31,7 @@ void nvmet_auth_sq_init(struct nvmet_sq *sq) sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; } -static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d) +static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_auth_dhchap_negotiate_data *data = d; @@ -109,7 +109,7 @@ static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d) return 0; } -static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) +static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_auth_dhchap_reply_data *data = d; @@ -172,7 +172,7 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) return 0; } -static u16 nvmet_auth_failure2(void *d) +static u8 nvmet_auth_failure2(void *d) { struct nvmf_auth_dhchap_failure_data *data = d; @@ -186,6 +186,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) void *d; u32 tl; u16 status = 0; + u8 dhchap_status; if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; @@ -237,30 +238,32 @@ void nvmet_execute_auth_send(struct nvmet_req *req) if (data->auth_type == NVME_AUTH_COMMON_MESSAGES) { if (data->auth_id == NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE) { /* Restart negotiation */ - pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__, - ctrl->cntlid, req->sq->qid); + pr_debug("%s: ctrl %d qid %d reset negotiation\n", + __func__, ctrl->cntlid, req->sq->qid); if (!req->sq->qid) { - if (nvmet_setup_auth(ctrl) < 0) { - status = NVME_SC_INTERNAL; - pr_err("ctrl %d qid 0 failed to setup" - "re-authentication", + dhchap_status = nvmet_setup_auth(ctrl); + if (dhchap_status) { + pr_err("ctrl %d qid 0 failed to setup re-authentication\n", ctrl->cntlid); - goto done_failure1; + req->sq->dhchap_status = dhchap_status; + req->sq->dhchap_step = + NVME_AUTH_DHCHAP_MESSAGE_FAILURE1; + goto done_kfree; } } - req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; + req->sq->dhchap_step = + NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; } else if (data->auth_id != req->sq->dhchap_step) goto done_failure1; /* Validate negotiation parameters */ - status = nvmet_auth_negotiate(req, d); - if (status == 0) + dhchap_status = nvmet_auth_negotiate(req, d); + if (dhchap_status == 0) req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE; else { req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1; - req->sq->dhchap_status = status; - status = 0; + req->sq->dhchap_status = dhchap_status; } goto done_kfree; } @@ -284,15 +287,14 @@ void nvmet_execute_auth_send(struct nvmet_req *req) switch (data->auth_id) { case NVME_AUTH_DHCHAP_MESSAGE_REPLY: - status = nvmet_auth_reply(req, d); - if (status == 0) + dhchap_status = nvmet_auth_reply(req, d); + if (dhchap_status == 0) req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1; else { req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_FAILURE1; - req->sq->dhchap_status = status; - status = 0; + req->sq->dhchap_status = dhchap_status; } goto done_kfree; case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2: @@ -301,13 +303,12 @@ void nvmet_execute_auth_send(struct nvmet_req *req) __func__, ctrl->cntlid, req->sq->qid); goto done_kfree; case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2: - status = nvmet_auth_failure2(d); - if (status) { + dhchap_status = nvmet_auth_failure2(d); + if (dhchap_status) { pr_warn("ctrl %d qid %d: authentication failed (%d)\n", - ctrl->cntlid, req->sq->qid, status); - req->sq->dhchap_status = status; + ctrl->cntlid, req->sq->qid, dhchap_status); + req->sq->dhchap_status = dhchap_status; req->sq->authenticated = false; - status = 0; } goto done_kfree; default: diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index b23f4cf840bd..042b379cbb36 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -211,7 +211,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl = NULL; u16 status; - int ret; + u8 dhchap_status; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; @@ -254,11 +254,12 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) uuid_copy(&ctrl->hostid, &d->hostid); - ret = nvmet_setup_auth(ctrl); - if (ret < 0) { - pr_err("Failed to setup authentication, error %d\n", ret); + dhchap_status = nvmet_setup_auth(ctrl); + if (dhchap_status) { + pr_err("Failed to setup authentication, dhchap status %u\n", + dhchap_status); nvmet_ctrl_put(ctrl); - if (ret == -EPERM) + if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED) status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR); else status = NVME_SC_INTERNAL; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f460728e1df1..e4ba54b85511 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -113,8 +113,8 @@ struct nvmet_sq { bool authenticated; struct delayed_work auth_expired_work; u16 dhchap_tid; - u16 dhchap_status; - int dhchap_step; + u8 dhchap_status; + u8 dhchap_step; u8 *dhchap_c1; u8 *dhchap_c2; u32 dhchap_s1; @@ -713,7 +713,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req); int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); -int nvmet_setup_auth(struct nvmet_ctrl *ctrl); +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); @@ -732,7 +732,7 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req, int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, u8 *buf, int buf_size); #else -static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl) +static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) { return 0; } -- cgit v1.2.3-58-ga151 From 44350336fd9dab7a337dec686978a160cf1abfc5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 30 Apr 2024 15:19:26 +0200 Subject: nvme: return kernel error codes for admin queue connect nvmf_connect_admin_queue returns NVMe error status codes and kernel error codes. This mixes the different domains which makes maintainability difficult. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 6 +++--- drivers/nvme/host/fabrics.c | 31 +++++++++++++------------------ drivers/nvme/host/nvme.h | 2 +- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c9955ecd1790..a066429b790d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -383,14 +383,14 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) if (likely(nvme_req(req)->status == 0)) return COMPLETE; - if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED) - return AUTHENTICATE; - if (blk_noretry_request(req) || (nvme_req(req)->status & NVME_SC_DNR) || nvme_req(req)->retries >= nvme_max_retries) return COMPLETE; + if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED) + return AUTHENTICATE; + if (req->cmd_flags & REQ_NVME_MPATH) { if (nvme_is_path_error(nvme_req(req)->status) || blk_queue_dying(req->q)) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 1f0ea1f32d22..f7eaf9580b4f 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -428,12 +428,6 @@ static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid, * fabrics-protocol connection of the NVMe Admin queue between the * host system device and the allocated NVMe controller on the * target system via a NVMe Fabrics "Connect" command. - * - * Return: - * 0: success - * > 0: NVMe error status code - * < 0: Linux errno error code - * */ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) { @@ -467,7 +461,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); - ret = NVME_SC_AUTH_REQUIRED; + ret = -EOPNOTSUPP; goto out_free_data; } /* Authentication required */ @@ -475,14 +469,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) if (ret) { dev_warn(ctrl->device, "qid 0: authentication setup failed\n"); - ret = NVME_SC_AUTH_REQUIRED; goto out_free_data; } ret = nvme_auth_wait(ctrl, 0); - if (ret) + if (ret) { dev_warn(ctrl->device, - "qid 0: authentication failed\n"); - else + "qid 0: authentication failed, error %d\n", + ret); + } else dev_info(ctrl->device, "qid 0: authenticated\n"); } @@ -542,7 +536,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); - ret = NVME_SC_AUTH_REQUIRED; + ret = -EOPNOTSUPP; goto out_free_data; } /* Authentication required */ @@ -550,12 +544,13 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (ret) { dev_warn(ctrl->device, "qid %d: authentication setup failed\n", qid); - ret = NVME_SC_AUTH_REQUIRED; - } else { - ret = nvme_auth_wait(ctrl, qid); - if (ret) - dev_warn(ctrl->device, - "qid %u: authentication failed\n", qid); + goto out_free_data; + } + ret = nvme_auth_wait(ctrl, qid); + if (ret) { + dev_warn(ctrl->device, + "qid %u: authentication failed, error %d\n", + qid, ret); } } out_free_data: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 24193fcb8bd5..f243a5822c2b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1114,7 +1114,7 @@ static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid) } static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid) { - return NVME_SC_AUTH_REQUIRED; + return -EPROTONOSUPPORT; } static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; #endif -- cgit v1.2.3-58-ga151 From adfde7ed0b301ef14c37efe3143a8b26849843f6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 30 Apr 2024 15:19:27 +0200 Subject: nvme-fabrics: short-circuit reconnect retries Returning a nvme status from nvme_tcp_setup_ctrl() indicates that the association was established and we have received a status from the controller; consequently we should honour the DNR bit. If not any future reconnect attempts will just return the same error, so we can short-circuit the reconnect attempts and fail the connection directly. Signed-off-by: Hannes Reinecke [dwagner: - extended nvme_should_reconnect] Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 14 +++++++++++++- drivers/nvme/host/fabrics.h | 2 +- drivers/nvme/host/fc.c | 4 +--- drivers/nvme/host/rdma.c | 19 ++++++++++++------- drivers/nvme/host/tcp.c | 22 ++++++++++++++-------- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index f7eaf9580b4f..36d3e2ff27f3 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -559,8 +559,20 @@ out_free_data: } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); -bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) +/* + * Evaluate the status information returned by the transport in order to decided + * if a reconnect attempt should be scheduled. + * + * Do not retry when: + * + * - the DNR bit is set and the specification states no further connect + * attempts with the same set of paramenters should be attempted. + */ +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status) { + if (status > 0 && (status & NVME_SC_DNR)) + return false; + if (ctrl->opts->max_reconnects == -1 || ctrl->nr_reconnects < ctrl->opts->max_reconnects) return true; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 37c974c38dcb..602135910ae9 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -223,7 +223,7 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); -bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status); bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts); void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues, diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 68a5d971657b..b330a6a7b63a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3310,12 +3310,10 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) dev_info(ctrl->ctrl.device, "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", ctrl->cnum, status); - if (status > 0 && (status & NVME_SC_DNR)) - recon = false; } else if (time_after_eq(jiffies, rport->dev_loss_end)) recon = false; - if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { + if (recon && nvmf_should_reconnect(&ctrl->ctrl, status)) { if (portptr->port_state == FC_OBJSTATE_ONLINE) dev_info(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %ld " diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 366f0bb4ebfc..821ab3e0fd3b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -982,7 +982,8 @@ free_ctrl: kfree(ctrl); } -static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, + int status) { enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); @@ -992,7 +993,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) return; } - if (nvmf_should_reconnect(&ctrl->ctrl)) { + if (nvmf_should_reconnect(&ctrl->ctrl, status)) { dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_wq, &ctrl->reconnect_work, @@ -1104,10 +1105,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_rdma_ctrl, reconnect_work); + int ret; ++ctrl->ctrl.nr_reconnects; - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", @@ -1120,7 +1123,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -1145,7 +1148,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) return; } - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, 0); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -2169,6 +2172,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); + int ret; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -2179,14 +2183,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->ctrl.nr_reconnects; - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index fdbcdcedcee9..3e0c33323320 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2155,7 +2155,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, nvme_tcp_destroy_io_queues(ctrl, remove); } -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, + int status) { enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); @@ -2165,13 +2166,14 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) return; } - if (nvmf_should_reconnect(ctrl)) { + if (nvmf_should_reconnect(ctrl, status)) { dev_info(ctrl->device, "Reconnecting in %d seconds...\n", ctrl->opts->reconnect_delay); queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, ctrl->opts->reconnect_delay * HZ); } else { - dev_info(ctrl->device, "Removing controller...\n"); + dev_info(ctrl->device, "Removing controller (%d)...\n", + status); nvme_delete_ctrl(ctrl); } } @@ -2252,10 +2254,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), struct nvme_tcp_ctrl, connect_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + int ret; ++ctrl->nr_reconnects; - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", @@ -2268,7 +2272,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->device, "Failed reconnect attempt %d\n", ctrl->nr_reconnects); - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_error_recovery_work(struct work_struct *work) @@ -2295,7 +2299,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) return; } - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, 0); } static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) @@ -2315,6 +2319,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, reset_work); + int ret; nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2328,14 +2333,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->nr_reconnects; - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) -- cgit v1.2.3-58-ga151 From 0e34bd9605f6c95609c32aa82f0a394e2a945908 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 30 Apr 2024 15:19:28 +0200 Subject: nvme: do not retry authentication failures When the key is invalid there is no point in retrying. Because the auth code returns kernel error codes only, we can't test on the DNR bit. Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 6 +++--- drivers/nvme/host/fabrics.c | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index a264b3ae078b..371e14f0a203 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -730,7 +730,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE); if (ret) { chap->status = ret; - chap->error = -ECONNREFUSED; + chap->error = -EKEYREJECTED; return; } @@ -797,7 +797,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1); if (ret) { chap->status = ret; - chap->error = -ECONNREFUSED; + chap->error = -EKEYREJECTED; return; } @@ -818,7 +818,7 @@ static void nvme_queue_auth_work(struct work_struct *work) ret = nvme_auth_process_dhchap_success1(ctrl, chap); if (ret) { /* Controller authentication failed */ - chap->error = -ECONNREFUSED; + chap->error = -EKEYREJECTED; goto fail2; } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 36d3e2ff27f3..c6ad2148c2e0 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -567,12 +567,18 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); * * - the DNR bit is set and the specification states no further connect * attempts with the same set of paramenters should be attempted. + * + * - when the authentication attempt fails, because the key was invalid. + * This error code is set on the host side. */ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status) { if (status > 0 && (status & NVME_SC_DNR)) return false; + if (status == -EKEYREJECTED) + return false; + if (ctrl->opts->max_reconnects == -1 || ctrl->nr_reconnects < ctrl->opts->max_reconnects) return true; -- cgit v1.2.3-58-ga151 From c51a22e63ffde3033f74865a6e7b7d6e27cd6ab4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 7 May 2024 09:54:44 +0300 Subject: nvmet-rdma: Avoid o(n^2) loop in delete_ctrl When deleting a nvmet-rdma ctrl, we essentially loop over all queues that belong to the controller and schedule a removal of each. Instead of restarting the loop every time a queue is found, do a simple safe list traversal. This addresses an unneeded time spent scheduling queue removal in cases there a lot of queues. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5b8c63e74639..955296ac377f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1814,18 +1814,14 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) { - struct nvmet_rdma_queue *queue; + struct nvmet_rdma_queue *queue, *n; -restart: mutex_lock(&nvmet_rdma_queue_mutex); - list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) { - if (queue->nvme_sq.ctrl == ctrl) { - list_del_init(&queue->queue_list); - mutex_unlock(&nvmet_rdma_queue_mutex); - - __nvmet_rdma_queue_disconnect(queue); - goto restart; - } + list_for_each_entry_safe(queue, n, &nvmet_rdma_queue_list, queue_list) { + if (queue->nvme_sq.ctrl != ctrl) + continue; + list_del_init(&queue->queue_list); + __nvmet_rdma_queue_disconnect(queue); } mutex_unlock(&nvmet_rdma_queue_mutex); } -- cgit v1.2.3-58-ga151 From 54a76c8732b265aa86030134d4af6a5a3c59fe52 Mon Sep 17 00:00:00 2001 From: Tokunori Ikegami Date: Mon, 6 May 2024 00:24:59 +0900 Subject: nvme-rdma, nvme-tcp: include max reconnects for reconnect logging Makes clear max reconnects translated by ctrl loss tmo and reconnect delay. Signed-off-by: Tokunori Ikegami Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 4 ++-- drivers/nvme/host/tcp.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 821ab3e0fd3b..51a62b0c645a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1121,8 +1121,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) return; requeue: - dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", - ctrl->ctrl.nr_reconnects); + dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d/%d\n", + ctrl->ctrl.nr_reconnects, ctrl->ctrl.opts->max_reconnects); nvme_rdma_reconnect_or_remove(ctrl, ret); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3e0c33323320..9efeecc560b7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2262,16 +2262,16 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) if (ret) goto requeue; - dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", - ctrl->nr_reconnects); + dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", + ctrl->nr_reconnects, ctrl->opts->max_reconnects); ctrl->nr_reconnects = 0; return; requeue: - dev_info(ctrl->device, "Failed reconnect attempt %d\n", - ctrl->nr_reconnects); + dev_info(ctrl->device, "Failed reconnect attempt %d/%d\n", + ctrl->nr_reconnects, ctrl->opts->max_reconnects); nvme_tcp_reconnect_or_remove(ctrl, ret); } -- cgit v1.2.3-58-ga151