From 61263b3a11a2594b4e898f166c31162236182b5c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 11 Jan 2022 09:24:41 +0800 Subject: scsi: elx: efct: Don't use GFP_KERNEL under spin lock GFP_KERNEL/GFP_DMA can't be used under a spin lock. According the comment, els_ios_lock is used to protect els ios list so we can move down the spin lock to avoid using this flag under the lock. Link: https://lore.kernel.org/r/20220111012441.3232527-1-yangyingliang@huawei.com Fixes: 8f406ef72859 ("scsi: elx: libefc: Extended link Service I/O handling") Reported-by: Hulk Robot Reviewed-by: James Smart Signed-off-by: Yang Yingliang Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/libefc/efc_els.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c index 7bb4f9aad2c8..84bc81d7ce76 100644 --- a/drivers/scsi/elx/libefc/efc_els.c +++ b/drivers/scsi/elx/libefc/efc_els.c @@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) efc = node->efc; - spin_lock_irqsave(&node->els_ios_lock, flags); - if (!node->els_io_enabled) { efc_log_err(efc, "els io alloc disabled\n"); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC); if (!els) { atomic_add_return(1, &efc->els_io_alloc_failed_count); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) &els->io.req.phys, GFP_KERNEL); if (!els->io.req.virt) { mempool_free(els, efc->els_io_pool); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) /* add els structure to ELS IO list */ INIT_LIST_HEAD(&els->list_entry); + spin_lock_irqsave(&node->els_ios_lock, flags); list_add_tail(&els->list_entry, &node->els_ios_list); + spin_unlock_irqrestore(&node->els_ios_lock, flags); } - spin_unlock_irqrestore(&node->els_ios_lock, flags); return els; } -- cgit v1.2.3-58-ga151 From a861790afaa8b6369eee8a88c5d5d73f5799c0c6 Mon Sep 17 00:00:00 2001 From: ZouMingzhe Date: Tue, 11 Jan 2022 13:47:42 +0800 Subject: scsi: target: iscsi: Make sure the np under each tpg is unique iscsit_tpg_check_network_portal() has nested for_each loops and is supposed to return true when a match is found. However, the tpg loop will still continue after existing the tpg_np loop. If this tpg_np is not the last the match value will be changed. Break the outer loop after finding a match and make sure the np under each tpg is unique. Link: https://lore.kernel.org/r/20220111054742.19582-1-mingzhe.zou@easystack.cn Signed-off-by: ZouMingzhe Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_tpg.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c..2d5cf1714ae0 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); -- cgit v1.2.3-58-ga151 From a65b32748f4566f986ba2495a8236c141fa42a26 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Sun, 16 Jan 2022 11:06:49 +0800 Subject: scsi: ufs: ufshcd-pltfrm: Check the return value of devm_kstrdup() devm_kstrdup() returns pointer to allocated string on success, NULL on failure. So it is better to check the return value of it. Link: https://lore.kernel.org/r/tencent_4257E15D4A94FF9020DDCC4BB9B21C041408@qq.com Reviewed-by: Bean Huo Signed-off-by: Xiaoke Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pltfrm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 8b16bbbcb806..87975d1a21c8 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) clki->min_freq = clkfreq[i]; clki->max_freq = clkfreq[i+1]; clki->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!clki->name) { + ret = -ENOMEM; + goto out; + } + if (!strcmp(name, "ref_clk")) clki->keep_link_active = true; dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz", @@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, return -ENOMEM; vreg->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!vreg->name) + return -ENOMEM; snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name); if (of_property_read_u32(np, prop_name, &vreg->max_uA)) { -- cgit v1.2.3-58-ga151 From b70a99fd13282d7885f69bf1372e28b7506a1613 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:09 -0800 Subject: scsi: qedf: Add stag_work to all the vports Call trace seen when creating NPIV ports, only 32 out of 64 show online. stag work was not initialized for vport, hence initialize the stag work. WARNING: CPU: 8 PID: 645 at kernel/workqueue.c:1635 __queue_delayed_work+0x68/0x80 CPU: 8 PID: 645 Comm: kworker/8:1 Kdump: loaded Tainted: G IOE --------- -- 4.18.0-348.el8.x86_64 #1 Hardware name: Dell Inc. PowerEdge MX740c/0177V9, BIOS 2.12.2 07/09/2021 Workqueue: events fc_lport_timeout [libfc] RIP: 0010:__queue_delayed_work+0x68/0x80 Code: 89 b2 88 00 00 00 44 89 82 90 00 00 00 48 01 c8 48 89 42 50 41 81 f8 00 20 00 00 75 1d e9 60 24 07 00 44 89 c7 e9 98 f6 ff ff <0f> 0b eb c5 0f 0b eb a1 0f 0b eb a7 0f 0b eb ac 44 89 c6 e9 40 23 RSP: 0018:ffffae514bc3be40 EFLAGS: 00010006 RAX: ffff8d25d6143750 RBX: 0000000000000202 RCX: 0000000000000002 RDX: ffff8d2e31383748 RSI: ffff8d25c000d600 RDI: ffff8d2e31383788 RBP: ffff8d2e31380de0 R08: 0000000000002000 R09: ffff8d2e31383750 R10: ffffffffc0c957e0 R11: ffff8d2624800000 R12: ffff8d2e31380a58 R13: ffff8d2d915eb000 R14: ffff8d25c499b5c0 R15: ffff8d2e31380e18 FS: 0000000000000000(0000) GS:ffff8d2d1fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055fd0484b8b8 CR3: 00000008ffc10006 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: queue_delayed_work_on+0x36/0x40 qedf_elsct_send+0x57/0x60 [qedf] fc_lport_enter_flogi+0x90/0xc0 [libfc] fc_lport_timeout+0xb7/0x140 [libfc] process_one_work+0x1a7/0x360 ? create_worker+0x1a0/0x1a0 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x116/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x35/0x40 ---[ end trace 008f00f722f2c2ff ]-- Initialize stag work for all the vports. Link: https://lore.kernel.org/r/20220117135311.6256-2-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index cdc66e2a9488..3a5ce540cfc4 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1864,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) vport_qedf->cmd_mgr = base_qedf->cmd_mgr; init_completion(&vport_qedf->flogi_compl); INIT_LIST_HEAD(&vport_qedf->fcports); + INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work); rc = qedf_vport_libfc_config(vport, vn_port); if (rc) { -- cgit v1.2.3-58-ga151 From 5239ab63f17cee643bd4bf6addfedebaa7d4f41e Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:10 -0800 Subject: scsi: qedf: Fix refcount issue when LOGO is received during TMF Hung task call trace was seen during LOGO processing. [ 974.309060] [0000:00:00.0]:[qedf_eh_device_reset:868]: 1:0:2:0: LUN RESET Issued... [ 974.309065] [0000:00:00.0]:[qedf_initiate_tmf:2422]: tm_flags 0x10 sc_cmd 00000000c16b930f op = 0x2a target_id = 0x2 lun=0 [ 974.309178] [0000:00:00.0]:[qedf_initiate_tmf:2431]: portid=016900 tm_flags =LUN RESET [ 974.309222] [0000:00:00.0]:[qedf_initiate_tmf:2438]: orig io_req = 00000000ec78df8f xid = 0x180 ref_cnt = 1. [ 974.309625] host1: rport 016900: Received LOGO request while in state Ready [ 974.309627] host1: rport 016900: Delete port [ 974.309642] host1: rport 016900: work event 3 [ 974.309644] host1: rport 016900: lld callback ev 3 [ 974.313243] [0000:61:00.2]:[qedf_execute_tmf:2383]:1: fcport is uploading, not executing flush. [ 974.313295] [0000:61:00.2]:[qedf_execute_tmf:2400]:1: task mgmt command success... [ 984.031088] INFO: task jbd2/dm-15-8:7645 blocked for more than 120 seconds. [ 984.031136] Not tainted 4.18.0-305.el8.x86_64 #1 [ 984.031166] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 984.031209] jbd2/dm-15-8 D 0 7645 2 0x80004080 [ 984.031212] Call Trace: [ 984.031222] __schedule+0x2c4/0x700 [ 984.031230] ? unfreeze_partials.isra.83+0x16e/0x1a0 [ 984.031233] ? bit_wait_timeout+0x90/0x90 [ 984.031235] schedule+0x38/0xa0 [ 984.031238] io_schedule+0x12/0x40 [ 984.031240] bit_wait_io+0xd/0x50 [ 984.031243] __wait_on_bit+0x6c/0x80 [ 984.031248] ? free_buffer_head+0x21/0x50 [ 984.031251] out_of_line_wait_on_bit+0x91/0xb0 [ 984.031257] ? init_wait_var_entry+0x50/0x50 [ 984.031268] jbd2_journal_commit_transaction+0x112e/0x19f0 [jbd2] [ 984.031280] kjournald2+0xbd/0x270 [jbd2] [ 984.031284] ? finish_wait+0x80/0x80 [ 984.031291] ? commit_timeout+0x10/0x10 [jbd2] [ 984.031294] kthread+0x116/0x130 [ 984.031300] ? kthread_flush_work_fn+0x10/0x10 [ 984.031305] ret_from_fork+0x1f/0x40 There was a ref count issue when LOGO is received during TMF. This leads to one of the I/Os hanging with the driver. Fix the ref count. Link: https://lore.kernel.org/r/20220117135311.6256-3-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 99a56ca1fb16..fab43dabe5b3 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2250,6 +2250,7 @@ process_els: io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } -- cgit v1.2.3-58-ga151 From 64fd4af6274eb0f49d29772c228fffcf6bde1635 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 17 Jan 2022 05:53:11 -0800 Subject: scsi: qedf: Change context reset messages to ratelimited If FCoE is not configured, libfc/libfcoe keeps on retrying FLOGI and after 3 retries driver does a context reset and tries fipvlan again. This leads to context reset message flooding the logs. Hence ratelimit the message to prevent flooding the logs. Link: https://lore.kernel.org/r/20220117135311.6256-4-njavali@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 3a5ce540cfc4..6ad28bc8e948 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) struct qed_link_output if_link; if (lport->vport) { - QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + printk_ratelimited("Cannot issue host reset on NPIV port.\n"); return; } @@ -3981,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work) struct qedf_ctx *qedf = container_of(work, struct qedf_ctx, stag_work.work); - QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n"); + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", + dev_name(&qedf->pdev->dev), __func__, __LINE__, + qedf->dbg_ctx.host_no); qedf_ctx_soft_reset(qedf->lport); } -- cgit v1.2.3-58-ga151 From 62afb379a0fee7e9c2f9f68e1abeb85ceddf51b9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Jan 2022 20:15:05 +0800 Subject: scsi: pm8001: Fix bogus FW crash for maxcpus=1 According to the comment in check_fw_ready() we should not check the IOP1_READY field in register SCRATCH_PAD_1 for 8008 or 8009 controllers. However we check this very field in process_oq() for processing the highest index interrupt vector. The highest interrupt vector is checked as the FW is programmed to signal fatal errors through this irq. Change that function to not check IOP1_READY for those mentioned controllers, but do check ILA_READY in both cases. The reason I assume that this was not hit earlier was because we always allocated 64 MSI(X), and just did not pass the vector index check in process_oq(), i.e. the handler never ran for vector index 63. Link: https://lore.kernel.org/r/1642508105-95432-1-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 16 ++++++++++++++-- drivers/scsi/pm8001/pm80xx_hwi.h | 6 +++++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index bbf538fe15b3..2530d1365556 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4151,10 +4151,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u32 ret = MPI_IO_STATUS_FAIL; u32 regval; + /* + * Fatal errors are programmed to be signalled in irq vector + * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl. + * fatal_err_interrupt + */ if (vec == (pm8001_ha->max_q_num - 1)) { + u32 mipsall_ready; + + if (pm8001_ha->chip_id == chip_8008 || + pm8001_ha->chip_id == chip_8009) + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT; + else + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT; + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); - if ((regval & SCRATCH_PAD_MIPSALL_READY) != - SCRATCH_PAD_MIPSALL_READY) { + if ((regval & mipsall_ready) != mipsall_ready) { pm8001_ha->controller_fatal_error = true; pm8001_dbg(pm8001_ha, FAIL, "Firmware Fatal error! Regval:0x%x\n", diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index c7e5d93bea92..c41ed039c92a 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 -#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ +#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \ SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ + SCRATCH_PAD_RAAE_READY) +#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ SCRATCH_PAD_RAAE_READY) /* boot loader state */ -- cgit v1.2.3-58-ga151 From 8c9db6679be4348b8aae108e11d4be2f83976e30 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 18 Jan 2022 17:58:03 +0100 Subject: scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices Suppose we have an environment with a number of non-NPIV FCP devices (virtual HBAs / FCP devices / zfcp "adapter"s) sharing the same physical FCP channel (HBA port) and its I_T nexus. Plus a number of storage target ports zoned to such shared channel. Now one target port logs out of the fabric causing an RSCN. Zfcp reacts with an ADISC ELS and subsequent port recovery depending on the ADISC result. This happens on all such FCP devices (in different Linux images) concurrently as they all receive a copy of this RSCN. In the following we look at one of those FCP devices. Requests other than FSF_QTCB_FCP_CMND can be slow until they get a response. Depending on which requests are affected by slow responses, there are different recovery outcomes. Here we want to fix failed recoveries on port or adapter level by avoiding recovery requests that can be slow. We need the cached N_Port_ID for the remote port "link" test with ADISC. Just before sending the ADISC, we now intentionally forget the old cached N_Port_ID. The idea is that on receiving an RSCN for a port, we have to assume that any cached information about this port is stale. This forces a fresh new GID_PN [FC-GS] nameserver lookup on any subsequent recovery for the same port. Since we typically can still communicate with the nameserver efficiently, we now reach steady state quicker: Either the nameserver still does not know about the port so we stop recovery, or the nameserver already knows the port potentially with a new N_Port_ID and we can successfully and quickly perform open port recovery. For the one case, where ADISC returns successfully, we re-initialize port->d_id because that case does not involve any port recovery. This also solves a problem if the storage WWPN quickly logs into the fabric again but with a different N_Port_ID. Such as on virtual WWPN takeover during target NPIV failover. [https://www.redbooks.ibm.com/abstracts/redp5477.html] In that case the RSCN from the storage FDISC was ignored by zfcp and we could not successfully recover the failover. On some later failback on the storage, we could have been lucky if the virtual WWPN got the same old N_Port_ID from the SAN switch as we still had cached. Then the related RSCN triggered a successful port reopen recovery. However, there is no guarantee to get the same N_Port_ID on NPIV FDISC. Even though NPIV-enabled FCP devices are not affected by this problem, this code change optimizes recovery time for gone remote ports as a side effect. The timely drop of cached N_Port_IDs prevents unnecessary slow open port attempts. While the problem might have been in code before v2.6.32 commit 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") this fix depends on the gid_pn_work introduced with that commit, so we mark it as culprit to satisfy fix dependencies. Note: Point-to-point remote port is already handled separately and gets its N_Port_ID from the cached peer_d_id. So resetting port->d_id in general does not affect PtP. Link: https://lore.kernel.org/r/20220118165803.3667947-1-maier@linux.ibm.com Fixes: 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") Cc: #2.6.32+ Suggested-by: Benjamin Block Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708..511bf8e0a436 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). + */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); -- cgit v1.2.3-58-ga151 From 847f9ea4c5186fdb7b84297e3eeed9e340e83fce Mon Sep 17 00:00:00 2001 From: John Meneghini Date: Fri, 14 Jan 2022 23:00:44 -0500 Subject: scsi: bnx2fc: Flush destroy_work queue before calling bnx2fc_interface_put() The bnx2fc_destroy() functions are removing the interface before calling destroy_work. This results multiple WARNings from sysfs_remove_group() as the controller rport device attributes are removed too early. Replace the fcoe_port's destroy_work queue. It's not needed. The problem is easily reproducible with the following steps. Example: $ dmesg -w & $ systemctl enable --now fcoe $ fipvlan -s -c ens2f1 $ fcoeadm -d ens2f1.802 [ 583.464488] host2: libfc: Link down on port (7500a1) [ 583.472651] bnx2fc: 7500a1 - rport not created Yet!! [ 583.490468] ------------[ cut here ]------------ [ 583.538725] sysfs group 'power' not found for kobject 'rport-2:0-0' [ 583.568814] WARNING: CPU: 3 PID: 192 at fs/sysfs/group.c:279 sysfs_remove_group+0x6f/0x80 [ 583.607130] Modules linked in: dm_service_time 8021q garp mrp stp llc bnx2fc cnic uio rpcsec_gss_krb5 auth_rpcgss nfsv4 ... [ 583.942994] CPU: 3 PID: 192 Comm: kworker/3:2 Kdump: loaded Not tainted 5.14.0-39.el9.x86_64 #1 [ 583.984105] Hardware name: HP ProLiant DL120 G7, BIOS J01 07/01/2013 [ 584.016535] Workqueue: fc_wq_2 fc_rport_final_delete [scsi_transport_fc] [ 584.050691] RIP: 0010:sysfs_remove_group+0x6f/0x80 [ 584.074725] Code: ff 5b 48 89 ef 5d 41 5c e9 ee c0 ff ff 48 89 ef e8 f6 b8 ff ff eb d1 49 8b 14 24 48 8b 33 48 c7 c7 ... [ 584.162586] RSP: 0018:ffffb567c15afdc0 EFLAGS: 00010282 [ 584.188225] RAX: 0000000000000000 RBX: ffffffff8eec4220 RCX: 0000000000000000 [ 584.221053] RDX: ffff8c1586ce84c0 RSI: ffff8c1586cd7cc0 RDI: ffff8c1586cd7cc0 [ 584.255089] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffb567c15afc00 [ 584.287954] R10: ffffb567c15afbf8 R11: ffffffff8fbe7f28 R12: ffff8c1486326400 [ 584.322356] R13: ffff8c1486326480 R14: ffff8c1483a4a000 R15: 0000000000000004 [ 584.355379] FS: 0000000000000000(0000) GS:ffff8c1586cc0000(0000) knlGS:0000000000000000 [ 584.394419] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 584.421123] CR2: 00007fe95a6f7840 CR3: 0000000107674002 CR4: 00000000000606e0 [ 584.454888] Call Trace: [ 584.466108] device_del+0xb2/0x3e0 [ 584.481701] device_unregister+0x13/0x60 [ 584.501306] bsg_unregister_queue+0x5b/0x80 [ 584.522029] bsg_remove_queue+0x1c/0x40 [ 584.541884] fc_rport_final_delete+0xf3/0x1d0 [scsi_transport_fc] [ 584.573823] process_one_work+0x1e3/0x3b0 [ 584.592396] worker_thread+0x50/0x3b0 [ 584.609256] ? rescuer_thread+0x370/0x370 [ 584.628877] kthread+0x149/0x170 [ 584.643673] ? set_kthread_struct+0x40/0x40 [ 584.662909] ret_from_fork+0x22/0x30 [ 584.680002] ---[ end trace 53575ecefa942ece ]--- Link: https://lore.kernel.org/r/20220115040044.1013475-1-jmeneghi@redhat.com Fixes: 0cbf32e1681d ("[SCSI] bnx2fc: Avoid calling bnx2fc_if_destroy with unnecessary locks") Tested-by: Guangwu Zhang Co-developed-by: Maurizio Lombardi Signed-off-by: Maurizio Lombardi Signed-off-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 71fa62bd3083..9be273c320e2 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -907,9 +907,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1215,8 +1212,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1525,7 +1522,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1653,8 +1649,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1694,15 +1690,12 @@ netdev_err: return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2556,9 +2549,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) -- cgit v1.2.3-58-ga151 From fb8d5ea8fd907faa3751a9e5df5d01b5f3803e35 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 09:53:03 +0100 Subject: scsi: 3w-sas: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/dbbe8671ca760972d80f8d35f3170b4609bee368.1642236763.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/3w-sas.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index b9482da79512..3ebe66151dcb 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask"); retval = -ENODEV; @@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume"); retval = -ENODEV; -- cgit v1.2.3-58-ga151 From 8001fa240fc0af1c3538a9fbaccd2c345ff9ab62 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 10:05:22 +0100 Subject: scsi: hisi_sas: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/1bf2d3660178b0e6f172e5208bc0bd68d31d9268.1642237482.git.christophe.jaillet@wanadoo.fr Acked-by: John Garry Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 3 --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 -- 2 files changed, 5 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index a05ec7aece5a..2f53a2ee024a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2666,9 +2666,6 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev, goto err_out; error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (error) - error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (error) { dev_err(dev, "No usable DMA addressing method\n"); goto err_out; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a45ef9a5e12e..a01a3a7b706b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_out; rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { dev_err(dev, "No usable DMA addressing method\n"); rc = -ENODEV; -- cgit v1.2.3-58-ga151 From 012d98dae453821ac31da25595ffa26d4ad49c8c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 10:15:41 +0100 Subject: scsi: bfa: Remove useless DMA-32 fallback configuration As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Link: https://lore.kernel.org/r/5663cef9b54004fa56cca7ce65f51eadfc3ecddb.1642238127.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 440ef32be048..e5aa982ffedc 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (rc) { rc = -ENODEV; printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev); @@ -1559,9 +1556,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, - DMA_BIT_MASK(32)); if (rc) goto out_disable_device; -- cgit v1.2.3-58-ga151 From ad6c8a426446873febc98140d81d5353f8c0825b Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Fri, 21 Jan 2022 14:33:02 +0900 Subject: scsi: ufs: Use generic error code in ufshcd_set_dev_pwr_mode() The return value of ufshcd_set_dev_pwr_mode() is passed to device PM core. However, the function currently returns a SCSI result which the PM core doesn't understand. This might lead to unexpected behaviors in userland; a platform reset was observed in Android. Use a generic error code for SSU failures. Link: https://lore.kernel.org/r/1642743182-54098-1-git-send-email-kwmad.kim@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 460d2b440d2e..50b12d60dc1b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully - * Returns non-zero if failed to set the requested power mode + * Returns < 0 if failed to set the requested power mode */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) @@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", pwr_mode, ret); - if (ret > 0 && scsi_sense_valid(&sshdr)) - scsi_print_sense_hdr(sdp, NULL, &sshdr); + if (ret > 0) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdp, NULL, &sshdr); + ret = -EIO; + } } if (!ret) -- cgit v1.2.3-58-ga151 From c99b9b2301492b665b6e51ba6c06ec362eddcd10 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Fri, 21 Jan 2022 14:37:55 +0900 Subject: scsi: ufs: Treat link loss as fatal error This event is raised when link is lost as specified in UFSHCI spec and that means communication is not possible. Thus initializing UFS interface needs to be done. Make UFS driver considers Link Lost as fatal in the INT_FATAL_ERRORS mask. This will trigger a host reset whenever a link lost interrupt occurs. Link: https://lore.kernel.org/r/1642743475-54275-1-git-send-email-kwmad.kim@samsung.com Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6a295c88d850..a7ff0e5b5494 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) + CRYPTO_ENGINE_FATAL_ERROR |\ + UIC_LINK_LOST) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 -- cgit v1.2.3-58-ga151 From efd7bb1d75cf6808d67c869a29245c88a990bdea Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 23 Jan 2022 17:55:30 +0000 Subject: scsi: 53c700: Remove redundant assignment to pointer SCp Pointer SCp is being re-assigned the same value that it was initialized to a few lines earlier, the assignment is redundant and can be removed. Link: https://lore.kernel.org/r/20220123175530.110462-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/53c700.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 3ad3ebaca8e9..ad4972c0fc53 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id) struct scsi_cmnd *SCp = hostdata->cmd; handled = 1; - SCp = hostdata->cmd; if(istat & SCSI_INT_PENDING) { udelay(10); -- cgit v1.2.3-58-ga151 From 4db09593af0b0b4d7d4805ebb3273df51d7cc30d Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 23 Jan 2022 14:57:17 -0800 Subject: scsi: myrs: Fix crash in error case In myrs_detect(), cs->disable_intr is NULL when privdata->hw_init() fails with non-zero. In this case, myrs_cleanup(cs) will call a NULL ptr and crash the kernel. [ 1.105606] myrs 0000:00:03.0: Unknown Initialization Error 5A [ 1.105872] myrs 0000:00:03.0: Failed to initialize Controller [ 1.106082] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 1.110774] Call Trace: [ 1.110950] myrs_cleanup+0xe4/0x150 [myrs] [ 1.111135] myrs_probe.cold+0x91/0x56a [myrs] [ 1.111302] ? DAC960_GEM_intr_handler+0x1f0/0x1f0 [myrs] [ 1.111500] local_pci_probe+0x48/0x90 Link: https://lore.kernel.org/r/20220123225717.1069538-1-ztong0001@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Tong Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/myrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 253ceca54a84..7eb8c39da366 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } -- cgit v1.2.3-58-ga151