scsi: qedf: Simplify s/g list mapping

When mapping the pages from a scatter/gather list from the SCSI layer we only need to follow these rules: - Max SGEs for each I/O request is 256 - No size limit on each SGE - No need to split OS provided SGEs to 4K before sending to firmware. - Slow SGE is applicable only when: - There are > 8 SGEs and any middle SGE is less than a page size (4K) Make necessary changes so that driver follows these rules. Applicable only for Write requests (not for Read requests). No need to check SGE address alignment requirements (first, middle or last) before declaring slow SGE. Signed-off-by: Chad Dupuis <cdupuis@marvell.com> Signed-off-by: Saurav Kashyap <skashyap@marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
author: Chad Dupuis <cdupuis@marvell.com> 2019-03-26 00:38:36 -0700
committer: Martin K. Petersen <martin.petersen@oracle.com> 2019-03-27 21:54:51 -0400
commit: 3e2c11b3fb7a237683f9405f14482a6f089b7e37 (patch)
tree: e4af67b5fd1f7dfbdbea9f37ce5da42a7e0680d9 /drivers/scsi/qedf
parent: c5e06ba2f76809ad1492fdad312e81335df46bc5 (diff)
3 files changed, 40 insertions, 94 deletions
diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 8e75c21b18b9..4080af8b620c 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -102,7 +102,6 @@ struct qedf_ioreq {
 	struct list_head link;
 	uint16_t xid;
 	struct scsi_cmnd *sc_cmd;
-	bool use_slowpath; /* Use slow SGL for this I/O */
 #define QEDF_SCSI_CMD		1
 #define QEDF_TASK_MGMT_CMD	2
 #define QEDF_ABTS		3
@@ -152,9 +151,9 @@ struct qedf_ioreq {
 	int fp_idx;
 	unsigned int cpu;
 	unsigned int int_cpu;
-#define QEDF_IOREQ_SLOW_SGE		0
-#define QEDF_IOREQ_SINGLE_SGE		1
-#define QEDF_IOREQ_FAST_SGE		2
+#define QEDF_IOREQ_UNKNOWN_SGE		1
+#define QEDF_IOREQ_SLOW_SGE		2
+#define QEDF_IOREQ_FAST_SGE		3
 	u8 sge_type;
 	struct delayed_work rrq_work;
 
@@ -366,7 +365,6 @@ struct qedf_ctx {
 
 	u32 slow_sge_ios;
 	u32 fast_sge_ios;
-	u32 single_sge_ios;
 
 	uint8_t	*grcdump;
 	uint32_t grcdump_size;
diff --git a/drivers/scsi/qedf/qedf_debugfs.c b/drivers/scsi/qedf/qedf_debugfs.c
index a32d8ee4666e..6ae78ddc8a65 100644
--- a/drivers/scsi/qedf/qedf_debugfs.c
+++ b/drivers/scsi/qedf/qedf_debugfs.c
@@ -303,7 +303,6 @@ qedf_driver_stats_show(struct seq_file *s, void *unused)
 	seq_printf(s, "cmg_mgr free io_reqs: %d\n",
 	    atomic_read(&qedf->cmd_mgr->free_list_cnt));
 	seq_printf(s, "slow SGEs: %d\n", qedf->slow_sge_ios);
-	seq_printf(s, "single SGEs: %d\n", qedf->single_sge_ios);
 	seq_printf(s, "fast SGEs: %d\n\n", qedf->fast_sge_ios);
 
 	seq_puts(s, "Offloaded ports:\n\n");
@@ -361,7 +360,6 @@ qedf_dbg_clear_stats_cmd_write(struct file *filp,
 
 	/* Clear stat counters exposed by 'stats' node */
 	qedf->slow_sge_ios = 0;
-	qedf->single_sge_ios = 0;
 	qedf->fast_sge_ios = 0;
 
 	return count;
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index da728d2e17b3..ed38df90f042 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -428,29 +428,6 @@ void qedf_release_cmd(struct kref *ref)
 	clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags);
 }
 
-static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len,
-	int bd_index)
-{
-	struct scsi_sge *bd = io_req->bd_tbl->bd_tbl;
-	int frag_size, sg_frags;
-
-	sg_frags = 0;
-	while (sg_len) {
-		if (sg_len > QEDF_BD_SPLIT_SZ)
-			frag_size = QEDF_BD_SPLIT_SZ;
-		else
-			frag_size = sg_len;
-		bd[bd_index + sg_frags].sge_addr.lo = U64_LO(addr);
-		bd[bd_index + sg_frags].sge_addr.hi = U64_HI(addr);
-		bd[bd_index + sg_frags].sge_len = (uint16_t)frag_size;
-
-		addr += (u64)frag_size;
-		sg_frags++;
-		sg_len -= frag_size;
-	}
-	return sg_frags;
-}
-
 static int qedf_map_sg(struct qedf_ioreq *io_req)
 {
 	struct scsi_cmnd *sc = io_req->sc_cmd;
@@ -462,75 +439,45 @@ static int qedf_map_sg(struct qedf_ioreq *io_req)
 	int byte_count = 0;
 	int sg_count = 0;
 	int bd_count = 0;
-	int sg_frags;
-	unsigned int sg_len;
+	u32 sg_len;
 	u64 addr, end_addr;
-	int i;
+	int i = 0;
 
 	sg_count = dma_map_sg(&qedf->pdev->dev, scsi_sglist(sc),
 	    scsi_sg_count(sc), sc->sc_data_direction);
-
 	sg = scsi_sglist(sc);
 
-	/*
-	 * New condition to send single SGE as cached-SGL with length less
-	 * than 64k.
-	 */
-	if ((sg_count == 1) && (sg_dma_len(sg) <=
-	    QEDF_MAX_SGLEN_FOR_CACHESGL)) {
-		sg_len = sg_dma_len(sg);
-		addr = (u64)sg_dma_address(sg);
-
-		bd[bd_count].sge_addr.lo = (addr & 0xffffffff);
-		bd[bd_count].sge_addr.hi = (addr >> 32);
-		bd[bd_count].sge_len = (u16)sg_len;
+	io_req->sge_type = QEDF_IOREQ_UNKNOWN_SGE;
 
-		return ++bd_count;
-	}
+	if (sg_count <= 8 || io_req->io_req_flags == QEDF_READ)
+		io_req->sge_type = QEDF_IOREQ_FAST_SGE;
 
 	scsi_for_each_sg(sc, sg, sg_count, i) {
-		sg_len = sg_dma_len(sg);
+		sg_len = (u32)sg_dma_len(sg);
 		addr = (u64)sg_dma_address(sg);
 		end_addr = (u64)(addr + sg_len);
 
 		/*
-		 * First s/g element in the list so check if the end_addr
-		 * is paged aligned. Also check to make sure the length is
-		 * at least page size.
-		 */
-		if ((i == 0) && (sg_count > 1) &&
-		    ((end_addr % QEDF_PAGE_SIZE) ||
-		    sg_len < QEDF_PAGE_SIZE))
-			io_req->use_slowpath = true;
-		/*
-		 * Last s/g element so check if the start address is paged
-		 * aligned.
-		 */
-		else if ((i == (sg_count - 1)) && (sg_count > 1) &&
-		    (addr % QEDF_PAGE_SIZE))
-			io_req->use_slowpath = true;
-		/*
 		 * Intermediate s/g element so check if start and end address
-		 * is page aligned.
+		 * is page aligned.  Only required for writes and only if the
+		 * number of scatter/gather elements is 8 or more.
 		 */
-		else if ((i != 0) && (i != (sg_count - 1)) &&
-		    ((addr % QEDF_PAGE_SIZE) || (end_addr % QEDF_PAGE_SIZE)))
-			io_req->use_slowpath = true;
+		if (io_req->sge_type == QEDF_IOREQ_UNKNOWN_SGE && (i) &&
+		    (i != (sg_count - 1)) && sg_len < QEDF_PAGE_SIZE)
+			io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
 
-		if (sg_len > QEDF_MAX_BD_LEN) {
-			sg_frags = qedf_split_bd(io_req, addr, sg_len,
-			    bd_count);
-		} else {
-			sg_frags = 1;
-			bd[bd_count].sge_addr.lo = U64_LO(addr);
-			bd[bd_count].sge_addr.hi  = U64_HI(addr);
-			bd[bd_count].sge_len = (uint16_t)sg_len;
-		}
+		bd[bd_count].sge_addr.lo = cpu_to_le32(U64_LO(addr));
+		bd[bd_count].sge_addr.hi  = cpu_to_le32(U64_HI(addr));
+		bd[bd_count].sge_len = cpu_to_le32(sg_len);
 
-		bd_count += sg_frags;
+		bd_count++;
 		byte_count += sg_len;
 	}
 
+	/* To catch a case where FAST and SLOW nothing is set, set FAST */
+	if (io_req->sge_type == QEDF_IOREQ_UNKNOWN_SGE)
+		io_req->sge_type = QEDF_IOREQ_FAST_SGE;
+
 	if (byte_count != scsi_bufflen(sc))
 		QEDF_ERR(&(qedf->dbg_ctx), "byte_count = %d != "
 			  "scsi_bufflen = %d, task_id = 0x%x.\n", byte_count,
@@ -655,8 +602,10 @@ static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
 		io_req->sgl_task_params->num_sges = bd_count;
 		io_req->sgl_task_params->total_buffer_size =
 		    scsi_bufflen(io_req->sc_cmd);
-		io_req->sgl_task_params->small_mid_sge =
-			io_req->use_slowpath;
+		if (io_req->sge_type == QEDF_IOREQ_SLOW_SGE)
+			io_req->sgl_task_params->small_mid_sge = 1;
+		else
+			io_req->sgl_task_params->small_mid_sge = 0;
 	}
 
 	/* Fill in physical address of sense buffer */
@@ -679,16 +628,10 @@ static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
 				    io_req->task_retry_identifier, fcp_cmnd);
 
 	/* Increment SGL type counters */
-	if (bd_count == 1) {
-		qedf->single_sge_ios++;
-		io_req->sge_type = QEDF_IOREQ_SINGLE_SGE;
-	} else if (io_req->use_slowpath) {
+	if (io_req->sge_type == QEDF_IOREQ_SLOW_SGE)
 		qedf->slow_sge_ios++;
-		io_req->sge_type = QEDF_IOREQ_SLOW_SGE;
-	} else {
+	else
 		qedf->fast_sge_ios++;
-		io_req->sge_type = QEDF_IOREQ_FAST_SGE;
-	}
 }
 
 void qedf_init_mp_task(struct qedf_ioreq *io_req,
@@ -770,9 +713,6 @@ void qedf_init_mp_task(struct qedf_ioreq *io_req,
 						     &task_fc_hdr,
 						     &tx_sgl_task_params,
 						     &rx_sgl_task_params, 0);
-
-	/* Midpath requests always consume 1 SGE */
-	qedf->single_sge_ios++;
 }
 
 /* Presumed that fcport->rport_lock is held */
@@ -872,7 +812,7 @@ int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req)
 	/* Initialize rest of io_req fileds */
 	io_req->data_xfer_len = scsi_bufflen(sc_cmd);
 	sc_cmd->SCp.ptr = (char *)io_req;
-	io_req->use_slowpath = false; /* Assume fast SGL by default */
+	io_req->sge_type = QEDF_IOREQ_FAST_SGE; /* Assume fast SGL by default */
 
 	/* Record which cpu this request is associated with */
 	io_req->cpu = smp_processor_id();
@@ -942,7 +882,17 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd)
 	int rc = 0;
 	int rval;
 	unsigned long flags = 0;
-
+	int num_sgs = 0;
+
+	num_sgs = scsi_sg_count(sc_cmd);
+	if (scsi_sg_count(sc_cmd) > QEDF_MAX_BDS_PER_CMD) {
+		QEDF_ERR(&qedf->dbg_ctx,
+			 "Number of SG elements %d exceeds what hardware limitation of %d.\n",
+			 num_sgs, QEDF_MAX_BDS_PER_CMD);
+		sc_cmd->result = DID_ERROR;
+		sc_cmd->scsi_done(sc_cmd);
+		return 0;
+	}
 
 	if (test_bit(QEDF_UNLOADING, &qedf->flags) ||
 	    test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) {
author	Chad Dupuis <cdupuis@marvell.com>	2019-03-26 00:38:36 -0700
committer	Martin K. Petersen <martin.petersen@oracle.com>	2019-03-27 21:54:51 -0400
commit	3e2c11b3fb7a237683f9405f14482a6f089b7e37 (patch)
tree	e4af67b5fd1f7dfbdbea9f37ce5da42a7e0680d9 /drivers/scsi/qedf
parent	c5e06ba2f76809ad1492fdad312e81335df46bc5 (diff)