From 6824c5ef5e8900e61ce8ed40885cacc1c9301c14 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 18 Nov 2015 16:33:08 -0700 Subject: NVMe: Fix possible arithmetic overflow for max segments Reported-by: Paul Grabinar Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8187df204695..394fd1631cd0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2268,7 +2268,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (dev->max_hw_sectors) { blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); blk_queue_max_segments(ns->queue, - ((dev->max_hw_sectors << 9) / dev->page_size) + 1); + (dev->max_hw_sectors / (dev->page_size >> 9)) + 1); } if (dev->stripe_size) blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9); -- cgit v1.2.3-58-ga151 From 604e8c8da8854351496215d269c3fa93859e3fee Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 20 Nov 2015 08:38:13 -0700 Subject: NVMe: reap completion entries when deleting queue Make sure that there are no unprocesssed entries on a completion queue before deleting it, and check for validity of the CQ door bell before writing completions to it. This fixes problems with doing a sysfs reset of the device while it's handling IO. Tested-by: Jon Derrick Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 394fd1631cd0..930042fa2d69 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -968,7 +968,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (likely(nvmeq->cq_vector >= 0)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -2787,6 +2788,10 @@ static void nvme_del_queue_end(struct nvme_queue *nvmeq) { struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; nvme_put_dq(dq); + + spin_lock_irq(&nvmeq->q_lock); + nvme_process_cq(nvmeq); + spin_unlock_irq(&nvmeq->q_lock); } static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, -- cgit v1.2.3-58-ga151 From c5c9f25b98a568451d665afe4aeefe17bf9f2995 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Tue, 24 Nov 2015 09:55:05 -0700 Subject: NVMe: default to 4k device page size We received a bug report recently when DDW (64-bit direct DMA on Power) is not enabled for NVMe devices. In that case, we fall back to 32-bit DMA via the IOMMU, which is always done via 4K TCEs (Translation Control Entries). The NVMe device driver, though, assumes that the DMA alignment for the PRP entries will match the device's page size, and that the DMA aligment matches the kernel's page aligment. On Power, the the IOMMU page size, as mentioned above, can be 4K, while the device can have a page size of 8K, while the kernel has a page size of 64K. This eventually trips the BUG_ON in nvme_setup_prps(), as we have a 'dma_len' that is a multiple of 4K but not 8K (e.g., 0xF000). In this particular case of page sizes, we clearly want to use the IOMMU's page size in the driver. And generally, the NVMe driver in this function should be using the IOMMU's page size for the default device page size, rather than the kernel's page size. There is not currently an API to obtain the IOMMU's page size across all architectures and in the interest of a stop-gap fix to this functional issue, default the NVMe device page size to 4K, with the intent of adding such an API and implementation across all architectures in the next merge window. With the functionally equivalent v3 of this patch, our hardware test exerciser survives when using 32-bit DMA; without the patch, the kernel will BUG within a few minutes. Signed-off-by: Nishanth Aravamudan Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 930042fa2d69..d9d6229e9f3f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1728,9 +1728,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u32 aqa; u64 cap = lo_hi_readq(&dev->bar->cap); struct nvme_queue *nvmeq; - unsigned page_shift = PAGE_SHIFT; + /* + * default to a 4K page size, with the intention to update this + * path in the future to accomodate architectures with differing + * kernel and IO page sizes. + */ + unsigned page_shift = 12; unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12; - unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; if (page_shift < dev_page_min) { dev_err(dev->dev, @@ -1739,13 +1743,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) 1 << page_shift); return -ENODEV; } - if (page_shift > dev_page_max) { - dev_info(dev->dev, - "Device maximum page size (%u) smaller than " - "host (%u); enabling work-around\n", - 1 << dev_page_max, 1 << page_shift); - page_shift = dev_page_max; - } dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ? NVME_CAP_NSSRC(cap) : 0; -- cgit v1.2.3-58-ga151 From bf508e910b02a6107a5aa054e03c6fc8a65dae1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Oct 2015 07:58:31 +0200 Subject: nvme: add missing unmaps in nvme_queue_rq When we fail various metadata related operations in nvme_queue_rq we need to unmap the data SGL. Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d9d6229e9f3f..f3b53af789ef 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -896,19 +896,28 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto retry_cmd; } if (blk_integrity_rq(req)) { - if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) + if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } sg_init_table(iod->meta_sg, 1); if (blk_rq_map_integrity_sg( - req->q, req->bio, iod->meta_sg) != 1) + req->q, req->bio, iod->meta_sg) != 1) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } if (rq_data_dir(req)) nvme_dif_remap(req, nvme_dif_prep); - if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) + if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } } } -- cgit v1.2.3-58-ga151 From 1f390c1fde3a96974784be53cb3a645da3e4849c Mon Sep 17 00:00:00 2001 From: Stephan Günther Date: Tue, 1 Dec 2015 13:23:22 -0700 Subject: nvme: temporary fix for Apple controller reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent patches added basic support for the Apple NVMe controller but still cause resets and data corruption on that particular controller when a specific pattern of read/flush commands occurs. Limiting the queue depth to 2 works around that issue. This patch enforces that limit only for the Apple controller and is considered a temporary fix until we find the root source of that problem. Signed-off-by: Stephan Günther Signed-off-by: Maurice Leclaire Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f3b53af789ef..9e294ff4e652 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2708,6 +2708,18 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); dev->db_stride = 1 << NVME_CAP_STRIDE(cap); dev->dbs = ((void __iomem *)dev->bar) + 4096; + + /* + * Temporary fix for the Apple controller found in the MacBook8,1 and + * some MacBook7,1 to avoid controller resets and data loss. + */ + if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { + dev->q_depth = 2; + dev_warn(dev->dev, "detected Apple NVMe controller, set " + "queue depth=%u to work around controller resets\n", + dev->q_depth); + } + if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) dev->cmb = nvme_map_cmb(dev); -- cgit v1.2.3-58-ga151