summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-01 15:55:29 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-01 15:55:29 -0800
commit8e143b90e4d45cca3dc53760d3cfab988bc74571 (patch)
treecd924b3abd58786ce1f3f7a41f5f32ff9f3e6af7 /drivers
parent78e8696c234ab637c4dd516cabeac344d84ec10b (diff)
parent03ebe48e235f17d70f34890d34d8153b8a84c02e (diff)
Merge tag 'iommu-updates-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel: - Page table code for AMD IOMMU now supports large pages where smaller page-sizes were mapped before. VFIO had to work around that in the past and I included a patch to remove it (acked by Alex Williamson) - Patches to unmodularize a couple of IOMMU drivers that would never work as modules anyway. - Work to unify the iommu-related pointers in 'struct device' into one pointer. This work is not finished yet, but will probably be in the next cycle. - NUMA aware allocation in iommu-dma code - Support for r8a774a1 and r8a774c0 in the Renesas IOMMU driver - Scalable mode support for the Intel VT-d driver - PM runtime improvements for the ARM-SMMU driver - Support for the QCOM-SMMUv2 IOMMU hardware from Qualcomm - Various smaller fixes and improvements * tag 'iommu-updates-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (78 commits) iommu: Check for iommu_ops == NULL in iommu_probe_device() ACPI/IORT: Don't call iommu_ops->add_device directly iommu/of: Don't call iommu_ops->add_device directly iommu: Consolitate ->add/remove_device() calls iommu/sysfs: Rename iommu_release_device() dmaengine: sh: rcar-dmac: Use device_iommu_mapped() xhci: Use device_iommu_mapped() powerpc/iommu: Use device_iommu_mapped() ACPI/IORT: Use device_iommu_mapped() iommu/of: Use device_iommu_mapped() driver core: Introduce device_iommu_mapped() function iommu/tegra: Use helper functions to access dev->iommu_fwspec iommu/qcom: Use helper functions to access dev->iommu_fwspec iommu/of: Use helper functions to access dev->iommu_fwspec iommu/mediatek: Use helper functions to access dev->iommu_fwspec iommu/ipmmu-vmsa: Use helper functions to access dev->iommu_fwspec iommu/dma: Use helper functions to access dev->iommu_fwspec iommu/arm-smmu: Use helper functions to access dev->iommu_fwspec ACPI/IORT: Use helper functions to access dev->iommu_fwspec iommu: Introduce wrappers around dev->iommu_fwspec ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/arm64/iort.c23
-rw-r--r--drivers/dma/sh/rcar-dmac.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/intel_display.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c2
-rw-r--r--drivers/iommu/amd_iommu.c275
-rw-r--r--drivers/iommu/amd_iommu_init.c64
-rw-r--r--drivers/iommu/amd_iommu_types.h1
-rw-r--r--drivers/iommu/amd_iommu_v2.c2
-rw-r--r--drivers/iommu/arm-smmu-v3.c63
-rw-r--r--drivers/iommu/arm-smmu.c209
-rw-r--r--drivers/iommu/dma-iommu.c22
-rw-r--r--drivers/iommu/dmar.c91
-rw-r--r--drivers/iommu/intel-iommu.c351
-rw-r--r--drivers/iommu/intel-pasid.c449
-rw-r--r--drivers/iommu/intel-pasid.h40
-rw-r--r--drivers/iommu/intel-svm.c171
-rw-r--r--drivers/iommu/intel_irq_remapping.c6
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c4
-rw-r--r--drivers/iommu/iommu-sysfs.c14
-rw-r--r--drivers/iommu/iommu.c113
-rw-r--r--drivers/iommu/ipmmu-vmsa.c88
-rw-r--r--drivers/iommu/irq_remapping.c1
-rw-r--r--drivers/iommu/msm_iommu.c13
-rw-r--r--drivers/iommu/mtk_iommu.c25
-rw-r--r--drivers/iommu/mtk_iommu_v1.c47
-rw-r--r--drivers/iommu/of_iommu.c16
-rw-r--r--drivers/iommu/omap-iommu-debug.c25
-rw-r--r--drivers/iommu/qcom_iommu.c34
-rw-r--r--drivers/iommu/rockchip-iommu.c13
-rw-r--r--drivers/iommu/tegra-gart.c37
-rw-r--r--drivers/iommu/tegra-smmu.c26
-rw-r--r--drivers/misc/mic/scif/scif_rma.c2
-rw-r--r--drivers/misc/mic/scif/scif_rma.h2
-rw-r--r--drivers/usb/host/xhci.c2
-rw-r--r--drivers/vfio/vfio_iommu_type1.c33
36 files changed, 1435 insertions, 835 deletions
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 2159ad9bf9ed..fdd90ffceb85 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -779,7 +779,7 @@ static inline bool iort_iommu_driver_enabled(u8 type)
static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
{
struct acpi_iort_node *iommu;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
iommu = iort_get_iort_node(fwspec->iommu_fwnode);
@@ -794,9 +794,10 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
return NULL;
}
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(
- struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
}
@@ -805,8 +806,8 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
{
int err = 0;
- if (ops->add_device && dev->bus && !dev->iommu_group)
- err = ops->add_device(dev);
+ if (dev->bus && !device_iommu_mapped(dev))
+ err = iommu_probe_device(dev);
return err;
}
@@ -824,6 +825,7 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
*/
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct acpi_iort_its_group *its;
struct acpi_iort_node *iommu_node, *its_node = NULL;
int i, resv = 0;
@@ -841,9 +843,9 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
* a given PCI or named component may map IDs to.
*/
- for (i = 0; i < dev->iommu_fwspec->num_ids; i++) {
+ for (i = 0; i < fwspec->num_ids; i++) {
its_node = iort_node_map_id(iommu_node,
- dev->iommu_fwspec->ids[i],
+ fwspec->ids[i],
NULL, IORT_MSI_TYPE);
if (its_node)
break;
@@ -874,8 +876,7 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
return (resv == its->its_count) ? resv : -ENODEV;
}
#else
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(
- struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
{ return NULL; }
static inline int iort_add_device_replay(const struct iommu_ops *ops,
struct device *dev)
@@ -1045,7 +1046,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
* If we already translated the fwspec there
* is nothing left to do, return the iommu_ops.
*/
- ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
+ ops = iort_fwspec_iommu_ops(dev);
if (ops)
return ops;
@@ -1084,7 +1085,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
* add_device callback for dev, replay it to get things in order.
*/
if (!err) {
- ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
+ ops = iort_fwspec_iommu_ops(dev);
err = iort_add_device_replay(ops, dev);
}
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 74fa2b1a6a86..2b4f25698169 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1809,7 +1809,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
* level we can't disable it selectively, so ignore channel 0 for now if
* the device is part of an IOMMU group.
*/
- if (pdev->dev.iommu_group) {
+ if (device_iommu_mapped(&pdev->dev)) {
dmac->n_channels--;
channels_offset = 1;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 786d719e652d..8ff6b581cf1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,7 +26,7 @@
*
*/
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
#include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 07c861884c70..3da9c0f9e948 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -46,7 +46,7 @@
#include <drm/drm_plane_helper.h>
#include <drm/drm_rect.h>
#include <drm/drm_atomic_uapi.h>
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
#include <linux/reservation.h>
/* Primary plane formats for gen <= 3 */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9fd5fbe8bebf..25afb1d594e3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -34,7 +34,7 @@
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_module.h>
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
#define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
#define VMWGFX_CHIP_SVGAII 0
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 567221cca13c..87ba23a75b38 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+
#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/acpi.h>
@@ -277,7 +279,7 @@ static u16 get_alias(struct device *dev)
return pci_alias;
}
- pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
+ pr_info("Using IVRS reported alias %02x:%02x.%d "
"for device %s[%04x:%04x], kernel reported alias "
"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
@@ -291,7 +293,7 @@ static u16 get_alias(struct device *dev)
if (pci_alias == devid &&
PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
pci_add_dma_alias(pdev, ivrs_alias & 0xff);
- pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
+ pr_info("Added PCI DMA alias %02x.%d for %s\n",
PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
dev_name(dev));
}
@@ -436,7 +438,14 @@ static int iommu_init_device(struct device *dev)
dev_data->alias = get_alias(dev);
- if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
+ /*
+ * By default we use passthrough mode for IOMMUv2 capable device.
+ * But if amd_iommu=force_isolation is set (e.g. to debug DMA to
+ * invalid address), we ignore the capability for the device so
+ * it'll be forced to go into translation mode.
+ */
+ if ((iommu_pass_through || !amd_iommu_force_isolation) &&
+ dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
struct amd_iommu *iommu;
iommu = amd_iommu_rlookup_table[dev_data->devid];
@@ -511,7 +520,7 @@ static void dump_dte_entry(u16 devid)
int i;
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
+ pr_err("DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
@@ -521,7 +530,7 @@ static void dump_command(unsigned long phys_addr)
int i;
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
+ pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
}
static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
@@ -536,10 +545,10 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
dev_data = get_dev_data(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
- dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
domain_id, address, flags);
} else if (printk_ratelimit()) {
- pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
@@ -566,7 +575,7 @@ retry:
if (type == 0) {
/* Did we hit the erratum? */
if (++count == LOOP_TIMEOUT) {
- pr_err("AMD-Vi: No event written to event log\n");
+ pr_err("No event written to event log\n");
return;
}
udelay(1);
@@ -576,43 +585,41 @@ retry:
if (type == EVENT_TYPE_IO_FAULT) {
amd_iommu_report_page_fault(devid, pasid, address, flags);
return;
- } else {
- dev_err(dev, "AMD-Vi: Event logged [");
}
switch (type) {
case EVENT_TYPE_ILL_DEV:
- dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
case EVENT_TYPE_ILL_CMD:
- dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
- dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n",
address, flags);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n",
+ dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
@@ -620,12 +627,12 @@ retry:
pasid = ((event[0] >> 16) & 0xFFFF)
| ((event[1] << 6) & 0xF0000);
tag = event[1] & 0x03FF;
- dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
default:
- dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
+ dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
event[0], event[1], event[2], event[3]);
}
@@ -652,7 +659,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
struct amd_iommu_fault fault;
if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
- pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+ pr_err_ratelimited("Unknown PPR request received\n");
return;
}
@@ -757,12 +764,12 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu)
if (!iommu_ga_log_notifier)
break;
- pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n",
+ pr_debug("%s: devid=%#x, ga_tag=%#x\n",
__func__, GA_DEVID(log_entry),
GA_TAG(log_entry));
if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
- pr_err("AMD-Vi: GA log notifier failed.\n");
+ pr_err("GA log notifier failed.\n");
break;
default:
break;
@@ -787,18 +794,18 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & MMIO_STATUS_EVT_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU Event Log\n");
+ pr_devel("Processing IOMMU Event Log\n");
iommu_poll_events(iommu);
}
if (status & MMIO_STATUS_PPR_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU PPR Log\n");
+ pr_devel("Processing IOMMU PPR Log\n");
iommu_poll_ppr_log(iommu);
}
#ifdef CONFIG_IRQ_REMAP
if (status & MMIO_STATUS_GALOG_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU GA Log\n");
+ pr_devel("Processing IOMMU GA Log\n");
iommu_poll_ga_log(iommu);
}
#endif
@@ -842,7 +849,7 @@ static int wait_on_sem(volatile u64 *sem)
}
if (i == LOOP_TIMEOUT) {
- pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+ pr_alert("Completion-Wait loop timed out\n");
return -EIO;
}
@@ -1034,7 +1041,7 @@ again:
/* Skip udelay() the first time around */
if (count++) {
if (count == LOOP_TIMEOUT) {
- pr_err("AMD-Vi: Command buffer timeout\n");
+ pr_err("Command buffer timeout\n");
return -EIO;
}
@@ -1315,6 +1322,101 @@ static void domain_flush_devices(struct protection_domain *domain)
*
****************************************************************************/
+static void free_page_list(struct page *freelist)
+{
+ while (freelist != NULL) {
+ unsigned long p = (unsigned long)page_address(freelist);
+ freelist = freelist->freelist;
+ free_page(p);
+ }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+ struct page *p = virt_to_page((void *)pt);
+
+ p->freelist = freelist;
+
+ return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN) \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
+{ \
+ unsigned long p; \
+ u64 *pt; \
+ int i; \
+ \
+ pt = (u64 *)__pt; \
+ \
+ for (i = 0; i < 512; ++i) { \
+ /* PTE present? */ \
+ if (!IOMMU_PTE_PRESENT(pt[i])) \
+ continue; \
+ \
+ /* Large PTE? */ \
+ if (PM_PTE_LEVEL(pt[i]) == 0 || \
+ PM_PTE_LEVEL(pt[i]) == 7) \
+ continue; \
+ \
+ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
+ freelist = FN(p, freelist); \
+ } \
+ \
+ return free_pt_page((unsigned long)pt, freelist); \
+}
+
+DEFINE_FREE_PT_FN(l2, free_pt_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
+static struct page *free_sub_pt(unsigned long root, int mode,
+ struct page *freelist)
+{
+ switch (mode) {
+ case PAGE_MODE_NONE:
+ case PAGE_MODE_7_LEVEL:
+ break;
+ case PAGE_MODE_1_LEVEL:
+ freelist = free_pt_page(root, freelist);
+ break;
+ case PAGE_MODE_2_LEVEL:
+ freelist = free_pt_l2(root, freelist);
+ break;
+ case PAGE_MODE_3_LEVEL:
+ freelist = free_pt_l3(root, freelist);
+ break;
+ case PAGE_MODE_4_LEVEL:
+ freelist = free_pt_l4(root, freelist);
+ break;
+ case PAGE_MODE_5_LEVEL:
+ freelist = free_pt_l5(root, freelist);
+ break;
+ case PAGE_MODE_6_LEVEL:
+ freelist = free_pt_l6(root, freelist);
+ break;
+ default:
+ BUG();
+ }
+
+ return freelist;
+}
+
+static void free_pagetable(struct protection_domain *domain)
+{
+ unsigned long root = (unsigned long)domain->pt_root;
+ struct page *freelist = NULL;
+
+ BUG_ON(domain->mode < PAGE_MODE_NONE ||
+ domain->mode > PAGE_MODE_6_LEVEL);
+
+ freelist = free_sub_pt(root, domain->mode, freelist);
+
+ free_page_list(freelist);
+}
+
/*
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
@@ -1363,10 +1465,13 @@ static u64 *alloc_pte(struct protection_domain *domain,
while (level > end_lvl) {
u64 __pte, __npte;
+ int pte_level;
- __pte = *pte;
+ __pte = *pte;
+ pte_level = PM_PTE_LEVEL(__pte);
- if (!IOMMU_PTE_PRESENT(__pte)) {
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_7_LEVEL) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
@@ -1374,19 +1479,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
/* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte) {
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page);
- continue;
- }
+ else if (pte_level == PAGE_MODE_7_LEVEL)
+ domain->updated = true;
+
+ continue;
}
/* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
+ if (pte_level != level)
return NULL;
level -= 1;
- pte = IOMMU_PTE_PAGE(*pte);
+ pte = IOMMU_PTE_PAGE(__pte);
if (pte_page && level == end_lvl)
*pte_page = pte;
@@ -1455,6 +1562,25 @@ static u64 *fetch_pte(struct protection_domain *domain,
return pte;
}
+static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+{
+ unsigned long pt;
+ int mode;
+
+ while (cmpxchg64(pte, pteval, 0) != pteval) {
+ pr_warn("IOMMU pte changed since we read it\n");
+ pteval = *pte;
+ }
+
+ if (!IOMMU_PTE_PRESENT(pteval))
+ return freelist;
+
+ pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
+ mode = IOMMU_PTE_MODE(pteval);
+
+ return free_sub_pt(pt, mode, freelist);
+}
+
/*
* Generic mapping functions. It maps a physical address into a DMA
* address space. It allocates the page table pages if necessary.
@@ -1469,6 +1595,7 @@ static int iommu_map_page(struct protection_domain *dom,
int prot,
gfp_t gfp)
{
+ struct page *freelist = NULL;
u64 __pte, *pte;
int i, count;
@@ -1485,8 +1612,10 @@ static int iommu_map_page(struct protection_domain *dom,
return -ENOMEM;
for (i = 0; i < count; ++i)
- if (IOMMU_PTE_PRESENT(pte[i]))
- return -EBUSY;
+ freelist = free_clear_pte(&pte[i], pte[i], freelist);
+
+ if (freelist != NULL)
+ dom->updated = true;
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1504,6 +1633,9 @@ static int iommu_map_page(struct protection_domain *dom,
update_domain(dom);
+ /* Everything flushed out, free pages now */
+ free_page_list(freelist);
+
return 0;
}
@@ -1636,67 +1768,6 @@ static void domain_id_free(int id)
spin_unlock(&pd_bitmap_lock);
}
-#define DEFINE_FREE_PT_FN(LVL, FN) \
-static void free_pt_##LVL (unsigned long __pt) \
-{ \
- unsigned long p; \
- u64 *pt; \
- int i; \
- \
- pt = (u64 *)__pt; \
- \
- for (i = 0; i < 512; ++i) { \
- /* PTE present? */ \
- if (!IOMMU_PTE_PRESENT(pt[i])) \
- continue; \
- \
- /* Large PTE? */ \
- if (PM_PTE_LEVEL(pt[i]) == 0 || \
- PM_PTE_LEVEL(pt[i]) == 7) \
- continue; \
- \
- p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
- FN(p); \
- } \
- free_page((unsigned long)pt); \
-}
-
-DEFINE_FREE_PT_FN(l2, free_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
-
-static void free_pagetable(struct protection_domain *domain)
-{
- unsigned long root = (unsigned long)domain->pt_root;
-
- switch (domain->mode) {
- case PAGE_MODE_NONE:
- break;
- case PAGE_MODE_1_LEVEL:
- free_page(root);
- break;
- case PAGE_MODE_2_LEVEL:
- free_pt_l2(root);
- break;
- case PAGE_MODE_3_LEVEL:
- free_pt_l3(root);
- break;
- case PAGE_MODE_4_LEVEL:
- free_pt_l4(root);
- break;
- case PAGE_MODE_5_LEVEL:
- free_pt_l5(root);
- break;
- case PAGE_MODE_6_LEVEL:
- free_pt_l6(root);
- break;
- default:
- BUG();
- }
-}
-
static void free_gcr3_tbl_level1(u64 *tbl)
{
u64 *ptr;
@@ -2771,9 +2842,9 @@ int __init amd_iommu_init_dma_ops(void)
iommu_detected = 1;
if (amd_iommu_unmap_flush)
- pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
+ pr_info("IO/TLB flush on unmap enabled\n");
else
- pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
+ pr_info("Lazy IO/TLB flushing enabled\n");
return 0;
@@ -2878,7 +2949,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
case IOMMU_DOMAIN_DMA:
dma_domain = dma_ops_domain_alloc();
if (!dma_domain) {
- pr_err("AMD-Vi: Failed to allocate\n");
+ pr_err("Failed to allocate\n");
return NULL;
}
pdomain = &dma_domain->domain;
@@ -4299,7 +4370,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
* legacy mode. So, we force legacy mode instead.
*/
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
- pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n",
+ pr_debug("%s: Fall back to using intr legacy remap\n",
__func__);
pi_data->is_guest_mode = false;
}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d8f7000a466a..66123b911ec8 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -17,6 +17,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/list.h>
@@ -443,9 +445,9 @@ static void iommu_disable(struct amd_iommu *iommu)
static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
{
if (!request_mem_region(address, end, "amd_iommu")) {
- pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n",
+ pr_err("Can not reserve memory region %llx-%llx for mmio\n",
address, end);
- pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
+ pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
return NULL;
}
@@ -512,7 +514,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
u32 ivhd_size = get_ivhd_header_size(h);
if (!ivhd_size) {
- pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+ pr_err("Unsupported IVHD type %#x\n", h->type);
return -EINVAL;
}
@@ -553,7 +555,7 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table)
checksum += p[i];
if (checksum != 0) {
/* ACPI table corrupt */
- pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n");
+ pr_err(FW_BUG "IVRS invalid checksum\n");
return -ENODEV;
}
@@ -1028,7 +1030,7 @@ static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
if (!(entry->id == id && entry->cmd_line))
continue;
- pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n",
+ pr_info("Command-line override present for %s id %d - ignoring\n",
type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
*devid = entry->devid;
@@ -1061,7 +1063,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
!entry->cmd_line)
continue;
- pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n",
+ pr_info("Command-line override for hid:%s uid:%s\n",
hid, uid);
*devid = entry->devid;
return 0;
@@ -1077,7 +1079,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
entry->cmd_line = cmd_line;
entry->root_devid = (entry->devid & (~0x7));
- pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n",
+ pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
entry->cmd_line ? "cmd" : "ivrs",
entry->hid, entry->uid, entry->root_devid);
@@ -1173,7 +1175,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
*/
ivhd_size = get_ivhd_header_size(h);
if (!ivhd_size) {
- pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+ pr_err("Unsupported IVHD type %#x\n", h->type);
return -EINVAL;
}
@@ -1455,7 +1457,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
- pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n",
+ pr_info("Applying erratum 746 workaround for IOMMU at %s\n",
dev_name(&iommu->dev->dev));
/* Clear the enable writing bit */
@@ -1486,7 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
iommu_write_l2(iommu, 0x47, value | BIT(0));
- pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
+ pr_info("Applying ATS write check workaround for IOMMU at %s\n",
dev_name(&iommu->dev->dev));
}
@@ -1506,7 +1508,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->index = amd_iommus_present++;
if (unlikely(iommu->index >= MAX_IOMMUS)) {
- WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
+ WARN(1, "System has more IOMMUs than supported by this driver\n");
return -ENOSYS;
}
@@ -1674,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
(val != val2)) {
- pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
+ pr_err("Unable to write to IOMMU perf counter.\n");
amd_iommu_pc_present = false;
return;
}
- pr_info("AMD-Vi: IOMMU performance counters supported\n");
+ pr_info("IOMMU performance counters supported\n");
val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
iommu->max_banks = (u8) ((val >> 12) & 0x3f);
@@ -1840,11 +1842,11 @@ static void print_iommu_info(void)
for_each_iommu(iommu) {
int i;
- pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n",
+ pr_info("Found IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("AMD-Vi: Extended features (%#llx):\n",
+ pr_info("Extended features (%#llx):\n",
iommu->features);
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
@@ -1858,11 +1860,11 @@ static void print_iommu_info(void)
}
}
if (irq_remapping_enabled) {
- pr_info("AMD-Vi: Interrupt remapping enabled\n");
+ pr_info("Interrupt remapping enabled\n");
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
- pr_info("AMD-Vi: virtual APIC enabled\n");
+ pr_info("Virtual APIC enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
- pr_info("AMD-Vi: X2APIC enabled\n");
+ pr_info("X2APIC enabled\n");
}
}
@@ -2376,7 +2378,7 @@ static bool __init check_ioapic_information(void)
devid = get_ioapic_devid(id);
if (devid < 0) {
- pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n",
+ pr_err("%s: IOAPIC[%d] not in IVRS table\n",
fw_bug, id);
ret = false;
} else if (devid == IOAPIC_SB_DEVID) {
@@ -2394,11 +2396,11 @@ static bool __init check_ioapic_information(void)
* when the BIOS is buggy and provides us the wrong
* device id for the IOAPIC in the system.
*/
- pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug);
+ pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
}
if (!ret)
- pr_err("AMD-Vi: Disabling interrupt remapping\n");
+ pr_err("Disabling interrupt remapping\n");
return ret;
}
@@ -2453,7 +2455,7 @@ static int __init early_amd_iommu_init(void)
return -ENODEV;
else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
- pr_err("AMD-Vi: IVRS table error: %s\n", err);
+ pr_err("IVRS table error: %s\n", err);
return -EINVAL;
}
@@ -2606,7 +2608,7 @@ static bool detect_ivrs(void)
return false;
else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
- pr_err("AMD-Vi: IVRS table error: %s\n", err);
+ pr_err("IVRS table error: %s\n", err);
return false;
}
@@ -2641,7 +2643,7 @@ static int __init state_next(void)
ret = early_amd_iommu_init();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
- pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
+ pr_info("AMD IOMMU disabled on kernel command-line\n");
free_dma_resources();
free_iommu_resources();
init_state = IOMMU_CMDLINE_DISABLED;
@@ -2788,7 +2790,7 @@ static bool amd_iommu_sme_check(void)
(boot_cpu_data.microcode <= 0x080011ff))
return true;
- pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n");
+ pr_notice("IOMMU not currently supported when SME is active\n");
return false;
}
@@ -2873,12 +2875,12 @@ static int __init parse_ivrs_ioapic(char *str)
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str);
+ pr_err("Invalid command line: ivrs_ioapic%s\n", str);
return 1;
}
if (early_ioapic_map_size == EARLY_MAP_SIZE) {
- pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
+ pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
str);
return 1;
}
@@ -2903,12 +2905,12 @@ static int __init parse_ivrs_hpet(char *str)
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str);
+ pr_err("Invalid command line: ivrs_hpet%s\n", str);
return 1;
}
if (early_hpet_map_size == EARLY_MAP_SIZE) {
- pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n",
+ pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
str);
return 1;
}
@@ -2933,7 +2935,7 @@ static int __init parse_ivrs_acpihid(char *str)
ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str);
+ pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
return 1;
}
@@ -2942,7 +2944,7 @@ static int __init parse_ivrs_acpihid(char *str)
uid = p;
if (!hid || !(*hid) || !uid) {
- pr_err("AMD-Vi: Invalid command line: hid or uid\n");
+ pr_err("Invalid command line: hid or uid\n");
return 1;
}
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index e2b342e65a7b..eae0741f72dc 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -269,6 +269,7 @@
#define PAGE_MODE_4_LEVEL 0x04
#define PAGE_MODE_5_LEVEL 0x05
#define PAGE_MODE_6_LEVEL 0x06
+#define PAGE_MODE_7_LEVEL 0x07
#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index fd552235bd13..23dae9348ace 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+
#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 6947ccf26512..0d284029dc73 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -20,7 +20,8 @@
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -356,6 +357,10 @@
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param_named here.
+ */
static bool disable_bypass = 1;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
@@ -576,7 +581,11 @@ struct arm_smmu_device {
struct arm_smmu_strtab_cfg strtab_cfg;
- u32 sync_count;
+ /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
+ union {
+ u32 sync_count;
+ u64 padding;
+ };
/* IOMMU core code handle */
struct iommu_device iommu;
@@ -675,7 +684,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q)
u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
- writel(q->cons, q->cons_reg);
+
+ /*
+ * Ensure that all CPU accesses (reads and writes) to the queue
+ * are complete before we update the cons pointer.
+ */
+ mb();
+ writel_relaxed(q->cons, q->cons_reg);
}
static int queue_sync_prod(struct arm_smmu_queue *q)
@@ -828,7 +843,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
+ /*
+ * Commands are written little-endian, but we want the SMMU to
+ * receive MSIData, and thus write it back to memory, in CPU
+ * byte order, so big-endian needs an extra byteswap here.
+ */
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
+ cpu_to_le32(ent->sync.msidata));
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break;
default:
@@ -1691,24 +1712,26 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
static void arm_smmu_detach_dev(struct device *dev)
{
- struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_master_data *master = fwspec->iommu_priv;
master->ste.assigned = false;
- arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ arm_smmu_install_ste_for_dev(fwspec);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master_data *master;
struct arm_smmu_strtab_ent *ste;
- if (!dev->iommu_fwspec)
+ if (!fwspec)
return -ENOENT;
- master = dev->iommu_fwspec->iommu_priv;
+ master = fwspec->iommu_priv;
smmu = master->smmu;
ste = &master->ste;
@@ -1748,7 +1771,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
ste->s2_cfg = &smmu_domain->s2_cfg;
}
- arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ arm_smmu_install_ste_for_dev(fwspec);
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
@@ -1839,7 +1862,7 @@ static int arm_smmu_add_device(struct device *dev)
int i, ret;
struct arm_smmu_device *smmu;
struct arm_smmu_master_data *master;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct iommu_group *group;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
@@ -1890,7 +1913,7 @@ static int arm_smmu_add_device(struct device *dev)
static void arm_smmu_remove_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_data *master;
struct arm_smmu_device *smmu;
@@ -2928,37 +2951,25 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
return 0;
}
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu);
-
- return 0;
-}
-
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
-{
- arm_smmu_device_remove(pdev);
}
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v3", },
{ },
};
-MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
static struct platform_driver arm_smmu_driver = {
.driver = {
.name = "arm-smmu-v3",
.of_match_table = of_match_ptr(arm_smmu_of_match),
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
- .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-module_platform_driver(arm_smmu_driver);
-
-MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
-MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
-MODULE_LICENSE("GPL v2");
+builtin_platform_driver(arm_smmu_driver);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5a28ae892504..af18a7e7f917 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -41,13 +41,15 @@
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -101,6 +103,10 @@
#define MSI_IOVA_LENGTH 0x100000
static int force_stage;
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param() here.
+ */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
@@ -119,6 +125,7 @@ enum arm_smmu_implementation {
GENERIC_SMMU,
ARM_MMU500,
CAVIUM_SMMUV2,
+ QCOM_SMMUV2,
};
struct arm_smmu_s2cr {
@@ -206,6 +213,8 @@ struct arm_smmu_device {
u32 num_global_irqs;
u32 num_context_irqs;
unsigned int *irqs;
+ struct clk_bulk_data *clks;
+ int num_clks;
u32 cavium_id_base; /* Specific to Cavium */
@@ -267,6 +276,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+ if (pm_runtime_enabled(smmu->dev))
+ return pm_runtime_get_sync(smmu->dev);
+
+ return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_put(smmu->dev);
+}
+
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
return container_of(dom, struct arm_smmu_domain, domain);
@@ -926,11 +949,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int irq;
+ int ret, irq;
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
/*
* Disable the context bank and free the page tables before freeing
* it.
@@ -945,6 +972,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+ arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1103,7 +1132,7 @@ static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
static int arm_smmu_master_alloc_smes(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
struct arm_smmu_device *smmu = cfg->smmu;
struct arm_smmu_smr *smrs = smmu->smrs;
@@ -1206,7 +1235,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1226,10 +1255,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENODEV;
smmu = fwspec_smmu(fwspec);
+
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return ret;
+
/* Ensure that the domain is finalised */
ret = arm_smmu_init_domain_context(domain, smmu);
if (ret < 0)
- return ret;
+ goto rpm_put;
/*
* Sanity check the domain. We don't support domains across
@@ -1239,49 +1273,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
dev_err(dev,
"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
- return -EINVAL;
+ ret = -EINVAL;
+ goto rpm_put;
}
/* Looks ok, so add the device to the domain */
- return arm_smmu_domain_add_master(smmu_domain, fwspec);
+ ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+ arm_smmu_rpm_put(smmu);
+ return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ int ret;
if (!ops)
return -ENODEV;
- return ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ size_t ret;
if (!ops)
return 0;
- return ops->unmap(ops, iova, size);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->unmap(ops, iova, size);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->tlb_ops)
+ if (smmu_domain->tlb_ops) {
+ arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->tlb_ops)
+ if (smmu_domain->tlb_ops) {
+ arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1296,6 +1355,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
u32 tmp;
u64 phys;
unsigned long va, flags;
+ int ret;
+
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return 0;
cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -1324,6 +1388,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
return 0;
}
+ arm_smmu_rpm_put(smmu);
+
return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
@@ -1380,7 +1446,7 @@ static int arm_smmu_add_device(struct device *dev)
{
struct arm_smmu_device *smmu;
struct arm_smmu_master_cfg *cfg;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i, ret;
if (using_legacy_binding) {
@@ -1391,7 +1457,7 @@ static int arm_smmu_add_device(struct device *dev)
* will allocate/initialise a new one. Thus we need to update fwspec for
* later use.
*/
- fwspec = dev->iommu_fwspec;
+ fwspec = dev_iommu_fwspec_get(dev);
if (ret)
goto out_free;
} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
@@ -1428,12 +1494,21 @@ static int arm_smmu_add_device(struct device *dev)
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ goto out_cfg_free;
+
ret = arm_smmu_master_alloc_smes(dev);
+ arm_smmu_rpm_put(smmu);
+
if (ret)
goto out_cfg_free;
iommu_device_link(&smmu->iommu, dev);
+ device_link_add(dev, smmu->dev,
+ DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
return 0;
out_cfg_free:
@@ -1445,10 +1520,10 @@ out_free:
static void arm_smmu_remove_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg;
struct arm_smmu_device *smmu;
-
+ int ret;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
@@ -1456,8 +1531,15 @@ static void arm_smmu_remove_device(struct device *dev)
cfg = fwspec->iommu_priv;
smmu = cfg->smmu;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
iommu_device_unlink(&smmu->iommu, dev);
arm_smmu_master_free_smes(fwspec);
+
+ arm_smmu_rpm_put(smmu);
+
iommu_group_remove_device(dev);
kfree(fwspec->iommu_priv);
iommu_fwspec_free(dev);
@@ -1465,7 +1547,7 @@ static void arm_smmu_remove_device(struct device *dev)
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
struct iommu_group *group = NULL;
int i, idx;
@@ -1947,13 +2029,14 @@ struct arm_smmu_match_data {
};
#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
-static struct arm_smmu_match_data name = { .version = ver, .model = imp }
+static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
+ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
@@ -1962,9 +2045,9 @@ static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
+ { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
{ },
};
-MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
@@ -2150,6 +2233,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->irqs[i] = irq;
}
+ err = devm_clk_bulk_get_all(dev, &smmu->clks);
+ if (err < 0) {
+ dev_err(dev, "failed to get clocks %d\n", err);
+ return err;
+ }
+ smmu->num_clks = err;
+
+ err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
+ if (err)
+ return err;
+
err = arm_smmu_device_cfg_probe(smmu);
if (err)
return err;
@@ -2200,6 +2294,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
arm_smmu_test_smr_masks(smmu);
/*
+ * We want to avoid touching dev->power.lock in fastpaths unless
+ * it's really going to do something useful - pm_runtime_enabled()
+ * can serve as an ideal proxy for that decision. So, conditionally
+ * enable pm_runtime.
+ */
+ if (dev->pm_domain) {
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ }
+
+ /*
* For ACPI and generic DT bindings, an SMMU will be probed before
* any device which might need it, so we want the bus ops in place
* ready to handle default domain setup as soon as any SMMU exists.
@@ -2224,48 +2329,82 @@ static int arm_smmu_legacy_bus_init(void)
}
device_initcall_sync(arm_smmu_legacy_bus_init);
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
if (!smmu)
- return -ENODEV;
+ return;
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
dev_err(&pdev->dev, "removing device with active domains!\n");
+ arm_smmu_rpm_get(smmu);
/* Turn the thing off */
writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
- return 0;
+ arm_smmu_rpm_put(smmu);
+
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_force_suspend(smmu->dev);
+ else
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
+
+ clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
+static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
- arm_smmu_device_remove(pdev);
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
+ if (ret)
+ return ret;
+
+ arm_smmu_device_reset(smmu);
+
+ return 0;
}
-static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
+static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
- arm_smmu_device_reset(smmu);
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
+
return 0;
}
-static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
+static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
+{
+ if (pm_runtime_suspended(dev))
+ return 0;
+
+ return arm_smmu_runtime_resume(dev);
+}
+
+static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
+{
+ if (pm_runtime_suspended(dev))
+ return 0;
+
+ return arm_smmu_runtime_suspend(dev);
+}
+
+static const struct dev_pm_ops arm_smmu_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
+ SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
+ arm_smmu_runtime_resume, NULL)
+};
static struct platform_driver arm_smmu_driver = {
.driver = {
- .name = "arm-smmu",
- .of_match_table = of_match_ptr(arm_smmu_of_match),
- .pm = &arm_smmu_pm_ops,
+ .name = "arm-smmu",
+ .of_match_table = of_match_ptr(arm_smmu_of_match),
+ .pm = &arm_smmu_pm_ops,
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
- .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-module_platform_driver(arm_smmu_driver);
-
-MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
-MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
-MODULE_LICENSE("GPL v2");
+builtin_platform_driver(arm_smmu_driver);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 60c7e9e9901e..d19f3d6b43c1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -175,7 +175,7 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
- if (!is_of_node(dev->iommu_fwspec->iommu_fwnode))
+ if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
iort_iommu_msi_get_resv_regions(dev, list);
}
@@ -447,20 +447,17 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
kvfree(pages);
}
-static struct page **__iommu_dma_alloc_pages(unsigned int count,
- unsigned long order_mask, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(struct device *dev,
+ unsigned int count, unsigned long order_mask, gfp_t gfp)
{
struct page **pages;
- unsigned int i = 0, array_size = count * sizeof(*pages);
+ unsigned int i = 0, nid = dev_to_node(dev);
order_mask &= (2U << MAX_ORDER) - 1;
if (!order_mask)
return NULL;
- if (array_size <= PAGE_SIZE)
- pages = kzalloc(array_size, GFP_KERNEL);
- else
- pages = vzalloc(array_size);
+ pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
if (!pages)
return NULL;
@@ -479,10 +476,12 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
for (order_mask &= (2U << __fls(count)) - 1;
order_mask; order_mask &= ~order_size) {
unsigned int order = __fls(order_mask);
+ gfp_t alloc_flags = gfp;
order_size = 1U << order;
- page = alloc_pages((order_mask - order_size) ?
- gfp | __GFP_NORETRY : gfp, order);
+ if (order_mask > order_size)
+ alloc_flags |= __GFP_NORETRY;
+ page = alloc_pages_node(nid, alloc_flags, order);
if (!page)
continue;
if (!order)
@@ -567,7 +566,8 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
alloc_sizes = min_size;
count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
+ pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
+ gfp);
if (!pages)
return NULL;
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 1edf2a251336..dc9f14811e0f 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1160,6 +1160,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
int head, tail;
struct q_inval *qi = iommu->qi;
int wait_index = (index + 1) % QI_LENGTH;
+ int shift = qi_shift(iommu);
if (qi->desc_status[wait_index] == QI_ABORT)
return -EAGAIN;
@@ -1173,13 +1174,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
*/
if (fault & DMA_FSTS_IQE) {
head = readl(iommu->reg + DMAR_IQH_REG);
- if ((head >> DMAR_IQ_SHIFT) == index) {
- pr_err("VT-d detected invalid descriptor: "
- "low=%llx, high=%llx\n",
- (unsigned long long)qi->desc[index].low,
- (unsigned long long)qi->desc[index].high);
- memcpy(&qi->desc[index], &qi->desc[wait_index],
- sizeof(struct qi_desc));
+ if ((head >> shift) == index) {
+ struct qi_desc *desc = qi->desc + head;
+
+ /*
+ * desc->qw2 and desc->qw3 are either reserved or
+ * used by software as private data. We won't print
+ * out these two qw's for security consideration.
+ */
+ pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+ memcpy(desc, qi->desc + (wait_index << shift),
+ 1 << shift);
writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
return -EINVAL;
}
@@ -1191,10 +1198,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
*/
if (fault & DMA_FSTS_ITE) {
head = readl(iommu->reg + DMAR_IQH_REG);
- head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+ head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
head |= 1;
tail = readl(iommu->reg + DMAR_IQT_REG);
- tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+ tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
@@ -1222,15 +1229,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
int rc;
struct q_inval *qi = iommu->qi;
- struct qi_desc *hw, wait_desc;
+ int offset, shift, length;
+ struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
if (!qi)
return 0;
- hw = qi->desc;
-
restart:
rc = 0;
@@ -1243,16 +1249,21 @@ restart:
index = qi->free_head;
wait_index = (index + 1) % QI_LENGTH;
+ shift = qi_shift(iommu);
+ length = 1 << shift;
qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
- hw[index] = *desc;
-
- wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
+ offset = index << shift;
+ memcpy(qi->desc + offset, desc, length);
+ wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
- wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+ wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
+ wait_desc.qw2 = 0;
+ wait_desc.qw3 = 0;
- hw[wait_index] = wait_desc;
+ offset = wait_index << shift;
+ memcpy(qi->desc + offset, &wait_desc, length);
qi->free_head = (qi->free_head + 2) % QI_LENGTH;
qi->free_cnt -= 2;
@@ -1261,7 +1272,7 @@ restart:
* update the HW tail register indicating the presence of
* new descriptors.
*/
- writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
+ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
while (qi->desc_status[wait_index] != QI_DONE) {
/*
@@ -1298,8 +1309,10 @@ void qi_global_iec(struct intel_iommu *iommu)
{
struct qi_desc desc;
- desc.low = QI_IEC_TYPE;
- desc.high = 0;
+ desc.qw0 = QI_IEC_TYPE;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
/* should never fail */
qi_submit_sync(&desc, iommu);
@@ -1310,9 +1323,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
{
struct qi_desc desc;
- desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
+ desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
| QI_CC_GRAN(type) | QI_CC_TYPE;
- desc.high = 0;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1331,10 +1346,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
if (cap_read_drain(iommu->cap))
dr = 1;
- desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+ desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+ desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
| QI_IOTLB_AM(size_order);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1347,15 +1364,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (mask) {
WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+ desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
} else
- desc.high = QI_DEV_IOTLB_ADDR(addr);
+ desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
if (qdep >= QI_DEV_IOTLB_MAX_INVS)
qdep = 0;
- desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+ desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1403,16 +1422,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
u32 sts;
unsigned long flags;
struct q_inval *qi = iommu->qi;
+ u64 val = virt_to_phys(qi->desc);
qi->free_head = qi->free_tail = 0;
qi->free_cnt = QI_LENGTH;
+ /*
+ * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
+ * is present.
+ */
+ if (ecap_smts(iommu->ecap))
+ val |= (1 << 11) | 1;
+
raw_spin_lock_irqsave(&iommu->register_lock, flags);
/* write zero to the tail reg */
writel(0, iommu->reg + DMAR_IQT_REG);
- dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+ dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
iommu->gcmd |= DMA_GCMD_QIE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1448,8 +1475,12 @@ int dmar_enable_qi(struct intel_iommu *iommu)
qi = iommu->qi;
-
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
+ /*
+ * Need two pages to accommodate 256 descriptors of 256 bits each
+ * if the remapping hardware supports scalable mode translation.
+ */
+ desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+ !!ecap_smts(iommu->ecap));
if (!desc_page) {
kfree(qi);
iommu->qi = NULL;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 63b6ce78492a..2bd9ac285c0d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -292,49 +292,6 @@ static inline void context_clear_entry(struct context_entry *context)
}
/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-10: available
- * 11: snoop behavior
- * 12-63: Host physcial address
- */
-struct dma_pte {
- u64 val;
-};
-
-static inline void dma_clear_pte(struct dma_pte *pte)
-{
- pte->val = 0;
-}
-
-static inline u64 dma_pte_addr(struct dma_pte *pte)
-{
-#ifdef CONFIG_64BIT
- return pte->val & VTD_PAGE_MASK;
-#else
- /* Must have a full atomic 64-bit read */
- return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
-#endif
-}
-
-static inline bool dma_pte_present(struct dma_pte *pte)
-{
- return (pte->val & 3) != 0;
-}
-
-static inline bool dma_pte_superpage(struct dma_pte *pte)
-{
- return (pte->val & DMA_PTE_LARGE_PAGE);
-}
-
-static inline int first_pte_in_page(struct dma_pte *pte)
-{
- return !((unsigned long)pte & ~VTD_PAGE_MASK);
-}
-
-/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
* 2. It maps to each iommu if successful.
@@ -406,38 +363,16 @@ static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
-static int intel_iommu_ecs = 1;
-static int intel_iommu_pasid28;
+static int intel_iommu_sm = 1;
static int iommu_identity_mapping;
#define IDENTMAP_ALL 1
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
-/* Broadwell and Skylake have broken ECS support — normal so-called "second
- * level" translation of DMA requests-without-PASID doesn't actually happen
- * unless you also set the NESTE bit in an extended context-entry. Which of
- * course means that SVM doesn't work because it's trying to do nested
- * translation of the physical addresses it finds in the process page tables,
- * through the IOVA->phys mapping found in the "second level" page tables.
- *
- * The VT-d specification was retroactively changed to change the definition
- * of the capability bits and pretend that Broadwell/Skylake never happened...
- * but unfortunately the wrong bit was changed. It's ECS which is broken, but
- * for some reason it was the PASID capability bit which was redefined (from
- * bit 28 on BDW/SKL to bit 40 in future).
- *
- * So our test for ECS needs to eschew those implementations which set the old
- * PASID capabiity bit 28, since those are the ones on which ECS is broken.
- * Unless we are working around the 'pasid28' limitations, that is, by putting
- * the device into passthrough mode for normal DMA and thus masking the bug.
- */
-#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
- (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
-/* PASID support is thus enabled if ECS is enabled and *either* of the old
- * or new capability bits are set. */
-#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
- (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
+#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
+#define pasid_supported(iommu) (sm_supported(iommu) && \
+ ecap_pasid((iommu)->ecap))
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -448,21 +383,24 @@ static LIST_HEAD(device_domain_list);
/*
* Iterate over elements in device_domain_list and call the specified
- * callback @fn against each element. This helper should only be used
- * in the context where the device_domain_lock has already been holden.
+ * callback @fn against each element.
*/
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
void *data), void *data)
{
int ret = 0;
+ unsigned long flags;
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
+ spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &device_domain_list, global) {
ret = fn(info, data);
- if (ret)
+ if (ret) {
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
+ }
}
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return 0;
}
@@ -518,15 +456,9 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "sp_off", 6)) {
pr_info("Disable supported super page\n");
intel_iommu_superpage = 0;
- } else if (!strncmp(str, "ecs_off", 7)) {
- printk(KERN_INFO
- "Intel-IOMMU: disable extended context table support\n");
- intel_iommu_ecs = 0;
- } else if (!strncmp(str, "pasid28", 7)) {
- printk(KERN_INFO
- "Intel-IOMMU: enable pre-production PASID support\n");
- intel_iommu_pasid28 = 1;
- iommu_identity_mapping |= IDENTMAP_GFX;
+ } else if (!strncmp(str, "sm_off", 6)) {
+ pr_info("Intel-IOMMU: disable scalable mode support\n");
+ intel_iommu_sm = 0;
} else if (!strncmp(str, "tboot_noforce", 13)) {
printk(KERN_INFO
"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
@@ -773,7 +705,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u64 *entry;
entry = &root->lo;
- if (ecs_enabled(iommu)) {
+ if (sm_supported(iommu)) {
if (devfn >= 0x80) {
devfn -= 0x80;
entry = &root->hi;
@@ -915,7 +847,7 @@ static void free_context_table(struct intel_iommu *iommu)
if (context)
free_pgtable_page(context);
- if (!ecs_enabled(iommu))
+ if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
@@ -1267,8 +1199,8 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
unsigned long flag;
addr = virt_to_phys(iommu->root_entry);
- if (ecs_enabled(iommu))
- addr |= DMA_RTADDR_RTT;
+ if (sm_supported(iommu))
+ addr |= DMA_RTADDR_SMT;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
@@ -1282,7 +1214,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
u32 val;
unsigned long flag;
@@ -1694,6 +1626,16 @@ static int iommu_init_domains(struct intel_iommu *iommu)
*/
set_bit(0, iommu->domain_ids);
+ /*
+ * VT-d spec rev3.0 (section 6.2.3.1) requires that each pasid
+ * entry for first-level or pass-through translation modes should
+ * be programmed with a domain id different from those used for
+ * second-level or nested translation. We reserve a domain id for
+ * this purpose.
+ */
+ if (sm_supported(iommu))
+ set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
+
return 0;
}
@@ -1758,10 +1700,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
free_context_table(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu)) {
+ if (pasid_supported(iommu)) {
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
- intel_svm_exit(iommu);
}
#endif
}
@@ -1981,8 +1922,59 @@ static void domain_exit(struct dmar_domain *domain)
free_domain_mem(domain);
}
+/*
+ * Get the PASID directory size for scalable mode context entry.
+ * Value of X in the PDTS field of a scalable mode context entry
+ * indicates PASID directory with 2^(X + 7) entries.
+ *
+ * The number of directory entries is table->max_pasid shifted right by
+ * PASID_PDE_SHIFT. The position of its lowest set bit, minus 7, is
+ * returned; positions below 7 yield 0.
+ */
+static inline unsigned long context_get_sm_pds(struct pasid_table *table)
+{
+	/*
+	 * find_first_bit() scans unsigned long words, so the value has to
+	 * be stored in one. Scanning an 'int' through a pointer cast would
+	 * read beyond the object on 64-bit builds.
+	 */
+	unsigned long pds, max_pde;
+
+	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
+	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
+	if (pds < 7)
+		return 0;
+
+	return pds - 7;
+}
+
+/*
+ * Program the RID_PASID field of a scalable mode context entry. The
+ * IOMMU hardware uses the PASID stored there when it translates DMA
+ * requests that carry no PASID. Only the low 20 bits of @pasid are
+ * kept.
+ */
+static inline void
+context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
+{
+	const u64 rid_pasid = pasid & ((1 << 20) - 1);
+
+	/*
+	 * Bit 20 of the high quadword is set as well.
+	 * NOTE(review): its meaning is not visible here; confirm with the spec.
+	 */
+	context->hi |= rid_pasid | (1 << 20);
+}
+
+/*
+ * Turn on the DTE (Device-TLB Enable) field of a scalable mode context
+ * entry, i.e. bit 2 of the low quadword.
+ */
+static inline void context_set_sm_dte(struct context_entry *context)
+{
+	const u64 dte = 1 << 2;
+
+	context->lo |= dte;
+}
+
+/*
+ * Turn on the PRE (Page Request Enable) field of a scalable mode
+ * context entry, i.e. bit 4 of the low quadword.
+ */
+static inline void context_set_sm_pre(struct context_entry *context)
+{
+	const u64 pre = 1 << 4;
+
+	context->lo |= pre;
+}
+
+/*
+ * Convert value to context PASID directory size field coding: the low
+ * three bits of @pds are kept and shifted to bits 9-11.
+ */
+#define context_pdts(pds) (((pds) & 0x7) << 9)
+
static int domain_context_mapping_one(struct dmar_domain *domain,
struct intel_iommu *iommu,
+ struct pasid_table *table,
u8 bus, u8 devfn)
{
u16 did = domain->iommu_did[iommu->seq_id];
@@ -1990,8 +1982,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct device_domain_info *info = NULL;
struct context_entry *context;
unsigned long flags;
- struct dma_pte *pgd;
- int ret, agaw;
+ int ret;
WARN_ON(did == 0);
@@ -2037,41 +2028,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
}
}
- pgd = domain->pgd;
-
context_clear_entry(context);
- context_set_domain_id(context, did);
- /*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
- if (translation != CONTEXT_TT_PASS_THROUGH) {
- for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
+ if (sm_supported(iommu)) {
+ unsigned long pds;
- info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
- if (info && info->ats_supported)
- translation = CONTEXT_TT_DEV_IOTLB;
- else
- translation = CONTEXT_TT_MULTI_LEVEL;
+ WARN_ON(!table);
+
+ /* Setup the PASID DIR pointer: */
+ pds = context_get_sm_pds(table);
+ context->lo = (u64)virt_to_phys(table->table) |
+ context_pdts(pds);
+
+ /* Setup the RID_PASID field: */
+ context_set_sm_rid2pasid(context, PASID_RID2PASID);
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, iommu->agaw);
- } else {
/*
- * In pass through mode, AW must be programmed to
- * indicate the largest AGAW value supported by
- * hardware. And ASR is ignored by hardware.
+ * Setup the Device-TLB enable bit and Page request
+ * Enable bit:
*/
- context_set_address_width(context, iommu->msagaw);
+ info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
+ if (info && info->ats_supported)
+ context_set_sm_dte(context);
+ if (info && info->pri_supported)
+ context_set_sm_pre(context);
+ } else {
+ struct dma_pte *pgd = domain->pgd;
+ int agaw;
+
+ context_set_domain_id(context, did);
+ context_set_translation_type(context, translation);
+
+ if (translation != CONTEXT_TT_PASS_THROUGH) {
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ ret = -ENOMEM;
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ goto out_unlock;
+ }
+
+ info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
+
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_address_width(context, agaw);
+ } else {
+ /*
+ * In pass through mode, AW must be programmed to
+ * indicate the largest AGAW value supported by
+ * hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
+ }
}
- context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
domain_flush_cache(domain, context, sizeof(*context));
@@ -2105,6 +2122,7 @@ out_unlock:
struct domain_context_mapping_data {
struct dmar_domain *domain;
struct intel_iommu *iommu;
+ struct pasid_table *table;
};
static int domain_context_mapping_cb(struct pci_dev *pdev,
@@ -2113,25 +2131,31 @@ static int domain_context_mapping_cb(struct pci_dev *pdev,
struct domain_context_mapping_data *data = opaque;
return domain_context_mapping_one(data->domain, data->iommu,
- PCI_BUS_NUM(alias), alias & 0xff);
+ data->table, PCI_BUS_NUM(alias),
+ alias & 0xff);
}
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
+ struct domain_context_mapping_data data;
+ struct pasid_table *table;
struct intel_iommu *iommu;
u8 bus, devfn;
- struct domain_context_mapping_data data;
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
+ table = intel_pasid_get_table(dev);
+
if (!dev_is_pci(dev))
- return domain_context_mapping_one(domain, iommu, bus, devfn);
+ return domain_context_mapping_one(domain, iommu, table,
+ bus, devfn);
data.domain = domain;
data.iommu = iommu;
+ data.table = table;
return pci_for_each_dma_alias(to_pci_dev(dev),
&domain_context_mapping_cb, &data);
@@ -2467,8 +2491,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
dmar_find_matched_atsr_unit(pdev))
info->ats_supported = 1;
- if (ecs_enabled(iommu)) {
- if (pasid_enabled(iommu)) {
+ if (sm_supported(iommu)) {
+ if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
if (features >= 0)
info->pasid_supported = features | 1;
@@ -2514,16 +2538,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(&info->global, &device_domain_list);
if (dev)
dev->archdata.iommu = info;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
- if (dev && dev_is_pci(dev) && info->pasid_supported) {
+ /* PASID table is mandatory for a PCI device in scalable mode. */
+ if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
- pr_warn("No pasid table for %s, pasid disabled\n",
- dev_name(dev));
- info->pasid_supported = 0;
+ pr_err("PASID table allocation for %s failed\n",
+ dev_name(dev));
+ dmar_remove_one_dev_info(domain, dev);
+ return NULL;
+ }
+
+ /* Setup the PASID entry for requests without PASID: */
+ spin_lock(&iommu->lock);
+ if (hw_pass_through && domain_type_is_si(domain))
+ ret = intel_pasid_setup_pass_through(iommu, domain,
+ dev, PASID_RID2PASID);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain,
+ dev, PASID_RID2PASID);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ pr_err("Setup RID2PASID for %s failed\n",
+ dev_name(dev));
+ dmar_remove_one_dev_info(domain, dev);
+ return NULL;
}
}
- spin_unlock_irqrestore(&device_domain_lock, flags);
if (dev && domain_context_mapping(domain, dev)) {
pr_err("Domain context map for %s failed\n", dev_name(dev));
@@ -3287,7 +3329,7 @@ static int __init init_dmars(void)
* We need to ensure the system pasid table is no bigger
* than the smallest supported.
*/
- if (pasid_enabled(iommu)) {
+ if (pasid_supported(iommu)) {
u32 temp = 2 << ecap_pss(iommu->ecap);
intel_pasid_max_id = min_t(u32, temp,
@@ -3348,7 +3390,7 @@ static int __init init_dmars(void)
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu))
+ if (pasid_supported(iommu))
intel_svm_init(iommu);
#endif
}
@@ -3452,7 +3494,7 @@ domains_done:
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
ret = intel_svm_enable_prq(iommu);
if (ret)
goto free_iommu;
@@ -4335,7 +4377,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
goto out;
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu))
+ if (pasid_supported(iommu))
intel_svm_init(iommu);
#endif
@@ -4352,7 +4394,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
ret = intel_svm_enable_prq(iommu);
if (ret)
goto disable_iommu;
@@ -4927,6 +4969,10 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
iommu = info->iommu;
if (info->dev) {
+ if (dev_is_pci(info->dev) && sm_supported(iommu))
+ intel_pasid_tear_down_entry(iommu, info->dev,
+ PASID_RID2PASID);
+
iommu_disable_dev_iotlb(info);
domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
@@ -5254,19 +5300,6 @@ static void intel_iommu_put_resv_regions(struct device *dev,
}
#ifdef CONFIG_INTEL_IOMMU_SVM
-#define MAX_NR_PASID_BITS (20)
-static inline unsigned long intel_iommu_get_pts(struct device *dev)
-{
- int pts, max_pasid;
-
- max_pasid = intel_pasid_get_dev_max_id(dev);
- pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
- if (pts < 5)
- return 0;
-
- return pts - 5;
-}
-
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
struct device_domain_info *info;
@@ -5298,33 +5331,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
sdev->sid = PCI_DEVID(info->bus, info->devfn);
if (!(ctx_lo & CONTEXT_PASIDE)) {
- if (iommu->pasid_state_table)
- context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
- intel_iommu_get_pts(sdev->dev);
-
- wmb();
- /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
- * extended to permit requests-with-PASID if the PASIDE bit
- * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
- * however, the PASIDE bit is ignored and requests-with-PASID
- * are unconditionally blocked. Which makes less sense.
- * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
- * "guest mode" translation types depending on whether ATS
- * is available or not. Annoyingly, we can't use the new
- * modes *unless* PASIDE is set. */
- if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
- ctx_lo &= ~CONTEXT_TT_MASK;
- if (info->ats_supported)
- ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
- else
- ctx_lo |= CONTEXT_TT_PT_PASID << 2;
- }
ctx_lo |= CONTEXT_PASIDE;
- if (iommu->pasid_state_table)
- ctx_lo |= CONTEXT_DINVE;
- if (info->pri_supported)
- ctx_lo |= CONTEXT_PRS;
context[0].lo = ctx_lo;
wmb();
iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9bd4d33..53fe5248d8f1 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,8 @@
#define pr_fmt(fmt) "DMAR: " fmt
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
@@ -123,12 +125,13 @@ int intel_pasid_alloc_table(struct device *dev)
struct pasid_table *pasid_table;
struct pasid_table_opaque data;
struct page *pages;
- size_t size, count;
+ int max_pasid = 0;
int ret, order;
+ int size;
+ might_sleep();
info = dev->archdata.iommu;
- if (WARN_ON(!info || !dev_is_pci(dev) ||
- !info->pasid_supported || info->pasid_table))
+ if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
/* DMA alias device already has a pasid table, use it: */
@@ -138,23 +141,25 @@ int intel_pasid_alloc_table(struct device *dev)
if (ret)
goto attach_out;
- pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC);
+ pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
if (!pasid_table)
return -ENOMEM;
INIT_LIST_HEAD(&pasid_table->dev);
- size = sizeof(struct pasid_entry);
- count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
- order = get_order(size * count);
+ if (info->pasid_supported)
+ max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
+ intel_pasid_max_id);
+
+ size = max_pasid >> (PASID_PDE_SHIFT - 3);
+ order = size ? get_order(size) : 0;
pages = alloc_pages_node(info->iommu->node,
- GFP_ATOMIC | __GFP_ZERO,
- order);
+ GFP_KERNEL | __GFP_ZERO, order);
if (!pages)
return -ENOMEM;
pasid_table->table = page_address(pages);
pasid_table->order = order;
- pasid_table->max_pasid = count;
+ pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
attach_out:
device_attach_pasid_table(info, pasid_table);
@@ -162,14 +167,33 @@ attach_out:
return 0;
}
+/*
+ * Report whether the PRESENT bit (PASID_PTE_PRESENT) is set in a PASID
+ * directory entry. The entry is sampled once with READ_ONCE().
+ */
+static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+	u64 entry = READ_ONCE(pde->val);
+
+	return entry & PASID_PTE_PRESENT;
+}
+
+/*
+ * Return the kernel virtual address of the PASID table a directory
+ * entry points to, or NULL when the entry is not present.
+ */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+	if (pasid_pde_is_present(pde))
+		return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
+
+	return NULL;
+}
+
void intel_pasid_free_table(struct device *dev)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
+ struct pasid_entry *table;
+ int i, max_pde;
info = dev->archdata.iommu;
- if (!info || !dev_is_pci(dev) ||
- !info->pasid_supported || !info->pasid_table)
+ if (!info || !dev_is_pci(dev) || !info->pasid_table)
return;
pasid_table = info->pasid_table;
@@ -178,6 +202,14 @@ void intel_pasid_free_table(struct device *dev)
if (!list_empty(&pasid_table->dev))
return;
+ /* Free scalable mode PASID directory tables: */
+ dir = pasid_table->table;
+ max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
+ for (i = 0; i < max_pde; i++) {
+ table = get_pasid_table_from_pde(&dir[i]);
+ free_pgtable_page(table);
+ }
+
free_pages((unsigned long)pasid_table->table, pasid_table->order);
kfree(pasid_table);
}
@@ -206,17 +238,37 @@ int intel_pasid_get_dev_max_id(struct device *dev)
struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
+ struct device_domain_info *info;
struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
struct pasid_entry *entries;
+ int dir_index, index;
pasid_table = intel_pasid_get_table(dev);
if (WARN_ON(!pasid_table || pasid < 0 ||
pasid >= intel_pasid_get_dev_max_id(dev)))
return NULL;
- entries = pasid_table->table;
+ dir = pasid_table->table;
+ info = dev->archdata.iommu;
+ dir_index = pasid >> PASID_PDE_SHIFT;
+ index = pasid & PASID_PTE_MASK;
+
+ spin_lock(&pasid_lock);
+ entries = get_pasid_table_from_pde(&dir[dir_index]);
+ if (!entries) {
+ entries = alloc_pgtable_page(info->iommu->node);
+ if (!entries) {
+ spin_unlock(&pasid_lock);
+ return NULL;
+ }
+
+ WRITE_ONCE(dir[dir_index].val,
+ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+ }
+ spin_unlock(&pasid_lock);
- return &entries[pasid];
+ return &entries[index];
}
/*
@@ -224,10 +276,17 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
*/
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
- WRITE_ONCE(pe->val, 0);
+ WRITE_ONCE(pe->val[0], 0);
+ WRITE_ONCE(pe->val[1], 0);
+ WRITE_ONCE(pe->val[2], 0);
+ WRITE_ONCE(pe->val[3], 0);
+ WRITE_ONCE(pe->val[4], 0);
+ WRITE_ONCE(pe->val[5], 0);
+ WRITE_ONCE(pe->val[6], 0);
+ WRITE_ONCE(pe->val[7], 0);
}
-void intel_pasid_clear_entry(struct device *dev, int pasid)
+static void intel_pasid_clear_entry(struct device *dev, int pasid)
{
struct pasid_entry *pe;
@@ -237,3 +296,361 @@ void intel_pasid_clear_entry(struct device *dev, int pasid)
pasid_clear_entry(pe);
}
+
+/*
+ * Replace the bits selected by @mask in *@ptr with @bits. @bits is
+ * OR-ed in unmodified, so the caller must pass it already shifted
+ * into the position covered by @mask.
+ */
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+	u64 updated = READ_ONCE(*ptr);
+
+	updated &= ~mask;
+	updated |= bits;
+	WRITE_ONCE(*ptr, updated);
+}
+
+/*
+ * Program the DID (Domain Identifier) field, bits 64~79 of a scalable
+ * mode PASID entry: the low 16 bits of the second quadword.
+ */
+static inline void pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+	const u64 did_mask = GENMASK_ULL(15, 0);
+
+	pasid_set_bits(&pe->val[1], did_mask, value);
+}
+
+/*
+ * Read back the domain ID stored in the low 16 bits of the second
+ * quadword of a scalable mode PASID entry.
+ */
+static inline u16 pasid_get_domain_id(struct pasid_entry *pe)
+{
+	u64 qw1 = READ_ONCE(pe->val[1]);
+
+	return (u16)(qw1 & GENMASK_ULL(15, 0));
+}
+
+/*
+ * Program the SLPTPTR (Second Level Page Table Pointer) field, bits
+ * 12~63 of a scalable mode PASID entry. The field is selected with
+ * VTD_PAGE_MASK in the first quadword.
+ */
+static inline void pasid_set_slptr(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, VTD_PAGE_MASK, value);
+}
+
+/*
+ * Program the AW (Address Width) field, bits 2~4 of a scalable mode
+ * PASID entry. @value is the unshifted field value.
+ */
+static inline void pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+	const u64 aw = value << 2;
+
+	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), aw);
+}
+
+/*
+ * Program the PGTT (PASID Granular Translation Type) field, bits 6~8
+ * of a scalable mode PASID entry. @value is the unshifted field value.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+	const u64 pgtt = value << 6;
+
+	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), pgtt);
+}
+
+/*
+ * Enable fault processing: the FPD (Fault Processing Disable) field,
+ * bit 1 of a scalable mode PASID entry, is cleared.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+	const u64 fpd = 1 << 1;
+
+	pasid_set_bits(&pe->val[0], fpd, 0);
+}
+
+/*
+ * Set the SRE (Supervisor Request Enable) field, bit 128 of a scalable
+ * mode PASID entry: bit 0 of the third quadword.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+	const u64 sre = 1 << 0;
+
+	pasid_set_bits(&pe->val[2], sre, sre);
+}
+
+/*
+ * Set the P (Present) field, bit 0 of a scalable mode PASID entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+	const u64 present = 1 << 0;
+
+	pasid_set_bits(&pe->val[0], present, present);
+}
+
+/*
+ * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
+ * entry: bit 23 of the second quadword. The bit is set when @value is
+ * true and cleared otherwise.
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+	const u64 pwsnp = 1ULL << 23;
+
+	/*
+	 * pasid_set_bits() ORs in its last argument as given, so it must
+	 * be positioned at bit 23. A bare 0/1 would instead land on bit 0
+	 * of this quadword, which belongs to the domain ID field.
+	 */
+	pasid_set_bits(&pe->val[1], pwsnp, value ? pwsnp : 0);
+}
+
+/*
+ * Program the First Level Page table Pointer field, bits 140~191 of a
+ * scalable mode PASID entry. The field is selected with VTD_PAGE_MASK
+ * in the third quadword.
+ */
+static inline void pasid_set_flptr(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw2 = &pe->val[2];
+
+	pasid_set_bits(qw2, VTD_PAGE_MASK, value);
+}
+
+/*
+ * Program the First Level Paging Mode field, bits 130~131 of a
+ * scalable mode PASID entry: bits 2~3 of the third quadword. @value
+ * is the unshifted field value.
+ */
+static inline void pasid_set_flpm(struct pasid_entry *pe, u64 value)
+{
+	const u64 flpm = value << 2;
+
+	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), flpm);
+}
+
+/*
+ * Build a descriptor from QI_PC_DID(@did), QI_PC_PASID_SEL and
+ * QI_PC_PASID(@pasid), then submit it with qi_submit_sync().
+ */
+static void
+pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
+				    u16 did, int pasid)
+{
+	struct qi_desc desc = {
+		.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid),
+		.qw1 = 0,
+		.qw2 = 0,
+		.qw3 = 0,
+	};
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Build an extended IOTLB descriptor for @did and @pasid with
+ * granularity QI_GRAN_NONG_PASID, then submit it with qi_submit_sync().
+ */
+static void
+iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
+{
+	struct qi_desc desc = {
+		.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
+		       QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE,
+		.qw1 = 0,
+		.qw2 = 0,
+		.qw3 = 0,
+	};
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Flush the device IOTLB of @dev through qi_flush_dev_iotlb(), using
+ * address 0 and size order 64 - VTD_PAGE_SHIFT. Nothing is done when
+ * the device has no domain info or ATS is not enabled for it. The
+ * @pasid argument is not used by this function.
+ */
+static void
+devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
+			       struct device *dev, int pasid)
+{
+	struct device_domain_info *info = dev->archdata.iommu;
+	u16 sid, pfsid, qdep;
+
+	if (!info)
+		return;
+	if (!info->ats_enabled)
+		return;
+
+	pfsid = info->pfsid;
+	qdep = info->ats_qdep;
+	sid = info->bus << 8 | info->devfn;
+
+	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+}
+
+/*
+ * Clear the PASID table entry of @pasid for @dev and invalidate the
+ * PASID cache, the IOTLB and (outside caching mode) the device IOTLB
+ * for the domain the entry belonged to.
+ */
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
+				 struct device *dev, int pasid)
+{
+	struct pasid_entry *pte;
+	u16 did;
+
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (WARN_ON(!pte))
+		return;
+
+	/*
+	 * Fetch the domain ID before the entry is wiped. Reading it after
+	 * the clear always yields 0, and the invalidations below would
+	 * then be issued for the wrong domain.
+	 */
+	did = pasid_get_domain_id(pte);
+	intel_pasid_clear_entry(dev, pasid);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
+	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+	iotlb_invalidation_with_pasid(iommu, did, pasid);
+
+	/* Device IOTLB doesn't need to be flushed in caching mode. */
+	if (!cap_caching_mode(iommu->cap))
+		devtlb_invalidation_with_pasid(iommu, dev, pasid);
+}
+
+/*
+ * Set up the scalable mode pasid table entry for first-level-only
+ * translation type.
+ *
+ * @pgd is the first level page table, @did the domain ID to program and
+ * @flags may contain PASID_FLAG_SUPERVISOR_MODE.
+ *
+ * Returns 0 on success, or -EINVAL when the hardware lacks first level
+ * translation support, lacks supervisor request support while
+ * supervisor mode was requested, or no PASID entry could be obtained.
+ */
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+				  struct device *dev, pgd_t *pgd,
+				  int pasid, u16 did, int flags)
+{
+	struct pasid_entry *pte;
+
+	if (!ecap_flts(iommu->ecap)) {
+		pr_err("No first level translation support on %s\n",
+		       iommu->name);
+		return -EINVAL;
+	}
+
+	/*
+	 * Reject an unsupported request before the entry is touched, so
+	 * that a failure does not leave a cleared, half-written entry.
+	 */
+	if ((flags & PASID_FLAG_SUPERVISOR_MODE) && !ecap_srs(iommu->ecap)) {
+		pr_err("No supervisor request support on %s\n",
+		       iommu->name);
+		return -EINVAL;
+	}
+
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (WARN_ON(!pte))
+		return -EINVAL;
+
+	pasid_clear_entry(pte);
+
+	/* Setup the first level page table pointer: */
+	pasid_set_flptr(pte, (u64)__pa(pgd));
+	if (flags & PASID_FLAG_SUPERVISOR_MODE)
+		pasid_set_sre(pte);
+
+#ifdef CONFIG_X86
+	if (cpu_feature_enabled(X86_FEATURE_LA57))
+		pasid_set_flpm(pte, 1);
+#endif /* CONFIG_X86 */
+
+	pasid_set_domain_id(pte, did);
+	pasid_set_address_width(pte, iommu->agaw);
+	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+	/* Setup Present and PASID Granular Translation Type: */
+	pasid_set_translation_type(pte, 1);
+	pasid_set_present(pte);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
+	if (cap_caching_mode(iommu->cap)) {
+		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+		iotlb_invalidation_with_pasid(iommu, did, pasid);
+	} else {
+		iommu_flush_write_buffer(iommu);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the scalable mode pasid entry for second-level-only translation
+ * type, using the page table and domain ID of @domain on @iommu.
+ *
+ * Returns 0 on success, -EINVAL when the hardware lacks second level
+ * translation support or the domain page table has a non-present level,
+ * and -ENODEV when no PASID entry could be obtained for @pasid.
+ */
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid)
+{
+ struct pasid_entry *pte;
+ struct dma_pte *pgd;
+ u64 pgd_val;
+ int agaw;
+ u16 did;
+
+ /*
+ * If hardware advertises no support for second level
+ * translation, return directly.
+ */
+ if (!ecap_slts(iommu->ecap)) {
+ pr_err("No second level translation support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ /*
+ * Skip top levels of page tables for iommu which has less agaw
+ * than the domain: descend one level per iteration until agaw
+ * is no longer greater than iommu->agaw.
+ */
+ pgd = domain->pgd;
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd)) {
+ dev_err(dev, "Invalid domain page table\n");
+ return -EINVAL;
+ }
+ }
+
+ pgd_val = virt_to_phys(pgd);
+ did = domain->iommu_did[iommu->seq_id];
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_address_width(pte, agaw); /* agaw as left by the loop above */
+ pasid_set_translation_type(pte, 2); /* PGTT value used here for second-level-only */
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /*
+ * Since it is a second level only translation setup, we should
+ * set SRE bit as well (addresses are expected to be GPAs).
+ * The present bit is set last, after all other fields.
+ */
+ pasid_set_sre(pte);
+ pasid_set_present(pte);
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ if (cap_caching_mode(iommu->cap)) {
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+ iotlb_invalidation_with_pasid(iommu, did, pasid);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+
+ return 0;
+}
+
+/*
+ * Set up the scalable mode pasid entry for passthrough translation type.
+ * The entry is programmed with the reserved domain ID FLPT_DEFAULT_DID;
+ * @domain is not referenced by this function.
+ *
+ * Returns 0 on success, or -ENODEV when no PASID entry could be obtained
+ * for @pasid.
+ */
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid)
+{
+ u16 did = FLPT_DEFAULT_DID;
+ struct pasid_entry *pte;
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_translation_type(pte, 4); /* PGTT value used here for pass-through */
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /*
+ * We should set SRE bit as well since the addresses are expected
+ * to be GPAs. The present bit is set last, after all other fields.
+ */
+ pasid_set_sre(pte);
+ pasid_set_present(pte);
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ if (cap_caching_mode(iommu->cap)) {
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+ iotlb_invalidation_with_pasid(iommu, did, pasid);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+
+ return 0;
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 1fb5e12b029a..23537b3f34e3 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -10,13 +10,37 @@
#ifndef __INTEL_PASID_H
#define __INTEL_PASID_H
+#define PASID_RID2PASID 0x0
#define PASID_MIN 0x1
-#define PASID_MAX 0x20000
+#define PASID_MAX 0x100000
+#define PASID_PTE_MASK 0x3F
+#define PASID_PTE_PRESENT 1
+#define PDE_PFN_MASK PAGE_MASK
+#define PASID_PDE_SHIFT 6
+#define MAX_NR_PASID_BITS 20
-struct pasid_entry {
+/*
+ * Domain ID reserved for pasid entries programmed for first-level
+ * only and pass-through translation modes.
+ */
+#define FLPT_DEFAULT_DID 1
+
+/*
+ * The SUPERVISOR_MODE flag indicates a first level translation which
+ * can be used for access to kernel addresses. It is valid only for
+ * access to the kernel's static 1:1 mapping of physical memory — not
+ * to vmalloc or even module mappings.
+ */
+#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+
+struct pasid_dir_entry {
u64 val;
};
+struct pasid_entry {
+ u64 val[8];
+};
+
/* The representative of a PASID table */
struct pasid_table {
void *table; /* pasid table pointer */
@@ -34,6 +58,16 @@ void intel_pasid_free_table(struct device *dev);
struct pasid_table *intel_pasid_get_table(struct device *dev);
int intel_pasid_get_dev_max_id(struct device *dev);
struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
-void intel_pasid_clear_entry(struct device *dev, int pasid);
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ int pasid, u16 did, int flags);
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid);
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid);
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
+ struct device *dev, int pasid);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 887150907526..a2a2aa4439aa 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -29,21 +29,10 @@
#include "intel-pasid.h"
-#define PASID_ENTRY_P BIT_ULL(0)
-#define PASID_ENTRY_FLPM_5LP BIT_ULL(9)
-#define PASID_ENTRY_SRE BIT_ULL(11)
-
static irqreturn_t prq_event_thread(int irq, void *d);
-struct pasid_state_entry {
- u64 val;
-};
-
int intel_svm_init(struct intel_iommu *iommu)
{
- struct page *pages;
- int order;
-
if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
!cap_fl1gp_support(iommu->cap))
return -EINVAL;
@@ -52,41 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu)
!cap_5lp_support(iommu->cap))
return -EINVAL;
- /* Start at 2 because it's defined as 2^(1+PSS) */
- iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
-
- /* Eventually I'm promised we will get a multi-level PASID table
- * and it won't have to be physically contiguous. Until then,
- * limit the size because 8MiB contiguous allocations can be hard
- * to come by. The limit of 0x20000, which is 1MiB for each of
- * the PASID and PASID-state tables, is somewhat arbitrary. */
- if (iommu->pasid_max > 0x20000)
- iommu->pasid_max = 0x20000;
-
- order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
- if (ecap_dis(iommu->ecap)) {
- /* Just making it explicit... */
- BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (pages)
- iommu->pasid_state_table = page_address(pages);
- else
- pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
- iommu->name);
- }
-
- return 0;
-}
-
-int intel_svm_exit(struct intel_iommu *iommu)
-{
- int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-
- if (iommu->pasid_state_table) {
- free_pages((unsigned long)iommu->pasid_state_table, order);
- iommu->pasid_state_table = NULL;
- }
-
return 0;
}
@@ -163,27 +117,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
* because that's the only option the hardware gives us. Despite
* the fact that they are actually only accessible through one. */
if (gl)
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
+ desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+ QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) |
+ QI_EIOTLB_TYPE;
else
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
- desc.high = 0;
+ desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+ QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = 0;
} else {
int mask = ilog2(__roundup_pow_of_two(pages));
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
- desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
- QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
+ desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+ QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = QI_EIOTLB_ADDR(address) |
+ QI_EIOTLB_GL(gl) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
}
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, svm->iommu);
if (sdev->dev_iotlb) {
- desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
- QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
+ desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
+ QI_DEV_EIOTLB_SID(sdev->sid) |
+ QI_DEV_EIOTLB_QDEP(sdev->qdep) |
+ QI_DEIOTLB_TYPE;
if (pages == -1) {
- desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
+ QI_DEV_EIOTLB_SIZE;
} else if (pages > 1) {
/* The least significant zero bit indicates the size. So,
* for example, an "address" value of 0x12345f000 will
@@ -191,10 +158,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
unsigned long mask = __rounddown_pow_of_two(address ^ last);
- desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
+ desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
+ (mask - 1)) | QI_DEV_EIOTLB_SIZE;
} else {
- desc.high = QI_DEV_EIOTLB_ADDR(address);
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
}
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, svm->iommu);
}
}
@@ -204,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
{
struct intel_svm_dev *sdev;
- /* Try deferred invalidate if available */
- if (svm->iommu->pasid_state_table &&
- !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
- return;
-
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list)
intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
@@ -234,17 +199,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn,
(end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
}
-
-static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
-{
- struct qi_desc desc;
-
- desc.high = 0;
- desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
-
- qi_submit_sync(&desc, svm->iommu);
-}
-
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@@ -264,8 +218,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
*/
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
- intel_pasid_clear_entry(sdev->dev, svm->pasid);
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
+ intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
}
rcu_read_unlock();
@@ -284,11 +237,9 @@ static LIST_HEAD(global_svm_list);
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
- struct pasid_entry *entry;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
struct mm_struct *mm = NULL;
- u64 pasid_entry_val;
int pasid_max;
int ret;
@@ -397,24 +348,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
kfree(sdev);
goto out;
}
- pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P;
- } else
- pasid_entry_val = (u64)__pa(init_mm.pgd) |
- PASID_ENTRY_P | PASID_ENTRY_SRE;
- if (cpu_feature_enabled(X86_FEATURE_LA57))
- pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
-
- entry = intel_pasid_get_entry(dev, svm->pasid);
- entry->val = pasid_entry_val;
-
- wmb();
+ }
- /*
- * Flush PASID cache when a PASID table entry becomes
- * present.
- */
- if (cap_caching_mode(iommu->cap))
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
+ spin_lock(&iommu->lock);
+ ret = intel_pasid_setup_first_level(iommu, dev,
+ mm ? mm->pgd : init_mm.pgd,
+ svm->pasid, FLPT_DEFAULT_DID,
+ mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ if (mm)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ intel_pasid_free_id(svm->pasid);
+ kfree(svm);
+ kfree(sdev);
+ goto out;
+ }
list_add_tail(&svm->list, &global_svm_list);
}
@@ -460,10 +409,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
* to use. We have a *shared* PASID table, because it's
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
+ intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
kfree_rcu(sdev, rcu);
- intel_pasid_clear_entry(dev, svm->pasid);
if (list_empty(&svm->devs)) {
intel_pasid_free_id(svm->pasid);
@@ -671,24 +619,27 @@ static irqreturn_t prq_event_thread(int irq, void *d)
no_pasid:
if (req->lpig) {
/* Page Group Response */
- resp.low = QI_PGRP_PASID(req->pasid) |
+ resp.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID((req->bus << 8) | req->devfn) |
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_RESP_TYPE;
- resp.high = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
-
- qi_submit_sync(&resp, iommu);
+ resp.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_PRIV(req->private) |
+ QI_PGRP_RESP_CODE(result);
} else if (req->srr) {
/* Page Stream Response */
- resp.low = QI_PSTRM_IDX(req->prg_index) |
- QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
- QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
- resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
+ resp.qw0 = QI_PSTRM_IDX(req->prg_index) |
+ QI_PSTRM_PRIV(req->private) |
+ QI_PSTRM_BUS(req->bus) |
+ QI_PSTRM_PASID(req->pasid) |
+ QI_PSTRM_RESP_TYPE;
+ resp.qw1 = QI_PSTRM_ADDR(address) |
+ QI_PSTRM_DEVFN(req->devfn) |
QI_PSTRM_RESP_CODE(result);
-
- qi_submit_sync(&resp, iommu);
}
+ resp.qw2 = 0;
+ resp.qw3 = 0;
+ qi_submit_sync(&resp, iommu);
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index c2d6c11431de..24d45b07f425 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
{
struct qi_desc desc;
- desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+ desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
| QI_IEC_SELECTIVE;
- desc.high = 0;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
return qi_submit_sync(&desc, iommu);
}
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 445c3bde0480..cec29bf45c9b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -709,10 +709,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
{
struct arm_v7s_io_pgtable *data;
-#ifdef PHYS_OFFSET
- if (upper_32_bits(PHYS_OFFSET))
- return NULL;
-#endif
if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
return NULL;
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index 36d1a7ce7fc4..44127d54e943 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -11,7 +11,7 @@
#include <linux/device.h>
#include <linux/iommu.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/slab.h>
/*
@@ -22,25 +22,25 @@ static struct attribute *devices_attr[] = {
NULL,
};
-static const struct attribute_group iommu_devices_attr_group = {
+static const struct attribute_group devices_attr_group = {
.name = "devices",
.attrs = devices_attr,
};
-static const struct attribute_group *iommu_dev_groups[] = {
- &iommu_devices_attr_group,
+static const struct attribute_group *dev_groups[] = {
+ &devices_attr_group,
NULL,
};
-static void iommu_release_device(struct device *dev)
+static void release_device(struct device *dev)
{
kfree(dev);
}
static struct class iommu_class = {
.name = "iommu",
- .dev_release = iommu_release_device,
- .dev_groups = iommu_dev_groups,
+ .dev_release = release_device,
+ .dev_groups = dev_groups,
};
static int __init iommu_dev_init(void)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index edbdf5d6962c..3ed4db334341 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -22,7 +22,8 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/types.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/iommu.h>
@@ -110,6 +111,27 @@ void iommu_device_unregister(struct iommu_device *iommu)
spin_unlock(&iommu_device_lock);
}
+int iommu_probe_device(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ int ret = -EINVAL;
+
+ WARN_ON(dev->iommu_group);
+
+ if (ops)
+ ret = ops->add_device(dev);
+
+ return ret;
+}
+
+void iommu_release_device(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (dev->iommu_group)
+ ops->remove_device(dev);
+}
+
static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
@@ -1117,16 +1139,7 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
static int add_iommu_group(struct device *dev, void *data)
{
- struct iommu_callback_data *cb = data;
- const struct iommu_ops *ops = cb->ops;
- int ret;
-
- if (!ops->add_device)
- return 0;
-
- WARN_ON(dev->iommu_group);
-
- ret = ops->add_device(dev);
+ int ret = iommu_probe_device(dev);
/*
* We ignore -ENODEV errors for now, as they just mean that the
@@ -1141,11 +1154,7 @@ static int add_iommu_group(struct device *dev, void *data)
static int remove_iommu_group(struct device *dev, void *data)
{
- struct iommu_callback_data *cb = data;
- const struct iommu_ops *ops = cb->ops;
-
- if (ops->remove_device && dev->iommu_group)
- ops->remove_device(dev);
+ iommu_release_device(dev);
return 0;
}
@@ -1153,27 +1162,22 @@ static int remove_iommu_group(struct device *dev, void *data)
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
+ unsigned long group_action = 0;
struct device *dev = data;
- const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
- unsigned long group_action = 0;
/*
* ADD/DEL call into iommu driver ops if provided, which may
* result in ADD/DEL notifiers to group->notifier
*/
if (action == BUS_NOTIFY_ADD_DEVICE) {
- if (ops->add_device) {
- int ret;
+ int ret;
- ret = ops->add_device(dev);
- return (ret) ? NOTIFY_DONE : NOTIFY_OK;
- }
+ ret = iommu_probe_device(dev);
+ return (ret) ? NOTIFY_DONE : NOTIFY_OK;
} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
- if (ops->remove_device && dev->iommu_group) {
- ops->remove_device(dev);
- return 0;
- }
+ iommu_release_device(dev);
+ return NOTIFY_OK;
}
/*
@@ -1712,33 +1716,32 @@ EXPORT_SYMBOL_GPL(iommu_unmap_fast);
size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot)
{
- struct scatterlist *s;
- size_t mapped = 0;
- unsigned int i, min_pagesz;
+ size_t len = 0, mapped = 0;
+ phys_addr_t start;
+ unsigned int i = 0;
int ret;
- if (unlikely(domain->pgsize_bitmap == 0UL))
- return 0;
+ while (i <= nents) {
+ phys_addr_t s_phys = sg_phys(sg);
- min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
-
- for_each_sg(sg, s, nents, i) {
- phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
+ if (len && s_phys != start + len) {
+ ret = iommu_map(domain, iova + mapped, start, len, prot);
+ if (ret)
+ goto out_err;
- /*
- * We are mapping on IOMMU page boundaries, so offset within
- * the page must be 0. However, the IOMMU may support pages
- * smaller than PAGE_SIZE, so s->offset may still represent
- * an offset of that boundary within the CPU page.
- */
- if (!IS_ALIGNED(s->offset, min_pagesz))
- goto out_err;
+ mapped += len;
+ len = 0;
+ }
- ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
- if (ret)
- goto out_err;
+ if (len) {
+ len += sg->length;
+ } else {
+ len = sg->length;
+ start = s_phys;
+ }
- mapped += s->length;
+ if (++i < nents)
+ sg = sg_next(sg);
}
return mapped;
@@ -1976,7 +1979,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
const struct iommu_ops *ops)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (fwspec)
return ops == fwspec->ops ? 0 : -EINVAL;
@@ -1988,26 +1991,26 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
of_node_get(to_of_node(iommu_fwnode));
fwspec->iommu_fwnode = iommu_fwnode;
fwspec->ops = ops;
- dev->iommu_fwspec = fwspec;
+ dev_iommu_fwspec_set(dev, fwspec);
return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_init);
void iommu_fwspec_free(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (fwspec) {
fwnode_handle_put(fwspec->iommu_fwnode);
kfree(fwspec);
- dev->iommu_fwspec = NULL;
+ dev_iommu_fwspec_set(dev, NULL);
}
}
EXPORT_SYMBOL_GPL(iommu_fwspec_free);
int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
size_t size;
int i;
@@ -2016,11 +2019,11 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
if (size > sizeof(*fwspec)) {
- fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL);
+ fwspec = krealloc(fwspec, size, GFP_KERNEL);
if (!fwspec)
return -ENOMEM;
- dev->iommu_fwspec = fwspec;
+ dev_iommu_fwspec_set(dev, fwspec);
}
for (i = 0; i < num_ids; i++)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index ddf3a492e1d5..7a4529c61c19 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * IPMMU VMSA
+ * IOMMU API for Renesas VMSA-compatible IPMMU
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
*
* Copyright (C) 2014 Renesas Electronics Corporation
*/
@@ -11,10 +12,10 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/export.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
-#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
@@ -81,7 +82,9 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
{
- return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ return fwspec ? fwspec->iommu_priv : NULL;
}
#define TLB_LOOP_TIMEOUT 100 /* 100us */
@@ -643,7 +646,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
static int ipmmu_attach_device(struct iommu_domain *io_domain,
struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
unsigned int i;
@@ -692,7 +695,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
static void ipmmu_detach_device(struct iommu_domain *io_domain,
struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
unsigned int i;
@@ -744,36 +747,71 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
static int ipmmu_init_platform_device(struct device *dev,
struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *ipmmu_pdev;
ipmmu_pdev = of_find_device_by_node(args->np);
if (!ipmmu_pdev)
return -ENODEV;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
- return 0;
-}
+ fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
-static bool ipmmu_slave_whitelist(struct device *dev)
-{
- /* By default, do not allow use of IPMMU */
- return false;
+ return 0;
}
static const struct soc_device_attribute soc_rcar_gen3[] = {
+ { .soc_id = "r8a774a1", },
+ { .soc_id = "r8a774c0", },
{ .soc_id = "r8a7795", },
{ .soc_id = "r8a7796", },
{ .soc_id = "r8a77965", },
{ .soc_id = "r8a77970", },
+ { .soc_id = "r8a77990", },
+ { .soc_id = "r8a77995", },
+ { /* sentinel */ }
+};
+
+static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = {
+ { .soc_id = "r8a774c0", },
+ { .soc_id = "r8a7795", .revision = "ES3.*" },
+ { .soc_id = "r8a77965", },
+ { .soc_id = "r8a77990", },
{ .soc_id = "r8a77995", },
{ /* sentinel */ }
};
+static const char * const rcar_gen3_slave_whitelist[] = {
+};
+
+static bool ipmmu_slave_whitelist(struct device *dev)
+{
+ unsigned int i;
+
+ /*
+ * For R-Car Gen3 use a white list to opt-in slave devices.
+ * For Other SoCs, this returns true anyway.
+ */
+ if (!soc_device_match(soc_rcar_gen3))
+ return true;
+
+ /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */
+ if (!soc_device_match(soc_rcar_gen3_whitelist))
+ return false;
+
+ /* Check whether this slave device can work with the IPMMU */
+ for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) {
+ if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i]))
+ return true;
+ }
+
+ /* Otherwise, do not allow use of IPMMU */
+ return false;
+}
+
static int ipmmu_of_xlate(struct device *dev,
struct of_phandle_args *spec)
{
- /* For R-Car Gen3 use a white list to opt-in slave devices */
- if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev))
+ if (!ipmmu_slave_whitelist(dev))
return -ENODEV;
iommu_fwspec_add_ids(dev, spec->args, 1);
@@ -941,6 +979,12 @@ static const struct of_device_id ipmmu_of_ids[] = {
.compatible = "renesas,ipmmu-vmsa",
.data = &ipmmu_features_default,
}, {
+ .compatible = "renesas,ipmmu-r8a774a1",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a774c0",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a7795",
.data = &ipmmu_features_rcar_gen3,
}, {
@@ -953,6 +997,9 @@ static const struct of_device_id ipmmu_of_ids[] = {
.compatible = "renesas,ipmmu-r8a77970",
.data = &ipmmu_features_rcar_gen3,
}, {
+ .compatible = "renesas,ipmmu-r8a77990",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a77995",
.data = &ipmmu_features_rcar_gen3,
}, {
@@ -960,8 +1007,6 @@ static const struct of_device_id ipmmu_of_ids[] = {
},
};
-MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
-
static int ipmmu_probe(struct platform_device *pdev)
{
struct ipmmu_vmsa_device *mmu;
@@ -1132,15 +1177,4 @@ static int __init ipmmu_init(void)
setup_done = true;
return 0;
}
-
-static void __exit ipmmu_exit(void)
-{
- return platform_driver_unregister(&ipmmu_driver);
-}
-
subsys_initcall(ipmmu_init);
-module_exit(ipmmu_exit);
-
-MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
-MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7d0f3074d41d..b94ebd42edd8 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,4 +1,3 @@
-#include <linux/seq_file.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index fc5f0b53adaf..fc4270733f11 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -1,5 +1,7 @@
/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
*
+ * Author: Stepan Moskovchenko <stepanm@codeaurora.org>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
@@ -17,7 +19,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/errno.h>
#include <linux/io.h>
@@ -861,14 +863,5 @@ static int __init msm_iommu_driver_init(void)
return ret;
}
-
-static void __exit msm_iommu_driver_exit(void)
-{
- platform_driver_unregister(&msm_iommu_driver);
-}
-
subsys_initcall(msm_iommu_driver_init);
-module_exit(msm_iommu_driver_exit);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 44bd5b9166bb..de3e02277b70 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -113,7 +113,7 @@ struct mtk_iommu_domain {
struct iommu_domain domain;
};
-static struct iommu_ops mtk_iommu_ops;
+static const struct iommu_ops mtk_iommu_ops;
static LIST_HEAD(m4ulist); /* List all the M4U HWs */
@@ -244,7 +244,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
{
struct mtk_smi_larb_iommu *larb_mmu;
unsigned int larbid, portid;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i;
for (i = 0; i < fwspec->num_ids; ++i) {
@@ -336,7 +336,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return -ENODEV;
@@ -355,7 +355,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return;
@@ -417,13 +417,14 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
static int mtk_iommu_add_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
struct iommu_group *group;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return -ENODEV; /* Not a iommu client device */
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_link(&data->iommu, dev);
group = iommu_group_get_for_dev(dev);
@@ -436,12 +437,13 @@ static int mtk_iommu_add_device(struct device *dev)
static void mtk_iommu_remove_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_unlink(&data->iommu, dev);
iommu_group_remove_device(dev);
@@ -468,6 +470,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev)
static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *m4updev;
if (args->args_count != 1) {
@@ -476,19 +479,19 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
return -EINVAL;
}
- if (!dev->iommu_fwspec->iommu_priv) {
+ if (!fwspec->iommu_priv) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ fwspec->iommu_priv = platform_get_drvdata(m4updev);
}
return iommu_fwspec_add_ids(dev, args->args, 1);
}
-static struct iommu_ops mtk_iommu_ops = {
+static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc = mtk_iommu_domain_alloc,
.domain_free = mtk_iommu_domain_free,
.attach_dev = mtk_iommu_attach_device,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 0e780848f59b..6ede4286b835 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -1,4 +1,6 @@
/*
+ * IOMMU API for MTK architected m4u v1 implementations
+ *
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Honghui Zhang <honghui.zhang@mediatek.com>
*
@@ -35,7 +37,7 @@
#include <linux/spinlock.h>
#include <asm/barrier.h>
#include <asm/dma-iommu.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <dt-bindings/memory/mt2701-larb-port.h>
#include <soc/mediatek/smi.h>
#include "mtk_iommu.h"
@@ -206,7 +208,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
{
struct mtk_smi_larb_iommu *larb_mmu;
unsigned int larbid, portid;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i;
for (i = 0; i < fwspec->num_ids; ++i) {
@@ -271,7 +273,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
int ret;
if (!data)
@@ -293,7 +295,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return;
@@ -362,7 +364,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
return pa;
}
-static struct iommu_ops mtk_iommu_ops;
+static const struct iommu_ops mtk_iommu_ops;
/*
* MTK generation one iommu HW only support one iommu domain, and all the client
@@ -371,6 +373,7 @@ static struct iommu_ops mtk_iommu_ops;
static int mtk_iommu_create_mapping(struct device *dev,
struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
struct platform_device *m4updev;
struct dma_iommu_mapping *mtk_mapping;
@@ -383,28 +386,29 @@ static int mtk_iommu_create_mapping(struct device *dev,
return -EINVAL;
}
- if (!dev->iommu_fwspec) {
+ if (!fwspec) {
ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops);
if (ret)
return ret;
- } else if (dev->iommu_fwspec->ops != &mtk_iommu_ops) {
+ fwspec = dev_iommu_fwspec_get(dev);
+ } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_ops) {
return -EINVAL;
}
- if (!dev->iommu_fwspec->iommu_priv) {
+ if (!fwspec->iommu_priv) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ fwspec->iommu_priv = platform_get_drvdata(m4updev);
}
ret = iommu_fwspec_add_ids(dev, args->args, 1);
if (ret)
return ret;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
m4udev = data->dev;
mtk_mapping = m4udev->archdata.iommu;
if (!mtk_mapping) {
@@ -422,6 +426,7 @@ static int mtk_iommu_create_mapping(struct device *dev,
static int mtk_iommu_add_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct dma_iommu_mapping *mtk_mapping;
struct of_phandle_args iommu_spec;
struct of_phandle_iterator it;
@@ -440,7 +445,7 @@ static int mtk_iommu_add_device(struct device *dev)
of_node_put(iommu_spec.np);
}
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return -ENODEV; /* Not a iommu client device */
/*
@@ -458,7 +463,7 @@ static int mtk_iommu_add_device(struct device *dev)
if (err)
return err;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
mtk_mapping = data->dev->archdata.iommu;
err = arm_iommu_attach_device(dev, mtk_mapping);
if (err) {
@@ -471,12 +476,13 @@ static int mtk_iommu_add_device(struct device *dev)
static void mtk_iommu_remove_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_unlink(&data->iommu, dev);
iommu_group_remove_device(dev);
@@ -524,7 +530,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
return 0;
}
-static struct iommu_ops mtk_iommu_ops = {
+static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc = mtk_iommu_domain_alloc,
.domain_free = mtk_iommu_domain_free,
.attach_dev = mtk_iommu_attach_device,
@@ -704,15 +710,4 @@ static int __init m4u_init(void)
{
return platform_driver_register(&mtk_iommu_driver);
}
-
-static void __exit m4u_exit(void)
-{
- return platform_driver_unregister(&mtk_iommu_driver);
-}
-
subsys_initcall(m4u_init);
-module_exit(m4u_exit);
-
-MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations");
-MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index c5dd63072529..d8947b28db2d 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -164,7 +164,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np)
{
const struct iommu_ops *ops = NULL;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int err = NO_IOMMU;
if (!master_np)
@@ -208,20 +208,24 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
}
}
+
/*
* Two success conditions can be represented by non-negative err here:
* >0 : there is no IOMMU, or one was unavailable for non-fatal reasons
* 0 : we found an IOMMU, and dev->fwspec is initialised appropriately
* <0 : any actual error
*/
- if (!err)
- ops = dev->iommu_fwspec->ops;
+ if (!err) {
+ /* The fwspec pointer changed, read it again */
+ fwspec = dev_iommu_fwspec_get(dev);
+ ops = fwspec->ops;
+ }
/*
* If we have reason to believe the IOMMU driver missed the initial
- * add_device callback for dev, replay it to get things in order.
+ * probe for dev, replay it to get things in order.
*/
- if (ops && ops->add_device && dev->bus && !dev->iommu_group)
- err = ops->add_device(dev);
+ if (dev->bus && !device_iommu_mapped(dev))
+ err = iommu_probe_device(dev);
/* Ignore all other errors apart from EPROBE_DEFER */
if (err == -EPROBE_DEFER) {
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 50217548c3b8..4abc0ef522a8 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -159,7 +159,7 @@ static size_t omap_dump_tlb_entries(struct omap_iommu *obj, struct seq_file *s)
return 0;
}
-static int debug_read_tlb(struct seq_file *s, void *data)
+static int tlb_show(struct seq_file *s, void *data)
{
struct omap_iommu *obj = s->private;
@@ -210,7 +210,7 @@ static void dump_ioptable(struct seq_file *s)
spin_unlock(&obj->page_table_lock);
}
-static int debug_read_pagetable(struct seq_file *s, void *data)
+static int pagetable_show(struct seq_file *s, void *data)
{
struct omap_iommu *obj = s->private;
@@ -228,35 +228,22 @@ static int debug_read_pagetable(struct seq_file *s, void *data)
return 0;
}
-#define DEBUG_SEQ_FOPS_RO(name) \
- static int debug_open_##name(struct inode *inode, struct file *file) \
- { \
- return single_open(file, debug_read_##name, inode->i_private); \
- } \
- \
- static const struct file_operations debug_##name##_fops = { \
- .open = debug_open_##name, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
- }
-
#define DEBUG_FOPS_RO(name) \
- static const struct file_operations debug_##name##_fops = { \
+ static const struct file_operations name##_fops = { \
.open = simple_open, \
.read = debug_read_##name, \
.llseek = generic_file_llseek, \
}
DEBUG_FOPS_RO(regs);
-DEBUG_SEQ_FOPS_RO(tlb);
-DEBUG_SEQ_FOPS_RO(pagetable);
+DEFINE_SHOW_ATTRIBUTE(tlb);
+DEFINE_SHOW_ATTRIBUTE(pagetable);
#define __DEBUG_ADD_FILE(attr, mode) \
{ \
struct dentry *dent; \
dent = debugfs_create_file(#attr, mode, obj->debug_dir, \
- obj, &debug_##attr##_fops); \
+ obj, &attr##_fops); \
if (!dent) \
goto err; \
}
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index ee70e9921cf1..d8595f0a987d 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -29,7 +29,7 @@
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/kconfig.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -354,7 +354,8 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
int ret;
@@ -365,7 +366,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
/* Ensure that the domain is finalized */
pm_runtime_get_sync(qcom_iommu->dev);
- ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec);
+ ret = qcom_iommu_init_domain(domain, qcom_iommu, fwspec);
pm_runtime_put_sync(qcom_iommu->dev);
if (ret < 0)
return ret;
@@ -387,7 +388,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
unsigned i;
@@ -500,7 +501,7 @@ static bool qcom_iommu_capable(enum iommu_cap cap)
static int qcom_iommu_add_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
struct iommu_group *group;
struct device_link *link;
@@ -531,7 +532,7 @@ static int qcom_iommu_add_device(struct device *dev)
static void qcom_iommu_remove_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
if (!qcom_iommu)
return;
@@ -543,6 +544,7 @@ static void qcom_iommu_remove_device(struct device *dev)
static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct qcom_iommu_dev *qcom_iommu;
struct platform_device *iommu_pdev;
unsigned asid = args->args[0];
@@ -568,14 +570,14 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
WARN_ON(asid > qcom_iommu->num_ctxs))
return -EINVAL;
- if (!dev->iommu_fwspec->iommu_priv) {
- dev->iommu_fwspec->iommu_priv = qcom_iommu;
+ if (!fwspec->iommu_priv) {
+ fwspec->iommu_priv = qcom_iommu;
} else {
/* make sure devices iommus dt node isn't referring to
* multiple different iommu devices. Multiple context
* banks are ok, but multiple devices are not:
*/
- if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv))
+ if (WARN_ON(qcom_iommu != fwspec->iommu_priv))
return -EINVAL;
}
@@ -908,7 +910,6 @@ static const struct of_device_id qcom_iommu_of_match[] = {
{ .compatible = "qcom,msm-iommu-v1" },
{ /* sentinel */ }
};
-MODULE_DEVICE_TABLE(of, qcom_iommu_of_match);
static struct platform_driver qcom_iommu_driver = {
.driver = {
@@ -934,15 +935,4 @@ static int __init qcom_iommu_init(void)
return ret;
}
-
-static void __exit qcom_iommu_exit(void)
-{
- platform_driver_unregister(&qcom_iommu_driver);
- platform_driver_unregister(&qcom_iommu_ctx_driver);
-}
-
-module_init(qcom_iommu_init);
-module_exit(qcom_iommu_exit);
-
-MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
-MODULE_LICENSE("GPL v2");
+device_initcall(qcom_iommu_init);
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ad3e2b97469e..c9ba9f377f63 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1,4 +1,9 @@
/*
+ * IOMMU API for Rockchip
+ *
+ * Module Authors: Simon Xue <xxm@rock-chips.com>
+ * Daniel Kurtz <djkurtz@chromium.org>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -17,7 +22,7 @@
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
@@ -1281,7 +1286,6 @@ static const struct of_device_id rk_iommu_dt_ids[] = {
{ .compatible = "rockchip,iommu" },
{ /* sentinel */ }
};
-MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
static struct platform_driver rk_iommu_driver = {
.probe = rk_iommu_probe,
@@ -1299,8 +1303,3 @@ static int __init rk_iommu_init(void)
return platform_driver_register(&rk_iommu_driver);
}
subsys_initcall(rk_iommu_init);
-
-MODULE_DESCRIPTION("IOMMU API for Rockchip");
-MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
-MODULE_ALIAS("platform:rockchip-iommu");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 7b1361d57a17..da6a4e357b2b 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -3,6 +3,8 @@
*
* Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved.
*
+ * Author: Hiroshi DOYU <hdoyu@nvidia.com>
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
@@ -19,7 +21,8 @@
#define pr_fmt(fmt) "%s(): " fmt, __func__
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -478,20 +481,6 @@ static int tegra_gart_probe(struct platform_device *pdev)
return 0;
}
-static int tegra_gart_remove(struct platform_device *pdev)
-{
- struct gart_device *gart = platform_get_drvdata(pdev);
-
- iommu_device_unregister(&gart->iommu);
- iommu_device_sysfs_remove(&gart->iommu);
-
- writel(0, gart->regs + GART_CONFIG);
- if (gart->savedata)
- vfree(gart->savedata);
- gart_handle = NULL;
- return 0;
-}
-
static const struct dev_pm_ops tegra_gart_pm_ops = {
.suspend = tegra_gart_suspend,
.resume = tegra_gart_resume,
@@ -501,34 +490,22 @@ static const struct of_device_id tegra_gart_of_match[] = {
{ .compatible = "nvidia,tegra20-gart", },
{ },
};
-MODULE_DEVICE_TABLE(of, tegra_gart_of_match);
static struct platform_driver tegra_gart_driver = {
.probe = tegra_gart_probe,
- .remove = tegra_gart_remove,
.driver = {
.name = "tegra-gart",
.pm = &tegra_gart_pm_ops,
.of_match_table = tegra_gart_of_match,
+ .suppress_bind_attrs = true,
},
};
-static int tegra_gart_init(void)
+static int __init tegra_gart_init(void)
{
return platform_driver_register(&tegra_gart_driver);
}
-
-static void __exit tegra_gart_exit(void)
-{
- platform_driver_unregister(&tegra_gart_driver);
-}
-
subsys_initcall(tegra_gart_init);
-module_exit(tegra_gart_exit);
-module_param(gart_debug, bool, 0644);
+module_param(gart_debug, bool, 0644);
MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
-MODULE_DESCRIPTION("IOMMU API for GART in Tegra20");
-MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
-MODULE_ALIAS("platform:tegra-gart");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0d03341317c4..3a5c7dc6dc57 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -846,7 +846,7 @@ static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu,
static struct iommu_group *tegra_smmu_device_group(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev->archdata.iommu;
struct iommu_group *group;
@@ -926,17 +926,7 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, void *data)
return 0;
}
-static int tegra_smmu_swgroups_open(struct inode *inode, struct file *file)
-{
- return single_open(file, tegra_smmu_swgroups_show, inode->i_private);
-}
-
-static const struct file_operations tegra_smmu_swgroups_fops = {
- .open = tegra_smmu_swgroups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_swgroups);
static int tegra_smmu_clients_show(struct seq_file *s, void *data)
{
@@ -964,17 +954,7 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
return 0;
}
-static int tegra_smmu_clients_open(struct inode *inode, struct file *file)
-{
- return single_open(file, tegra_smmu_clients_show, inode->i_private);
-}
-
-static const struct file_operations tegra_smmu_clients_fops = {
- .open = tegra_smmu_clients_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 0e4193cb08cf..749321eb91ae 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -15,7 +15,7 @@
* Intel SCIF driver.
*
*/
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
index 84af3033a473..964dd0fc3657 100644
--- a/drivers/misc/mic/scif/scif_rma.h
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -53,7 +53,7 @@
#ifndef SCIF_RMA_H
#define SCIF_RMA_H
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include "../bus/scif_bus.h"
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 46ab9c041091..005e65922608 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -245,7 +245,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
* an iommu. Doing anything when there is no iommu is definitely
* unsafe...
*/
- if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !dev->iommu_group)
+ if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev))
return;
xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n");
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d9fd3188615d..7651cfb14836 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -978,32 +978,6 @@ unlock:
return ret;
}
-/*
- * Turns out AMD IOMMU has a page table bug where it won't map large pages
- * to a region that previously mapped smaller pages. This should be fixed
- * soon, so this is just a temporary workaround to break mappings down into
- * PAGE_SIZE. Better to map smaller pages than nothing.
- */
-static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
- unsigned long pfn, long npage, int prot)
-{
- long i;
- int ret = 0;
-
- for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
- ret = iommu_map(domain->domain, iova,
- (phys_addr_t)pfn << PAGE_SHIFT,
- PAGE_SIZE, prot | domain->prot);
- if (ret)
- break;
- }
-
- for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
- iommu_unmap(domain->domain, iova, PAGE_SIZE);
-
- return ret;
-}
-
static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
unsigned long pfn, long npage, int prot)
{
@@ -1013,11 +987,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
list_for_each_entry(d, &iommu->domain_list, next) {
ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
npage << PAGE_SHIFT, prot | d->prot);
- if (ret) {
- if (ret != -EBUSY ||
- map_try_harder(d, iova, pfn, npage, prot))
- goto unwind;
- }
+ if (ret)
+ goto unwind;
cond_resched();
}