diff options
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- | drivers/iommu/intel-iommu.c | 940 |
1 files changed, 423 insertions, 517 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 162b3236e72c..ac4172c02244 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -294,14 +294,16 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; +/* si_domain contains mulitple devices */ +#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) + /* - * Domain represents a virtual machine, more than one devices - * across iommus may be owned in one domain, e.g. kvm guest. + * This is a DMA domain allocated through the iommu domain allocation + * interface. But one or more devices belonging to this domain have + * been chosen to use a private domain. We should avoid to use the + * map/unmap/iova_to_phys APIs on it. */ -#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) - -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) +#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ @@ -314,7 +316,6 @@ struct dmar_rmrr_unit { u64 end_address; /* reserved end address */ struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ - struct iommu_resv_region *resv; /* reserved region handle */ }; struct dmar_atsr_unit { @@ -342,6 +343,9 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); +static bool device_is_rmrr_locked(struct device *dev); +static int intel_iommu_attach_device(struct iommu_domain *domain, + struct device *dev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -349,6 +353,7 @@ int dmar_disabled = 0; int dmar_disabled = 1; #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ +int intel_iommu_sm; int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); @@ -356,21 +361,17 @@ static int dmar_map_gfx = 1; 
static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_sm; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) -#define pasid_supported(iommu) (sm_supported(iommu) && \ - ecap_pasid((iommu)->ecap)) - int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) +#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -535,22 +536,11 @@ static inline void free_devinfo_mem(void *vaddr) kmem_cache_free(iommu_devinfo_cache, vaddr); } -static inline int domain_type_is_vm(struct dmar_domain *domain) -{ - return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; -} - static inline int domain_type_is_si(struct dmar_domain *domain) { return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; } -static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) -{ - return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | - DOMAIN_FLAG_STATIC_IDENTITY); -} - static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -598,7 +588,9 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) int iommu_id; /* si_domain and vm domain should not get here. 
*/ - BUG_ON(domain_type_is_vm_or_si(domain)); + if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA)) + return NULL; + for_each_domain_iommu(iommu_id, domain) break; @@ -729,12 +721,39 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } +/** + * is_downstream_to_pci_bridge - test if a device belongs to the PCI + * sub-hierarchy of a candidate PCI-PCI bridge + * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy + * @bridge: the candidate PCI-PCI bridge + * + * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. + */ +static bool +is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) +{ + struct pci_dev *pdev, *pbridge; + + if (!dev_is_pci(dev) || !dev_is_pci(bridge)) + return false; + + pdev = to_pci_dev(dev); + pbridge = to_pci_dev(bridge); + + if (pbridge->subordinate && + pbridge->subordinate->number <= pdev->bus->number && + pbridge->subordinate->busn_res.end >= pdev->bus->number) + return true; + + return false; +} + static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct intel_iommu *iommu; struct device *tmp; - struct pci_dev *ptmp, *pdev = NULL; + struct pci_dev *pdev = NULL; u16 segment = 0; int i; @@ -780,13 +799,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf goto out; } - if (!pdev || !dev_is_pci(tmp)) - continue; - - ptmp = to_pci_dev(tmp); - if (ptmp->subordinate && - ptmp->subordinate->number <= pdev->bus->number && - ptmp->subordinate->busn_res.end >= pdev->bus->number) + if (is_downstream_to_pci_bridge(dev, tmp)) goto got_pdev; } @@ -908,7 +921,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } - /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, @@ -1577,7 +1589,6 @@ static void iommu_disable_translation(struct intel_iommu 
*iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } - static int iommu_init_domains(struct intel_iommu *iommu) { u32 ndomains, nlongs; @@ -1615,8 +1626,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } - - /* * If Caching mode is set, then invalid translations are tagged * with domain-id 0, hence we need to pre-allocate it. We also @@ -1646,32 +1655,15 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) if (!iommu->domains || !iommu->domain_ids) return; -again: spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &device_domain_list, global) { - struct dmar_domain *domain; - if (info->iommu != iommu) continue; if (!info->dev || !info->domain) continue; - domain = info->domain; - __dmar_remove_one_dev_info(info); - - if (!domain_type_is_vm_or_si(domain)) { - /* - * The domain_exit() function can't be called under - * device_domain_lock, as it takes this lock itself. - * So release the lock here and re-run the loop - * afterwards. 
- */ - spin_unlock_irqrestore(&device_domain_lock, flags); - domain_exit(domain); - goto again; - } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1841,71 +1833,12 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int err; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - err = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (err) - return err; - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { struct page *freelist; /* Remove associated devices and clear attached or cached domains */ - rcu_read_lock(); domain_remove_dev_info(domain); - rcu_read_unlock(); /* destroy iovas */ 
put_iova_domain(&domain->iovad); @@ -2336,7 +2269,7 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - int ret; + int iommu_id, ret; struct intel_iommu *iommu; /* Do the real mapping first */ @@ -2344,18 +2277,8 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (ret) return ret; - /* Notify about the new mapping */ - if (domain_type_is_vm(domain)) { - /* VM typed domains can have more than one IOMMUs */ - int iommu_id; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); - } - } else { - /* General domains only have one IOMMU */ - iommu = domain_get_iommu(domain); + for_each_domain_iommu(iommu_id, domain) { + iommu = g_iommus[iommu_id]; __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } @@ -2435,8 +2358,18 @@ static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; + if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) { + struct iommu_domain *domain; + + dev->archdata.iommu = NULL; + domain = iommu_get_domain_for_dev(dev); + if (domain) + intel_iommu_attach_device(domain, dev); + } + /* No lock here, assumes no domain exit in normal case */ info = dev->archdata.iommu; + if (likely(info)) return info->domain; return NULL; @@ -2580,6 +2513,31 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) return 0; } +static int domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = 0; + domain->iommu_snooping = 0; + domain->iommu_superpage = 0; + 
domain->max_addr = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { struct device_domain_info *info; @@ -2617,13 +2575,20 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) domain = alloc_domain(0); if (!domain) return NULL; - if (domain_init(domain, iommu, gaw)) { + + if (domain_init(domain, gaw)) { domain_exit(domain); return NULL; } -out: + if (init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, + iova_entry_free)) { + pr_warn("iova flush queue initialization failed\n"); + intel_iommu_strict = 1; + } +out: return domain; } @@ -2663,29 +2628,6 @@ static struct dmar_domain *set_domain_for_dev(struct device *dev, return domain; } -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) -{ - struct dmar_domain *domain, *tmp; - - domain = find_domain(dev); - if (domain) - goto out; - - domain = find_or_alloc_domain(dev, gaw); - if (!domain) - goto out; - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) @@ -2750,75 +2692,21 @@ static int domain_prepare_identity_map(struct device *dev, return iommu_domain_identity_map(domain, start, end); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) -{ - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - - ret = domain_prepare_identity_map(dev, domain, start, end); - if (ret) - domain_exit(domain); - - return ret; -} - -static inline int iommu_prepare_rmrr_dev(struct 
dmar_rmrr_unit *rmrr, - struct device *dev) -{ - if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) - return 0; - return iommu_prepare_identity_map(dev, rmrr->base_address, - rmrr->end_address); -} - -#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA -static inline void iommu_prepare_isa(void) -{ - struct pci_dev *pdev; - int ret; - - pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - if (!pdev) - return; - - pr_info("Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); - - if (ret) - pr_err("Failed to create 0-16MiB identity map - floppy might not work\n"); - - pci_dev_put(pdev); -} -#else -static inline void iommu_prepare_isa(void) -{ - return; -} -#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ - -static int md_domain_init(struct dmar_domain *domain, int guest_width); - static int __init si_domain_init(int hw) { - int nid, ret; + struct dmar_rmrr_unit *rmrr; + struct device *dev; + int i, nid, ret; si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) return -EFAULT; - if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); return -EFAULT; } - pr_debug("Identity mapping domain allocated\n"); - if (hw) return 0; @@ -2834,6 +2722,31 @@ static int __init si_domain_init(int hw) } } + /* + * Normally we use DMA domains for devices which have RMRRs. But we + * loose this requirement for graphic and usb devices. Identity map + * the RMRRs for graphic and USB devices so that they could use the + * si_domain. 
+ */ + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, dev) { + unsigned long long start = rmrr->base_address; + unsigned long long end = rmrr->end_address; + + if (device_is_rmrr_locked(dev)) + continue; + + if (WARN_ON(end < start || + end >> agaw_to_width(si_domain->agaw))) + continue; + + ret = iommu_domain_identity_map(si_domain, start, end); + if (ret) + return ret; + } + } + return 0; } @@ -2841,9 +2754,6 @@ static int identity_mapping(struct device *dev) { struct device_domain_info *info; - if (likely(!iommu_identity_mapping)) - return 0; - info = dev->archdata.iommu; if (info && info != DUMMY_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); @@ -2882,7 +2792,8 @@ static bool device_has_rmrr(struct device *dev) */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) - if (tmp == dev) { + if (tmp == dev || + is_downstream_to_pci_bridge(dev, tmp)) { rcu_read_unlock(); return true; } @@ -2891,6 +2802,35 @@ static bool device_has_rmrr(struct device *dev) return false; } +/** + * device_rmrr_is_relaxable - Test whether the RMRR of this device + * is relaxable (ie. is allowed to be not enforced under some conditions) + * @dev: device handle + * + * We assume that PCI USB devices with RMRRs have them largely + * for historical reasons and that the RMRR space is not actively used post + * boot. This exclusion may change if vendors begin to abuse it. + * + * The same exception is made for graphics devices, with the requirement that + * any use of the RMRR regions will be torn down before assigning the device + * to a guest. 
+ * + * Return: true if the RMRR is relaxable, false otherwise + */ +static bool device_rmrr_is_relaxable(struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return false; + + pdev = to_pci_dev(dev); + if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) + return true; + else + return false; +} + /* * There are a couple cases where we need to restrict the functionality of * devices associated with RMRRs. The first is when evaluating a device for @@ -2905,52 +2845,51 @@ static bool device_has_rmrr(struct device *dev) * We therefore prevent devices associated with an RMRR from participating in * the IOMMU API, which eliminates them from device assignment. * - * In both cases we assume that PCI USB devices with RMRRs have them largely - * for historical reasons and that the RMRR space is not actively used post - * boot. This exclusion may change if vendors begin to abuse it. - * - * The same exception is made for graphics devices, with the requirement that - * any use of the RMRR regions will be torn down before assigning the device - * to a guest. + * In both cases, devices which have relaxable RMRRs are not concerned by this + * restriction. See device_rmrr_is_relaxable comment. */ static bool device_is_rmrr_locked(struct device *dev) { if (!device_has_rmrr(dev)) return false; - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) - return false; - } + if (device_rmrr_is_relaxable(dev)) + return false; return true; } -static int iommu_should_identity_map(struct device *dev, int startup) +/* + * Return the required default domain type for a specific device. 
+ * + * @dev: the device in query + * @startup: true if this is during early boot + * + * Returns: + * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain + * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain + * - 0: both identity and dynamic domains work for this device + */ +static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); if (device_is_rmrr_locked(dev)) - return 0; + return IOMMU_DOMAIN_DMA; /* * Prevent any device marked as untrusted from getting * placed into the statically identity mapping domain. */ if (pdev->untrusted) - return 0; + return IOMMU_DOMAIN_DMA; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) - return 1; + return IOMMU_DOMAIN_IDENTITY; if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return 1; - - if (!(iommu_identity_mapping & IDENTMAP_ALL)) - return 0; + return IOMMU_DOMAIN_IDENTITY; /* * We want to start off with all devices in the 1:1 domain, and @@ -2971,94 +2910,18 @@ static int iommu_should_identity_map(struct device *dev, int startup) */ if (!pci_is_pcie(pdev)) { if (!pci_is_root_bus(pdev->bus)) - return 0; + return IOMMU_DOMAIN_DMA; if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return 0; + return IOMMU_DOMAIN_DMA; } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; + return IOMMU_DOMAIN_DMA; } else { if (device_has_rmrr(dev)) - return 0; + return IOMMU_DOMAIN_DMA; } - /* - * At boot time, we don't yet know if devices will be 64-bit capable. - * Assume that they will — if they turn out not to be, then we can - * take them out of the 1:1 domain later. - */ - if (!startup) { - /* - * If the device's dma_mask is less than the system's memory - * size then this is not a candidate for identity mapping. 
- */ - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && - dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - return dma_mask >= dma_get_required_mask(dev); - } - - return 1; -} - -static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) -{ - int ret; - - if (!iommu_should_identity_map(dev, 1)) - return 0; - - ret = domain_add_dev_info(si_domain, dev); - if (!ret) - dev_info(dev, "%s identity mapping\n", - hw ? "Hardware" : "Software"); - else if (ret == -ENODEV) - /* device not associated with an iommu */ - ret = 0; - - return ret; -} - - -static int __init iommu_prepare_static_identity_mapping(int hw) -{ - struct pci_dev *pdev = NULL; - struct dmar_drhd_unit *drhd; - /* To avoid a -Wunused-but-set-variable warning. */ - struct intel_iommu *iommu __maybe_unused; - struct device *dev; - int i; - int ret = 0; - - for_each_pci_dev(pdev) { - ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); - if (ret) - return ret; - } - - for_each_active_iommu(iommu, drhd) - for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { - struct acpi_device_physical_node *pn; - struct acpi_device *adev; - - if (dev->bus != &acpi_bus_type) - continue; - - adev= to_acpi_device(dev); - mutex_lock(&adev->physical_node_lock); - list_for_each_entry(pn, &adev->physical_node_list, node) { - ret = dev_prepare_static_identity_mapping(pn->dev, hw); - if (ret) - break; - } - mutex_unlock(&adev->physical_node_lock); - if (ret) - return ret; - } - - return 0; + return (iommu_identity_mapping & IDENTMAP_ALL) ? 
+ IOMMU_DOMAIN_IDENTITY : 0; } static void intel_iommu_init_qi(struct intel_iommu *iommu) @@ -3283,11 +3146,8 @@ out_unmap: static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; - bool copied_tables = false; - struct device *dev; struct intel_iommu *iommu; - int i, ret; + int ret; /* * for each drhd @@ -3320,7 +3180,12 @@ static int __init init_dmars(void) goto error; } - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + iommu_disable_translation(iommu); + continue; + } + /* * Find the max pasid size of all IOMMU's in the system. * We need to ensure the system pasid table is no bigger @@ -3380,7 +3245,6 @@ static int __init init_dmars(void) } else { pr_info("Copied translation tables from previous kernel for %s\n", iommu->name); - copied_tables = true; } } @@ -3416,62 +3280,9 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - if (iommu_identity_mapping) { - ret = si_domain_init(hw_pass_through); - if (ret) - goto free_iommu; - } - - - /* - * If we copied translations from a previous kernel in the kdump - * case, we can not assign the devices to domains now, as that - * would eliminate the old mappings. So skip this part and defer - * the assignment to device driver initialization time. - */ - if (copied_tables) - goto domains_done; - - /* - * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa and may fall back to static - * identity mapping if iommu_identity_mapping is set. 
- */ - if (iommu_identity_mapping) { - ret = iommu_prepare_static_identity_mapping(hw_pass_through); - if (ret) { - pr_crit("Failed to setup IOMMU pass-through\n"); - goto free_iommu; - } - } - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } - - iommu_prepare_isa(); - -domains_done: + ret = si_domain_init(hw_pass_through); + if (ret) + goto free_iommu; /* * for each drhd @@ -3509,11 +3320,6 @@ domains_done: ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; - - if (!translation_pre_enabled(iommu)) - iommu_enable_translation(iommu); - - iommu_disable_protect_mem_regions(iommu); } return 0; @@ -3563,16 +3369,17 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_private_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i, ret; + /* Device shouldn't be attached by any domains. 
*/ domain = find_domain(dev); if (domain) - goto out; + return NULL; domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) @@ -3602,10 +3409,10 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) } out: - if (!domain) dev_err(dev, "Allocating domain failed\n"); - + else + domain->domain.type = IOMMU_DOMAIN_DMA; return domain; } @@ -3613,17 +3420,19 @@ out: /* Check if the dev needs to go through non-identity map and unmap process.*/ static bool iommu_need_mapping(struct device *dev) { - int found; + int ret; if (iommu_dummy(dev)) return false; - if (!iommu_identity_mapping) - return true; + ret = identity_mapping(dev); + if (ret) { + u64 dma_mask = *dev->dma_mask; - found = identity_mapping(dev); - if (found) { - if (iommu_should_identity_map(dev, 0)) + if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; + + if (dma_mask >= dma_get_required_mask(dev)) return false; /* @@ -3631,17 +3440,20 @@ static bool iommu_need_mapping(struct device *dev) * non-identity mapping. */ dmar_remove_one_dev_info(dev); - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } else { - /* - * In case of a detached 64 bit DMA device from vm, the device - * is put into si_domain for identity mapping. 
- */ - if (iommu_should_identity_map(dev, 0) && - !domain_add_dev_info(si_domain, dev)) { - dev_info(dev, "64bit DMA uses identity mapping\n"); - return false; + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + struct iommu_domain *domain; + struct dmar_domain *dmar_domain; + + domain = iommu_get_domain_for_dev(dev); + if (domain) { + dmar_domain = to_dmar_domain(domain); + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + } + get_private_domain_for_dev(dev); } + + dev_info(dev, "32bit DMA uses non-identity mapping\n"); } return true; @@ -3660,7 +3472,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3875,7 +3687,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -4194,13 +4006,10 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ - int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; - int prot = DMA_PTE_READ|DMA_PTE_WRITE; struct dmar_rmrr_unit *rmrru; - size_t length; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) @@ -4211,23 +4020,15 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; - length = rmrr->end_address - rmrr->base_address + 1; - rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot, - IOMMU_RESV_DIRECT); - if (!rmrru->resv) - goto free_rmrru; - rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt); if (rmrru->devices_cnt && 
rmrru->devices == NULL) - goto free_all; + goto free_rmrru; list_add(&rmrru->list, &dmar_rmrr_units); return 0; -free_all: - kfree(rmrru->resv); free_rmrru: kfree(rmrru); out: @@ -4445,7 +4246,6 @@ static void intel_iommu_free_dmars(void) list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); - kfree(rmrru->resv); kfree(rmrru); } @@ -4550,42 +4350,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) return 0; } -/* - * Here we only respond to action of unbound device from driver. - * - * Added device is not attached to its DMAR domain here yet. That will happen - * when mapping the device to iova. - */ -static int device_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct dmar_domain *domain; - - if (iommu_dummy(dev)) - return 0; - - if (action == BUS_NOTIFY_REMOVED_DEVICE) { - domain = find_domain(dev); - if (!domain) - return 0; - - dmar_remove_one_dev_info(dev); - if (!domain_type_is_vm_or_si(domain) && - list_empty(&domain->devices)) - domain_exit(domain); - } else if (action == BUS_NOTIFY_ADD_DEVICE) { - if (iommu_should_identity_map(dev, 1)) - domain_add_dev_info(si_domain, dev); - } - - return 0; -} - -static struct notifier_block device_nb = { - .notifier_call = device_notifier, -}; - static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { @@ -4812,6 +4576,49 @@ static int __init platform_optin_force_iommu(void) return 1; } +static int __init probe_acpi_namespace_devices(void) +{ + struct dmar_drhd_unit *drhd; + /* To avoid a -Wunused-but-set-variable warning. 
*/ + struct intel_iommu *iommu __maybe_unused; + struct device *dev; + int i, ret = 0; + + for_each_active_iommu(iommu, drhd) { + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) { + struct acpi_device_physical_node *pn; + struct iommu_group *group; + struct acpi_device *adev; + + if (dev->bus != &acpi_bus_type) + continue; + + adev = to_acpi_device(dev); + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(pn, + &adev->physical_node_list, node) { + group = iommu_group_get(pn->dev); + if (group) { + iommu_group_put(group); + continue; + } + + pn->dev->bus->iommu_ops = &intel_iommu_ops; + ret = iommu_probe_device(pn->dev); + if (ret) + break; + } + mutex_unlock(&adev->physical_node_lock); + + if (ret) + return ret; + } + } + + return 0; +} + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4901,7 +4708,6 @@ int __init intel_iommu_init(void) goto out_free_reserved_range; } up_write(&dmar_global_lock); - pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) swiotlb = 0; @@ -4919,11 +4725,25 @@ int __init intel_iommu_init(void) } bus_set_iommu(&pci_bus_type, &intel_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, intel_iommu_cpu_dead); + + down_read(&dmar_global_lock); + if (probe_acpi_namespace_devices()) + pr_warn("ACPI name space devices didn't probe correctly\n"); + up_read(&dmar_global_lock); + + /* Finally, we enable the DMA remapping hardware. 
*/ + for_each_iommu(iommu, drhd) { + if (!drhd->ignored && !translation_pre_enabled(iommu)) + iommu_enable_translation(iommu); + + iommu_disable_protect_mem_regions(iommu); + } + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + intel_iommu_enabled = 1; intel_iommu_debugfs_init(); @@ -4962,6 +4782,7 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) static void __dmar_remove_one_dev_info(struct device_domain_info *info) { + struct dmar_domain *domain; struct intel_iommu *iommu; unsigned long flags; @@ -4971,6 +4792,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) return; iommu = info->iommu; + domain = info->domain; if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) @@ -4985,9 +4807,14 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) unlink_domain_info(info); spin_lock_irqsave(&iommu->lock, flags); - domain_detach_iommu(info->domain, iommu); + domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); + /* free the private domain */ + if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) + domain_exit(info->domain); + free_devinfo_mem(info); } @@ -5002,62 +4829,55 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_unlock_irqrestore(&device_domain_lock, flags); } -static int md_domain_init(struct dmar_domain *domain, int guest_width) -{ - int adjust_width; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - domain->agaw = width_to_agaw(adjust_width); - - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; - domain->iommu_superpage = 0; - domain->max_addr = 0; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return 
-ENOMEM; - domain_flush_cache(domain, domain->pgd, PAGE_SIZE); - return 0; -} - static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; + switch (type) { + case IOMMU_DOMAIN_DMA: + /* fallthrough */ + case IOMMU_DOMAIN_UNMANAGED: + dmar_domain = alloc_domain(0); + if (!dmar_domain) { + pr_err("Can't allocate dmar_domain\n"); + return NULL; + } + if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + pr_err("Domain initialization failed\n"); + domain_exit(dmar_domain); + return NULL; + } - dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); - if (!dmar_domain) { - pr_err("Can't allocate dmar_domain\n"); - return NULL; - } - if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - pr_err("Domain initialization failed\n"); - domain_exit(dmar_domain); + if (type == IOMMU_DOMAIN_DMA && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) { + pr_warn("iova flush queue initialization failed\n"); + intel_iommu_strict = 1; + } + + domain_update_iommu_cap(dmar_domain); + + domain = &dmar_domain->domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = + __DOMAIN_MAX_ADDR(dmar_domain->gaw); + domain->geometry.force_aperture = true; + + return domain; + case IOMMU_DOMAIN_IDENTITY: + return &si_domain->domain; + default: return NULL; } - domain_update_iommu_cap(dmar_domain); - - domain = &dmar_domain->domain; - domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); - domain->geometry.force_aperture = true; - return domain; + return NULL; } static void intel_iommu_domain_free(struct iommu_domain *domain) { - domain_exit(to_dmar_domain(domain)); + if (domain != &si_domain->domain) + domain_exit(to_dmar_domain(domain)); } /* @@ -5233,7 +5053,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, { int ret; - 
if (device_is_rmrr_locked(dev)) { + if (domain->type == IOMMU_DOMAIN_UNMANAGED && + device_is_rmrr_locked(dev)) { dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); return -EPERM; } @@ -5246,15 +5067,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct dmar_domain *old_domain; old_domain = find_domain(dev); - if (old_domain) { - rcu_read_lock(); + if (old_domain) dmar_remove_one_dev_info(dev); - rcu_read_unlock(); - - if (!domain_type_is_vm_or_si(old_domain) && - list_empty(&old_domain->devices)) - domain_exit(old_domain); - } } ret = prepare_domain_attach_device(domain, dev); @@ -5300,6 +5114,9 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return -EINVAL; + if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5341,6 +5158,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. 
*/ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5372,6 +5191,9 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); @@ -5427,9 +5249,12 @@ static bool intel_iommu_capable(enum iommu_cap cap) static int intel_iommu_add_device(struct device *dev) { + struct dmar_domain *dmar_domain; + struct iommu_domain *domain; struct intel_iommu *iommu; struct iommu_group *group; u8 bus, devfn; + int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) @@ -5437,12 +5262,45 @@ static int intel_iommu_add_device(struct device *dev) iommu_device_link(&iommu->iommu, dev); + if (translation_pre_enabled(iommu)) + dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); iommu_group_put(group); + + domain = iommu_get_domain_for_dev(dev); + dmar_domain = to_dmar_domain(domain); + if (domain->type == IOMMU_DOMAIN_DMA) { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { + ret = iommu_request_dm_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + domain_add_dev_info(si_domain, dev); + dev_info(dev, + "Device uses a private identity domain.\n"); + } + } + } else { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + if (!get_private_domain_for_dev(dev)) { + dev_warn(dev, + "Failed to get a private domain.\n"); + return -ENOMEM; + } + + dev_info(dev, + "Device uses a private dma domain.\n"); + } + } + } + return 0; } @@ -5463,22 +5321,51 @@ 
static void intel_iommu_remove_device(struct device *dev) static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { + int prot = DMA_PTE_READ | DMA_PTE_WRITE; struct iommu_resv_region *reg; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i; - rcu_read_lock(); + down_read(&dmar_global_lock); for_each_rmrr_units(rmrr) { for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, i_dev) { - if (i_dev != device) + struct iommu_resv_region *resv; + enum iommu_resv_type type; + size_t length; + + if (i_dev != device && + !is_downstream_to_pci_bridge(device, i_dev)) continue; - list_add_tail(&rmrr->resv->list, head); + length = rmrr->end_address - rmrr->base_address + 1; + + type = device_rmrr_is_relaxable(device) ? + IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; + + resv = iommu_alloc_resv_region(rmrr->base_address, + length, prot, type); + if (!resv) + break; + + list_add_tail(&resv->list, head); } } - rcu_read_unlock(); + up_read(&dmar_global_lock); + +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA + if (dev_is_pci(device)) { + struct pci_dev *pdev = to_pci_dev(device); + + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { + reg = iommu_alloc_resv_region(0, 1UL << 24, 0, + IOMMU_RESV_DIRECT); + if (reg) + list_add_tail(&reg->list, head); + } + } +#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, @@ -5493,10 +5380,8 @@ static void intel_iommu_put_resv_regions(struct device *dev, { struct iommu_resv_region *entry, *next; - list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_MSI) - kfree(entry); - } + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); } int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) @@ -5508,7 +5393,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(dev); + domain =
find_domain(dev); if (!domain) return -EINVAL; @@ -5550,6 +5435,19 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) return ret; } +static void intel_iommu_apply_resv_region(struct device *dev, + struct iommu_domain *domain, + struct iommu_resv_region *region) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long start, end; + + start = IOVA_PFN(region->start); + end = IOVA_PFN(region->start + region->length - 1); + + WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end)); +} + #ifdef CONFIG_INTEL_IOMMU_SVM struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) { @@ -5699,6 +5597,12 @@ intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) dmar_domain->default_pasid : -EINVAL; } +static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5715,11 +5619,13 @@ const struct iommu_ops intel_iommu_ops = { .remove_device = intel_iommu_remove_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = intel_iommu_put_resv_regions, + .apply_resv_region = intel_iommu_apply_resv_region, .device_group = pci_device_group, .dev_has_feat = intel_iommu_dev_has_feat, .dev_feat_enabled = intel_iommu_dev_feat_enabled, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, + .is_attach_deferred = intel_iommu_is_attach_deferred, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; |