diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-16 15:36:00 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-16 15:36:00 -0700 |
commit | 08d19f51f05a68ce89a289320ce4ed96e757df72 (patch) | |
tree | 31c5d718d0aeaff5083fe533cd6e1f9fbbe846bb /include | |
parent | 1c95e1b69073cff5ff179e592fa1a1e182c78a17 (diff) | |
parent | 2381ad241d0bea1253a37f314b270848067640bb (diff) |
Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits)
KVM: ia64: Add intel iommu support for guests.
KVM: ia64: add directed mmio range support for kvm guests
KVM: ia64: Make pmt table be able to hold physical mmio entries.
KVM: Move irqchip_in_kernel() from ioapic.h to irq.h
KVM: Separate irq ack notification out of arch/x86/kvm/irq.c
KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs
KVM: Move device assignment logic to common code
KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/
KVM: VMX: enable invlpg exiting if EPT is disabled
KVM: x86: Silence various LAPIC-related host kernel messages
KVM: Device Assignment: Map mmio pages into VT-d page table
KVM: PIC: enhance IPI avoidance
KVM: MMU: add "oos_shadow" parameter to disable oos
KVM: MMU: speed up mmu_unsync_walk
KVM: MMU: out of sync shadow core
KVM: MMU: mmu_convert_notrap helper
KVM: MMU: awareness of new kvm_mmu_zap_page behaviour
KVM: MMU: mmu_parent_walk
KVM: x86: trap invlpg
KVM: MMU: sync roots on mmu reload
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-x86/kvm.h | 22 | ||||
-rw-r--r-- | include/asm-x86/kvm_host.h | 82 | ||||
-rw-r--r-- | include/asm-x86/msr-index.h | 3 | ||||
-rw-r--r-- | include/asm-x86/pvclock.h | 1 | ||||
-rw-r--r-- | include/linux/dma_remapping.h | 157 | ||||
-rw-r--r-- | include/linux/intel-iommu.h | 327 | ||||
-rw-r--r-- | include/linux/iova.h | 52 | ||||
-rw-r--r-- | include/linux/kvm.h | 72 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 82 |
9 files changed, 736 insertions, 62 deletions
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 78e954db1e7f..ba0dd791fadf 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -208,26 +208,4 @@ struct kvm_pit_channel_state { struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) - #endif /* ASM_X86__KVM_H */ diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 69794547f514..411fb8cfb24e 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -57,6 +57,10 @@ #define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) #define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 #define UD_VECTOR 6 #define NM_VECTOR 7 #define DF_VECTOR 8 @@ -65,6 +69,7 @@ #define SS_VECTOR 12 #define GP_VECTOR 13 #define PF_VECTOR 14 +#define MF_VECTOR 16 #define MC_VECTOR 18 #define SELECTOR_TI_MASK (1 << 2) @@ -89,7 +94,7 @@ extern struct list_head vm_list; struct kvm_vcpu; struct kvm; -enum { +enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, VCPU_REGS_RDX = 2, @@ -108,6 +113,7 @@ enum { VCPU_REGS_R14 = 14, VCPU_REGS_R15 = 15, #endif + VCPU_REGS_RIP, NR_VCPU_REGS }; @@ -189,10 +195,20 @@ struct kvm_mmu_page { */ int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ + bool unsync; + bool unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ }; + DECLARE_BITMAP(unsync_child_bitmap, 512); +}; + +struct kvm_pv_mmu_op_buffer { + void *ptr; + unsigned len; + unsigned processed; + char buf[512] __aligned(sizeof(long)); }; /* @@ -207,6 +223,9 @@ struct kvm_mmu { gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); + int (*sync_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; int shadow_root_level; @@ -219,8 +238,13 @@ struct kvm_vcpu_arch { int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); - unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ - unsigned long rip; /* needs vcpu_load_rsp_rip() */ + /* + * rip and regs accesses must go through + * kvm_{register,rip}_{read,write} functions. + */ + unsigned long regs[NR_VCPU_REGS]; + u32 regs_avail; + u32 regs_dirty; unsigned long cr0; unsigned long cr2; @@ -237,6 +261,9 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; struct kvm_mmu mmu; + /* only needed in kvm_pv_mmu_op() path, but it's hot so + * put it here to avoid allocation */ + struct kvm_pv_mmu_op_buffer mmu_op_buffer; struct kvm_mmu_memory_cache mmu_pte_chain_cache; struct kvm_mmu_memory_cache mmu_rmap_desc_cache; @@ -269,6 +296,11 @@ struct kvm_vcpu_arch { u32 error_code; } exception; + struct kvm_queued_interrupt { + bool pending; + u8 nr; + } interrupt; + struct { int active; u8 save_iopl; @@ -294,6 +326,7 @@ struct kvm_vcpu_arch { struct page *time_page; bool nmi_pending; + bool nmi_injected; u64 mtrr[0x100]; }; @@ -316,9 +349,12 @@ struct kvm_arch{ * Hash table of struct kvm_mmu_page. */ struct list_head active_mmu_pages; + struct list_head assigned_dev_head; + struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; + struct hlist_head irq_ack_notifier_list; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -338,6 +374,7 @@ struct kvm_vm_stat { u32 mmu_flooded; u32 mmu_recycled; u32 mmu_cache_miss; + u32 mmu_unsync; u32 remote_tlb_flush; u32 lpages; }; @@ -364,6 +401,7 @@ struct kvm_vcpu_stat { u32 insn_emulation; u32 insn_emulation_fail; u32 hypercalls; + u32 irq_injections; }; struct descriptor_table { @@ -414,8 +452,7 @@ struct kvm_x86_ops { unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, int *exception); - void (*cache_regs)(struct kvm_vcpu *vcpu); - void (*decache_regs)(struct kvm_vcpu *vcpu); + void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); @@ -528,6 +565,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); +void kvm_pic_set_irq(void *opaque, int irq, int level); + void kvm_inject_nmi(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu); @@ -550,12 +589,14 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_fix_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); void kvm_disable_tdp(void); @@ -686,33 +727,6 @@ enum { TASK_SWITCH_GATE = 3, }; -#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 5, d1, d2, d3, d4, d5) -#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 4, d1, d2, d3, d4, 0) -#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 3, d1, d2, d3, 0, 0) -#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 2, d1, d2, 0, 0, 0) -#define KVMTRACE_1D(evt, vcpu, d1, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 1, d1, 0, 0, 0, 0) -#define KVMTRACE_0D(evt, vcpu, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 0, 0, 0, 0, 0, 0) - -#ifdef CONFIG_64BIT -# define KVM_EX_ENTRY ".quad" -# define KVM_EX_PUSH "pushq" -#else -# define KVM_EX_ENTRY ".long" -# define KVM_EX_PUSH "pushl" -#endif - /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. @@ -724,11 +738,11 @@ asmlinkage void kvm_handle_fault_on_reboot(void); "666: " insn "\n\t" \ ".pushsection .fixup, \"ax\" \n" \ "667: \n\t" \ - KVM_EX_PUSH " $666b \n\t" \ + __ASM_SIZE(push) " $666b \n\t" \ "jmp kvm_handle_fault_on_reboot \n\t" \ ".popsection \n\t" \ ".pushsection __ex_table, \"a\" \n\t" \ - KVM_EX_ENTRY " 666b, 667b \n\t" \ + _ASM_PTR " 666b, 667b \n\t" \ ".popsection" #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 0bb43301a202..dabd10f0bbee 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -178,6 +178,9 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_IA32_FEATURE_CONTROL 0x0000003a +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) + #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h index 1a38f6834800..ad29e277fd6d 100644 --- a/include/asm-x86/pvclock.h +++ b/include/asm-x86/pvclock.h @@ -6,6 +6,7 @@ /* some helper functions for xen and kvm pv clock sources */ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec *ts); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h new file mode 100644 index 000000000000..bff5c65f81dc --- /dev/null +++ b/include/linux/dma_remapping.h @@ -0,0 +1,157 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +extern int dmar_disabled; + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h new file mode 100644 index 000000000000..2e117f30a76c --- /dev/null +++ b/include/linux/intel-iommu.h @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj <ashok.raj@intel.com> + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include <linux/types.h> +#include <linux/msi.h> +#include <linux/sysdev.h> +#include <linux/iova.h> +#include <linux/io.h> +#include <linux/dma_remapping.h> +#include <asm/cacheflush.h> + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ + +#define OFFSET_STRIDE (9) +/* +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = readl(dmar + reg); \ + hi = readl(dmar + reg + 4); \ + (((u64) hi) << 32) + lo; }) +*/ +static inline u64 dmar_readq(void __iomem *addr) +{ + u32 lo, hi; + lo = readl(addr); + hi = readl(addr + 4); + return (((u64) hi) << 32) + lo; +} + +static inline void dmar_writeq(void __iomem *addr, u64 val) +{ + writel((u32)val, addr); + writel((u32)(val >> 32), addr + 4); +} + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) \ + (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) + + +/* IOTLB_REG */ +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PPF ((u32)2) +#define DMA_FSTS_PFO ((u32)1) +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +#define INTR_REMAP_TABLE_ENTRIES 65536 + +struct ir_table { + struct irte *base; +}; +#endif + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 cap; + u64 ecap; + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + + unsigned int irq; + unsigned char name[7]; /* Device Name */ + struct msi_msg saved_msg; + struct sys_device sysdev; +#endif + struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif +}; + +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern int alloc_iommu(struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); + +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); + +void intel_iommu_domain_exit(struct dmar_domain *domain); +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); +int intel_iommu_context_mapping(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); +struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); + +#ifdef CONFIG_DMAR +int intel_iommu_found(void); +#else /* CONFIG_DMAR */ +static inline int intel_iommu_found(void) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + +#endif diff --git a/include/linux/iova.h b/include/linux/iova.h new file mode 100644 index 000000000000..228f6c94b69c --- /dev/null +++ b/include/linux/iova.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + */ + +#ifndef _IOVA_H_ +#define _IOVA_H_ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/rbtree.h> +#include <linux/dma-mapping.h> + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +/* iova structure */ +struct iova { + struct rb_node node; + unsigned long pfn_hi; /* IOMMU dish out addr hi */ + unsigned long pfn_lo; /* IOMMU dish out addr lo */ +}; + +/* holds all the iova translations for a domain */ +struct iova_domain { + spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ + spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ + struct rb_root rbroot; /* iova domain rbtree root */ + struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long dma_32bit_pfn; +}; + +struct iova *alloc_iova_mem(void); +void free_iova_mem(struct iova *iova); +void free_iova(struct iova_domain *iovad, unsigned long pfn); +void __free_iova(struct iova_domain *iovad, struct iova *iova); +struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned); +struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, + unsigned long pfn_hi); +void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); +void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); +void put_iova_domain(struct iova_domain *iovad); + +#endif diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 70a30651cd12..797fcd781242 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -311,22 +311,33 @@ struct kvm_s390_interrupt { /* This structure represents a single trace buffer record. */ struct kvm_trace_rec { - __u32 event:28; - __u32 extra_u32:3; - __u32 cycle_in:1; + /* variable rec_val + * is split into: + * bits 0 - 27 -> event id + * bits 28 -30 -> number of extra data args of size u32 + * bits 31 -> binary indicator for if tsc is in record + */ + __u32 rec_val; __u32 pid; __u32 vcpu_id; union { struct { - __u64 cycle_u64; + __u64 timestamp; __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) cycle; + } __attribute__((packed)) timestamp; struct { __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } nocycle; + } notimestamp; } u; }; +#define TRACE_REC_EVENT_ID(val) \ + (0x0fffffff & (val)) +#define TRACE_REC_NUM_DATA_ARGS(val) \ + (0x70000000 & ((val) << 28)) +#define TRACE_REC_TCS(val) \ + (0x80000000 & ((val) << 31)) + #define KVMIO 0xAE /* @@ -372,6 +383,10 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#define KVM_CAP_DEVICE_ASSIGNMENT 17 +#endif +#define KVM_CAP_IOMMU 18 /* * ioctls for VM fds @@ -401,6 +416,10 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ + struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -440,4 +459,45 @@ struct kvm_trace_rec { #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; +}; + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; +}; + +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8525afc53107..3833c48fae3a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,8 @@ #define KVM_REQ_MMU_RELOAD 3 #define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 +#define KVM_REQ_UNHALT 6 +#define KVM_REQ_MMU_SYNC 7 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; @@ -279,12 +281,68 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); +void kvm_free_all_assigned_devices(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +int kvm_is_mmio_pfn(pfn_t pfn); + +struct kvm_irq_ack_notifier { + struct hlist_node link; + unsigned gsi; + void (*irq_acked)(struct kvm_irq_ack_notifier *kian); +}; + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct work_struct interrupt_work; + struct list_head list; + int assigned_dev_id; + int host_busnr; + int host_devfn; + int host_irq; + int guest_irq; + int irq_requested; + struct pci_dev *dev; + struct kvm *kvm; +}; +void kvm_set_irq(struct kvm *kvm, int irq, int level); +void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); + +#ifdef CONFIG_DMAR +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, + unsigned long npages); +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_unmap_guest(struct kvm *kvm); +#else /* CONFIG_DMAR */ +static inline int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, + unsigned long npages) +{ + return 0; +} + +static inline int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + return -ENODEV; +} + +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + static inline void kvm_guest_enter(void) { account_system_vtime(current); @@ -307,6 +365,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn << PAGE_SHIFT; } +static inline hpa_t pfn_to_hpa(pfn_t pfn) +{ + return (hpa_t)pfn << PAGE_SHIFT; +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); @@ -326,6 +389,25 @@ struct kvm_stats_debugfs_item { extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; +#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 5, d1, d2, d3, d4, d5) +#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 4, d1, d2, d3, d4, 0) +#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 3, d1, d2, d3, 0, 0) +#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 2, d1, d2, 0, 0, 0) +#define KVMTRACE_1D(evt, vcpu, d1, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 1, d1, 0, 0, 0, 0) +#define KVMTRACE_0D(evt, vcpu, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 0, 0, 0, 0, 0, 0) + #ifdef CONFIG_KVM_TRACE int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); void kvm_trace_cleanup(void); |