diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/aarch32.c | 131 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 5 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 113 | ||||
-rw-r--r-- | virt/kvm/arm/mmio.c | 68 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 32 | ||||
-rw-r--r-- | virt/kvm/arm/perf.c | 6 | ||||
-rw-r--r-- | virt/kvm/arm/pmu.c | 114 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 6 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 15 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h | 5 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 31 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 322 | ||||
-rw-r--r-- | virt/lib/irqbypass.c | 38 |
14 files changed, 465 insertions, 426 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index c4c57ba99e90..0a356aa91aa1 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -10,10 +10,15 @@ * Author: Christoffer Dall <c.dall@virtualopensystems.com> */ +#include <linux/bits.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + /* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ @@ -28,25 +33,115 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long cpsr; - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - cpsr = mode | PSR_AA32_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= PSR_AA32_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= PSR_AA32_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, new_spsr_value); + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ @@ -84,16 +179,18 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); *far = addr; /* Give the guest an IMPLEMENTATION DEFINED exception */ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) - *fsr = 1 << 9 | 0x34; - else - *fsr = 0x14; + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } } void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f182b2380345..0d9438e9de2a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); break; case TIMER_REG_CTL: @@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; break; case TIMER_REG_CTL: @@ -1022,7 +1023,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) bool kvm_arch_timer_get_input_level(int vintid) { - struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu(); + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct arch_timer_context *timer; if (vintid == vcpu_vtimer(vcpu)->irq.irq) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 8de4daf25097..d65a0faa46d8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -20,8 +20,6 @@ #include <linux/irqbypass.h> #include <linux/sched/stat.h> #include <trace/events/kvm.h> -#include <kvm/arm_pmu.h> -#include <kvm/arm_psci.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -51,9 +49,6 @@ __asm__(".arch_extension virt"); DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -/* Per-CPU variable containing the currently running vcpu. */ -static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); - /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; @@ -62,31 +57,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); - -static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(kvm_arm_running_vcpu, vcpu); -} - DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -/** - * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. - * Must be called from non-preemptible context - */ -struct kvm_vcpu *kvm_arm_get_running_vcpu(void) -{ - return __this_cpu_read(kvm_arm_running_vcpu); -} - -/** - * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. - */ -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) -{ - return &kvm_arm_running_vcpu; -} - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -194,7 +166,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { - kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm_vcpu_destroy(kvm->vcpus[i]); kvm->vcpus[i] = NULL; } } @@ -279,49 +251,46 @@ void kvm_arch_free_vm(struct kvm *kvm) vfree(kvm); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + return -EBUSY; + + if (id >= kvm->arch.max_vcpus) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; - struct kvm_vcpu *vcpu; - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { - err = -EBUSY; - goto out; - } + /* Force users to call KVM_ARM_VCPU_INIT */ + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - if (id >= kvm->arch.max_vcpus) { - err = -EINVAL; - goto out; - } + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) { - err = -ENOMEM; - goto out; - } + kvm_pmu_vcpu_init(vcpu); - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; + kvm_arm_reset_debug_ptr(vcpu); + + kvm_arm_pvtime_vcpu_init(&vcpu->arch); - err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); + err = kvm_vgic_vcpu_init(vcpu); if (err) - goto vcpu_uninit; + return err; - return vcpu; -vcpu_uninit: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); + return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) static_branch_dec(&userspace_irqchip_in_use); @@ -329,13 +298,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvm_arm_vcpu_destroy(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -368,24 +332,6 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) preempt_enable(); } -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - /* Force users to call KVM_ARM_VCPU_INIT */ - vcpu->arch.target = -1; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* Set up the timer */ - kvm_timer_vcpu_init(vcpu); - - kvm_pmu_vcpu_init(vcpu); - - kvm_arm_reset_debug_ptr(vcpu); - - kvm_arm_pvtime_vcpu_init(&vcpu->arch); - - return kvm_vgic_vcpu_init(vcpu); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { int *last_ran; @@ -406,7 +352,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; - kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); @@ -432,8 +377,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vcpu_pmu_restore_host(vcpu); vcpu->cpu = -1; - - kvm_arm_set_running_vcpu(NULL); } static void vcpu_power_off(struct kvm_vcpu *vcpu) @@ -1537,7 +1480,6 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - hyp_cpu_pm_exit(); } /** @@ -1751,6 +1693,7 @@ int kvm_arch_init(void *opaque) return 0; out_hyp: + hyp_cpu_pm_exit(); if (!in_hyp_mode) teardown_hyp_mode(); out_err: diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 70d3b449692c..aedfcff99ac5 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -5,7 +5,6 @@ */ #include <linux/kvm_host.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_emulate.h> #include <trace/events/kvm.h> @@ -92,23 +91,23 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; - if (!run->mmio.is_write) { - len = run->mmio.len; - if (len > sizeof(unsigned long)) - return -EINVAL; - + if (!kvm_vcpu_dabt_iswrite(vcpu)) { + len = kvm_vcpu_dabt_get_as(vcpu); data = kvm_mmio_read_buf(run->mmio.data, len); - if (vcpu->arch.mmio_decode.sign_extend && + if (kvm_vcpu_dabt_issext(vcpu) && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); data = (data ^ mask) - mask; } + if (!kvm_vcpu_dabt_issf(vcpu)) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); - vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); } /* @@ -120,33 +119,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) -{ - unsigned long rt; - int access_size; - bool sign_extend; - - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - /* page table accesses IO mem: tell guest to fix its TTBR */ - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - - access_size = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(access_size < 0)) - return access_size; - - *is_write = kvm_vcpu_dabt_iswrite(vcpu); - sign_extend = kvm_vcpu_dabt_issext(vcpu); - rt = kvm_vcpu_dabt_get_rd(vcpu); - - *len = access_size; - vcpu->arch.mmio_decode.sign_extend = sign_extend; - vcpu->arch.mmio_decode.rt = rt; - - return 0; -} - int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { @@ -158,15 +130,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, u8 data_buf[8]; /* - * Prepare MMIO operation. First decode the syndrome data we get - * from the CPU. Then try if some in-kernel emulation feels - * responsible, otherwise let user space do its magic. + * No valid syndrome? Ask userspace for help if it has + * voluntered to do so, and bail out otherwise. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, &is_write, &len); - if (ret) - return ret; - } else { + if (!kvm_vcpu_dabt_isvalid(vcpu)) { if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { run->exit_reason = KVM_EXIT_ARM_NISV; run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); @@ -178,7 +145,20 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, return -ENOSYS; } - rt = vcpu->arch.mmio_decode.rt; + /* Page table accesses IO mem: tell guest to fix its TTBR */ + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. + */ + is_write = kvm_vcpu_dabt_iswrite(vcpu); + len = kvm_vcpu_dabt_get_as(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); if (is_write) { data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 0b32a904a1bb..19c961ac4e3c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -14,7 +14,6 @@ #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_ras.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> @@ -1377,14 +1376,8 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; - struct page *page = pfn_to_page(pfn); - /* - * PageTransCompoundMap() returns true for THP and - * hugetlbfs. Make sure the adjustment is done only for THP - * pages. - */ - if (!PageHuge(page) && PageTransCompoundMap(page)) { + if (kvm_is_transparent_hugepage(pfn)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge @@ -1596,16 +1589,8 @@ static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) __invalidate_icache_guest_page(pfn, size); } -static void kvm_send_hwpoison_signal(unsigned long address, - struct vm_area_struct *vma) +static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { - short lsb; - - if (is_vm_hugetlb_page(vma)) - lsb = huge_page_shift(hstate_vma(vma)); - else - lsb = PAGE_SHIFT; - send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); } @@ -1678,6 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; + short vma_shift; kvm_pfn_t pfn; pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); @@ -1701,7 +1687,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - vma_pagesize = vma_kernel_pagesize(vma); + if (is_vm_hugetlb_page(vma)) + vma_shift = huge_page_shift(hstate_vma(vma)); + else + vma_shift = PAGE_SHIFT; + + vma_pagesize = 1ULL << vma_shift; if (logging_active || (vma->vm_flags & VM_PFNMAP) || !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { @@ -1741,7 +1732,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(hva, vma); + kvm_send_hwpoison_signal(hva, vma_shift); return 0; } if (is_error_noslot_pfn(pfn)) @@ -2147,7 +2138,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/perf.c b/virt/kvm/arm/perf.c index 918cdc3839ea..d45b8b9a4415 100644 --- a/virt/kvm/arm/perf.c +++ b/virt/kvm/arm/perf.c @@ -13,14 +13,14 @@ static int kvm_is_in_guest(void) { - return kvm_arm_get_running_vcpu() != NULL; + return kvm_get_running_vcpu() != NULL; } static int kvm_is_user_mode(void) { struct kvm_vcpu *vcpu; - vcpu = kvm_arm_get_running_vcpu(); + vcpu = kvm_get_running_vcpu(); if (vcpu) return !vcpu_mode_priv(vcpu); @@ -32,7 +32,7 @@ static unsigned long kvm_get_guest_ip(void) { struct kvm_vcpu *vcpu; - vcpu = kvm_arm_get_running_vcpu(); + vcpu = kvm_get_running_vcpu(); if (vcpu) return *vcpu_pc(vcpu); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8731dfeced8b..f0d0312c0a55 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -15,6 +15,8 @@ #include <kvm/arm_vgic.h> static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 @@ -75,6 +77,13 @@ static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) return pmc; } +static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + else + return pmc + 1; +} /** * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain @@ -238,10 +247,11 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) */ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { - int i; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); @@ -294,15 +304,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute set. - */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) { @@ -335,15 +339,9 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute unset. - */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) @@ -480,25 +478,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { + struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; - u64 type, enable, reg; - if (val == 0) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; - enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + if (!(val & BIT(i))) continue; - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) - & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) - && (enable & BIT(i))) { - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + + /* PMSWINC only applies to ... SW_INC! */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (!reg) - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } @@ -510,10 +528,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - u64 mask; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; - mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); @@ -525,7 +542,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } } @@ -582,15 +599,14 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + if (kvm_pmu_pmc_is_chained(pmc)) { /** * The initial sample period (overflow count) of an event. For * chained counters we only support overflow interrupts on the * high counter. */ attr.sample_period = (-counter) & GENMASK(63, 0); - if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, @@ -621,25 +637,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) * @select_idx: The number of selected counter * * Update the chained bitmap based on the event type written in the - * typer register. + * typer register and the enable state of the odd register. */ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; + bool new_state, old_state; + + old_state = kvm_pmu_pmc_is_chained(pmc); + new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && + kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); + + if (old_state == new_state) + return; - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); + kvm_pmu_stop_counter(vcpu, canonical_pmc); + if (new_state) { /* * During promotion from !chained to chained we must ensure * the adjacent counter is stopped and its event destroyed */ - if (!kvm_pmu_pmc_is_chained(pmc)) - kvm_pmu_stop_counter(vcpu, pmc); - + kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - } else { - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return; } + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); } /** diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 98c7360d9fb7..d53d34a33e35 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -839,9 +839,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, u32 event_id = its_cmd_get_id(its_cmd); struct its_ite *ite; - ite = find_ite(its, device_id, event_id); - if (ite && ite->collection) { + if (ite && its_is_collection_mapped(ite->collection)) { /* * Though the spec talks about removing the pending state, we * don't bother here since we clear the ITTE anyway and the @@ -2475,7 +2474,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 7dfd15dbb308..ebc218840fc2 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -414,8 +414,11 @@ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; - return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); + value &= ~GICR_PENDBASER_PTZ; + + return extract_bytes(value, addr & 7, len); } static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 0d090482720d..d656ebd5f9d4 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -190,15 +190,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, * value later will give us the same value as we update the per-CPU variable * in the preempt notifier handlers. */ -static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void) -{ - struct kvm_vcpu *vcpu; - - preempt_disable(); - vcpu = kvm_arm_get_running_vcpu(); - preempt_enable(); - return vcpu; -} /* Must be called with irq->irq_lock held */ static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, @@ -221,7 +212,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !vgic_get_mmio_requester_vcpu(); + bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -274,7 +265,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !vgic_get_mmio_requester_vcpu(); + bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -335,7 +326,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool active) { unsigned long flags; - struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); + struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu(); raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 836f418f1ee8..5af2aefad435 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -98,11 +98,6 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .uaccess_write = uwr, \ } -int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, - struct vgic_register_region *reg_desc, - struct vgic_io_device *region, - int nr_irqs, bool offset_private); - unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 35305d6e68cc..15e5b037f92d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -17,21 +17,6 @@ #include "async_pf.h" #include <trace/events/kvm.h> -static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) -{ -#ifdef CONFIG_KVM_ASYNC_PF_SYNC - kvm_arch_async_page_present(vcpu, work); -#endif -} -static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) -{ -#ifndef CONFIG_KVM_ASYNC_PF_SYNC - kvm_arch_async_page_present(vcpu, work); -#endif -} - static struct kmem_cache *async_pf_cache; int kvm_async_pf_init(void) @@ -64,7 +49,7 @@ static void async_pf_execute(struct work_struct *work) struct mm_struct *mm = apf->mm; struct kvm_vcpu *vcpu = apf->vcpu; unsigned long addr = apf->addr; - gva_t gva = apf->gva; + gpa_t cr2_or_gpa = apf->cr2_or_gpa; int locked = 1; might_sleep(); @@ -80,7 +65,8 @@ static void async_pf_execute(struct work_struct *work) if (locked) up_read(&mm->mmap_sem); - kvm_async_page_present_sync(vcpu, apf); + if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC)) + kvm_arch_async_page_present(vcpu, apf); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); @@ -92,7 +78,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, gva); + trace_kvm_async_pf_completed(addr, cr2_or_gpa); if (swq_has_sleeper(&vcpu->wq)) swake_up_one(&vcpu->wq); @@ -157,7 +143,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->async_pf.lock); kvm_arch_async_page_ready(vcpu, work); - kvm_async_page_present_async(vcpu, work); + if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC)) + kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; @@ -165,8 +152,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch) +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -185,7 +172,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->wakeup_all = false; work->vcpu = vcpu; - work->gva = gva; + work->cr2_or_gpa = cr2_or_gpa; work->addr = hva; work->arch = *arch; work->mm = current->mm; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 67eb302a7240..7e63a3236364 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -104,16 +104,16 @@ static cpumask_var_t cpus_hardware_enabled; static int kvm_usage_count; static atomic_t hardware_enable_failed; -struct kmem_cache *kvm_vcpu_cache; -EXPORT_SYMBOL_GPL(kvm_vcpu_cache); +static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); struct dentry *kvm_debugfs_dir; EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static int kvm_debugfs_num_entries; -static const struct file_operations *stat_fops_per_vm[]; +static const struct file_operations stat_fops_per_vm; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); @@ -191,12 +191,24 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn) return true; } +bool kvm_is_transparent_hugepage(kvm_pfn_t pfn) +{ + struct page *page = pfn_to_page(pfn); + + if (!PageTransCompoundMap(page)) + return false; + + return is_transparent_hugepage(compound_head(page)); +} + /* * Switches to specified vcpu, until a matching vcpu_put() */ void vcpu_load(struct kvm_vcpu *vcpu) { int cpu = get_cpu(); + + __this_cpu_write(kvm_running_vcpu, vcpu); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); @@ -208,6 +220,7 @@ void vcpu_put(struct kvm_vcpu *vcpu) preempt_disable(); kvm_arch_vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); + __this_cpu_write(kvm_running_vcpu, NULL); preempt_enable(); } EXPORT_SYMBOL_GPL(vcpu_put); @@ -322,11 +335,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } -int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { - struct page *page; - int r; - mutex_init(&vcpu->mutex); vcpu->cpu = -1; vcpu->kvm = kvm; @@ -338,42 +348,28 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->pre_pcpu = -1; INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - r = -ENOMEM; - goto fail; - } - vcpu->run = page_address(page); - kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; vcpu->ready = false; - - r = kvm_arch_vcpu_init(vcpu); - if (r < 0) - goto fail_free_run; - return 0; - -fail_free_run: - free_page((unsigned long)vcpu->run); -fail: - return r; + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); } -EXPORT_SYMBOL_GPL(kvm_vcpu_init); -void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_arch_vcpu_destroy(vcpu); + /* - * no need for rcu_read_lock as VCPU_RUN is the only place that - * will change the vcpu->pid pointer and on uninit all file - * descriptors are already gone. + * No need for rcu_read_lock as VCPU_RUN is the only place that changes + * the vcpu->pid pointer, and at destruction time all file descriptors + * are already gone. */ put_pid(rcu_dereference_protected(vcpu->pid, 1)); - kvm_arch_vcpu_uninit(vcpu); + free_page((unsigned long)vcpu->run); + kmem_cache_free(kvm_vcpu_cache, vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); +EXPORT_SYMBOL_GPL(kvm_vcpu_destroy); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) @@ -650,11 +646,11 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return -ENOMEM; stat_data->kvm = kvm; - stat_data->offset = p->offset; - stat_data->mode = p->mode ? p->mode : 0644; + stat_data->dbgfs_item = p; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, - stat_data, stat_fops_per_vm[p->kind]); + debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), + kvm->debugfs_dentry, stat_data, + &stat_fops_per_vm); } return 0; } @@ -964,7 +960,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, /* * Increment the new memslot generation a second time, dropping the - * update in-progress flag and incrementing then generation based on + * update in-progress flag and incrementing the generation based on * the number of address spaces. This provides a unique and easily * identifiable generation number while the memslots are in flux. */ @@ -1117,7 +1113,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * * validation of sp->gfn happens in: * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_roots) + * - kvm_is_visible_gfn (mmu_check_root) */ kvm_arch_flush_shadow_memslot(kvm, slot); @@ -1406,14 +1402,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; @@ -1519,7 +1515,7 @@ static inline int check_user_page_hwpoison(unsigned long addr) /* * The fast path to get the writable pfn which will be stored in @pfn, * true indicates success, otherwise false is returned. It's also the - * only part that runs if we can are in atomic context. + * only part that runs if we can in atomic context. */ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, bool *writable, kvm_pfn_t *pfn) @@ -2010,11 +2006,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { - struct page *page = pfn_to_page(pfn); - - SetPageDirty(page); - } + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) + SetPageDirty(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); @@ -2130,17 +2123,6 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } -int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, - unsigned long len) -{ - gfn_t gfn = gpa >> PAGE_SHIFT; - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - int offset = offset_in_page(gpa); - - return __kvm_read_guest_atomic(slot, gfn, data, offset, len); -} -EXPORT_SYMBOL_GPL(kvm_read_guest_atomic); - int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) { @@ -2237,33 +2219,36 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; gfn_t nr_pages_needed = end_gfn - start_gfn + 1; gfn_t nr_pages_avail; - int r = start_gfn <= end_gfn ? 0 : -EINVAL; - ghc->gpa = gpa; + /* Update ghc->generation before performing any error checks. */ ghc->generation = slots->generation; - ghc->len = len; - ghc->hva = KVM_HVA_ERR_BAD; + + if (start_gfn > end_gfn) { + ghc->hva = KVM_HVA_ERR_BAD; + return -EINVAL; + } /* * If the requested region crosses two memslots, we still * verify that the entire region is valid here. */ - while (!r && start_gfn <= end_gfn) { + for ( ; start_gfn <= end_gfn; start_gfn += nr_pages_avail) { ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); if (kvm_is_error_hva(ghc->hva)) - r = -EFAULT; - start_gfn += nr_pages_avail; + return -EFAULT; } /* Use the slow path for cross page reads and writes. */ - if (!r && nr_pages_needed == 1) + if (nr_pages_needed == 1) ghc->hva += offset; else ghc->memslot = NULL; - return r; + ghc->gpa = gpa; + ghc->len = len; + return 0; } int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, @@ -2284,15 +2269,17 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, BUG_ON(len + offset > ghc->len); - if (slots->generation != ghc->generation) - __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); + if (slots->generation != ghc->generation) { + if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) + return -EFAULT; + } if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2317,15 +2304,17 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, BUG_ON(len > ghc->len); - if (slots->generation != ghc->generation) - __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); + if (slots->generation != ghc->generation) { + if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) + return -EFAULT; + } if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT; @@ -2797,6 +2786,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; struct kvm_vcpu *vcpu; + struct page *page; if (id >= KVM_MAX_VCPU_ID) return -EINVAL; @@ -2810,17 +2800,29 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) kvm->created_vcpus++; mutex_unlock(&kvm->lock); - vcpu = kvm_arch_vcpu_create(kvm, id); - if (IS_ERR(vcpu)) { - r = PTR_ERR(vcpu); + r = kvm_arch_vcpu_precreate(kvm, id); + if (r) + goto vcpu_decrement; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + r = -ENOMEM; goto vcpu_decrement; } - preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); + BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto vcpu_free; + } + vcpu->run = page_address(page); - r = kvm_arch_vcpu_setup(vcpu); + kvm_vcpu_init(vcpu, kvm, id); + + r = kvm_arch_vcpu_create(vcpu); if (r) - goto vcpu_destroy; + goto vcpu_free_run_page; kvm_create_vcpu_debugfs(vcpu); @@ -2857,8 +2859,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) unlock_vcpu_destroy: mutex_unlock(&kvm->lock); debugfs_remove_recursive(vcpu->debugfs_dentry); -vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); +vcpu_free_run_page: + free_page((unsigned long)vcpu->run); +vcpu_free: + kmem_cache_free(kvm_vcpu_cache, vcpu); vcpu_decrement: mutex_lock(&kvm->lock); kvm->created_vcpus--; @@ -4092,8 +4097,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, return -ENOENT; if (simple_attr_open(inode, file, get, - stat_data->mode & S_IWUGO ? set : NULL, - fmt)) { + KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222 + ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -4112,105 +4118,111 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file) return 0; } -static int vm_stat_get_per_vm(void *data, u64 *val) +static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) { - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + *val = *(ulong *)((void *)kvm + offset); + + return 0; +} - *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset); +static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) +{ + *(ulong *)((void *)kvm + offset) = 0; return 0; } -static int vm_stat_clear_per_vm(void *data, u64 val) +static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val) { - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + int i; + struct kvm_vcpu *vcpu; - if (val) - return -EINVAL; + *val = 0; - *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0; + kvm_for_each_vcpu(i, vcpu, kvm) + *val += *(u64 *)((void *)vcpu + offset); return 0; } -static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) +static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) { - __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, - vm_stat_clear_per_vm, "%llu\n"); -} + int i; + struct kvm_vcpu *vcpu; -static const struct file_operations vm_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vm_stat_get_per_vm_open, - .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = no_llseek, -}; + kvm_for_each_vcpu(i, vcpu, kvm) + *(u64 *)((void *)vcpu + offset) = 0; -static int vcpu_stat_get_per_vm(void *data, u64 *val) + return 0; +} + +static int kvm_stat_data_get(void *data, u64 *val) { - int i; + int r = -EFAULT; struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - struct kvm_vcpu *vcpu; - *val = 0; - - kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *val += *(u64 *)((void *)vcpu + stat_data->offset); + switch (stat_data->dbgfs_item->kind) { + case KVM_STAT_VM: + r = kvm_get_stat_per_vm(stat_data->kvm, + stat_data->dbgfs_item->offset, val); + break; + case KVM_STAT_VCPU: + r = kvm_get_stat_per_vcpu(stat_data->kvm, + stat_data->dbgfs_item->offset, val); + break; + } - return 0; + return r; } -static int vcpu_stat_clear_per_vm(void *data, u64 val) +static int kvm_stat_data_clear(void *data, u64 val) { - int i; + int r = -EFAULT; struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - struct kvm_vcpu *vcpu; if (val) return -EINVAL; - kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *(u64 *)((void *)vcpu + stat_data->offset) = 0; + switch (stat_data->dbgfs_item->kind) { + case KVM_STAT_VM: + r = kvm_clear_stat_per_vm(stat_data->kvm, + stat_data->dbgfs_item->offset); + break; + case KVM_STAT_VCPU: + r = kvm_clear_stat_per_vcpu(stat_data->kvm, + stat_data->dbgfs_item->offset); + break; + } - return 0; + return r; } -static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) +static int kvm_stat_data_open(struct inode *inode, struct file *file) { __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, - vcpu_stat_clear_per_vm, "%llu\n"); + return kvm_debugfs_open(inode, file, kvm_stat_data_get, + kvm_stat_data_clear, "%llu\n"); } -static const struct file_operations vcpu_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vcpu_stat_get_per_vm_open, +static const struct file_operations stat_fops_per_vm = { + .owner = THIS_MODULE, + .open = kvm_stat_data_open, .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = no_llseek, -}; - -static const struct file_operations *stat_fops_per_vm[] = { - [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops, - [KVM_STAT_VM] = &vm_stat_get_per_vm_fops, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = no_llseek, }; static int vm_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; u64 tmp_val; *val = 0; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + kvm_get_stat_per_vm(kvm, offset, &tmp_val); *val += tmp_val; } mutex_unlock(&kvm_lock); @@ -4221,15 +4233,13 @@ static int vm_stat_clear(void *_offset, u64 val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; if (val) return -EINVAL; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vm_stat_clear_per_vm((void *)&stat_tmp, 0); + kvm_clear_stat_per_vm(kvm, offset); } mutex_unlock(&kvm_lock); @@ -4242,14 +4252,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; u64 tmp_val; *val = 0; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + kvm_get_stat_per_vcpu(kvm, offset, &tmp_val); *val += tmp_val; } mutex_unlock(&kvm_lock); @@ -4260,15 +4268,13 @@ static int vcpu_stat_clear(void *_offset, u64 val) { unsigned offset = (long)_offset; struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; if (val) return -EINVAL; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); + kvm_clear_stat_per_vcpu(kvm, offset); } mutex_unlock(&kvm_lock); @@ -4341,9 +4347,8 @@ static void kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - int mode = p->mode ? p->mode : 0644; - debugfs_create_file(p->name, mode, kvm_debugfs_dir, - (void *)(long)p->offset, + debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), + kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } } @@ -4383,8 +4388,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) WRITE_ONCE(vcpu->preempted, false); WRITE_ONCE(vcpu->ready, false); + __this_cpu_write(kvm_running_vcpu, vcpu); kvm_arch_sched_in(vcpu, cpu); - kvm_arch_vcpu_load(vcpu, cpu); } @@ -4398,6 +4403,25 @@ static void kvm_sched_out(struct preempt_notifier *pn, WRITE_ONCE(vcpu->ready, true); } kvm_arch_vcpu_put(vcpu); + __this_cpu_write(kvm_running_vcpu, NULL); +} + +/** + * kvm_get_running_vcpu - get the vcpu running on the current CPU. + * Thanks to preempt notifiers, this can also be called from + * preemptible context. + */ +struct kvm_vcpu *kvm_get_running_vcpu(void) +{ + return __this_cpu_read(kvm_running_vcpu); +} + +/** + * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. + */ +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) +{ + return &kvm_running_vcpu; } static void check_processor_compat(void *rtn) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 43de8ae78fa1..28fda42e471b 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -85,6 +85,7 @@ int irq_bypass_register_producer(struct irq_bypass_producer *producer) { struct irq_bypass_producer *tmp; struct irq_bypass_consumer *consumer; + int ret; if (!producer->token) return -EINVAL; @@ -98,20 +99,16 @@ int irq_bypass_register_producer(struct irq_bypass_producer *producer) list_for_each_entry(tmp, &producers, node) { if (tmp->token == producer->token) { - mutex_unlock(&lock); - module_put(THIS_MODULE); - return -EBUSY; + ret = -EBUSY; + goto out_err; } } list_for_each_entry(consumer, &consumers, node) { if (consumer->token == producer->token) { - int ret = __connect(producer, consumer); - if (ret) { - mutex_unlock(&lock); - module_put(THIS_MODULE); - return ret; - } + ret = __connect(producer, consumer); + if (ret) + goto out_err; break; } } @@ -121,6 +118,10 @@ int irq_bypass_register_producer(struct irq_bypass_producer *producer) mutex_unlock(&lock); return 0; +out_err: + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; } EXPORT_SYMBOL_GPL(irq_bypass_register_producer); @@ -179,6 +180,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) { struct irq_bypass_consumer *tmp; struct irq_bypass_producer *producer; + int ret; if (!consumer->token || !consumer->add_producer || !consumer->del_producer) @@ -193,20 +195,16 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) list_for_each_entry(tmp, &consumers, node) { if (tmp->token == consumer->token || tmp == consumer) { - mutex_unlock(&lock); - module_put(THIS_MODULE); - return -EBUSY; + ret = -EBUSY; + goto out_err; } } list_for_each_entry(producer, &producers, node) { if (producer->token == consumer->token) { - int ret = __connect(producer, consumer); - if (ret) { - mutex_unlock(&lock); - module_put(THIS_MODULE); - return ret; - } + ret = __connect(producer, consumer); + if (ret) + goto out_err; break; } } @@ -216,6 +214,10 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) mutex_unlock(&lock); return 0; +out_err: + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; } EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); |