author    Linus Torvalds <torvalds@linux-foundation.org>  2022-06-14 07:57:18 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2022-06-14 07:57:18 -0700
commit    24625f7d91fb86b91e14749633a7f022f5866116 (patch)
tree      037124099338938a72c0981e94b3788f6ef21ceb /arch
parent    8e8afafb0b5571b7cb10b529dc60cadb7241bed4 (diff)
parent    e0f3f46e42064a51573914766897b4ab95d943e3 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"While last week's pull request contained miscellaneous fixes for x86,
this one covers other architectures, selftests changes, and a bigger
series for APIC virtualization bugs that were discovered during 5.20
development. The idea is to base 5.20 development for KVM on top of
this tag.
ARM64:
- Properly reset the SVE/SME flags on vcpu load
- Fix a vgic-v2 regression regarding accessing the pending state of a
HW interrupt from userspace (and make the code common with vgic-v3)
- Fix access to the idreg range for protected guests
- Ignore 'kvm-arm.mode=protected' when using VHE
- Return an error from kvm_arch_init_vm() on allocation failure
- A bunch of small cleanups (comments, annotations, indentation)
RISC-V:
- Typo fix in arch/riscv/kvm/vmid.c
- Remove broken reference pattern from MAINTAINERS entry
x86-64:
- Fix error in page tables with MKTME enabled
- Dirty page tracking performance test extended to running a nested
guest
- Disable APICv/AVIC in cases that it cannot handle correctly"
[ This merge also fixes a misplaced end parenthesis bug introduced in
commit 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC
ID or APIC base") pointed out by Sean Christopherson ]
Link: https://lore.kernel.org/all/20220610191813.371682-1-seanjc@google.com/
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (34 commits)
KVM: selftests: Restrict test region to 48-bit physical addresses when using nested
KVM: selftests: Add option to run dirty_log_perf_test vCPUs in L2
KVM: selftests: Clean up LIBKVM files in Makefile
KVM: selftests: Link selftests directly with lib object files
KVM: selftests: Drop unnecessary rule for STATIC_LIBS
KVM: selftests: Add a helper to check EPT/VPID capabilities
KVM: selftests: Move VMX_EPT_VPID_CAP_AD_BITS to vmx.h
KVM: selftests: Refactor nested_map() to specify target level
KVM: selftests: Drop stale function parameter comment for nested_map()
KVM: selftests: Add option to create 2M and 1G EPT mappings
KVM: selftests: Replace x86_page_size with PG_LEVEL_XX
KVM: x86: SVM: fix nested PAUSE filtering when L0 intercepts PAUSE
KVM: x86: SVM: drop preempt-safe wrappers for avic_vcpu_load/put
KVM: x86: disable preemption around the call to kvm_arch_vcpu_{un|}blocking
KVM: x86: disable preemption while updating apicv inhibition
KVM: x86: SVM: fix avic_kick_target_vcpus_fast
KVM: x86: SVM: remove avic's broken code that updated APIC ID
KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base
KVM: x86: document AVIC/APICv inhibit reasons
KVM: x86/mmu: Set memory encryption "value", not "mask", in shadow PDPTRs
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h     |   5
-rw-r--r-- | arch/arm64/include/asm/virt.h         |   3
-rw-r--r-- | arch/arm64/kernel/cpufeature.c        |  10
-rw-r--r-- | arch/arm64/kvm/arch_timer.c           |   3
-rw-r--r-- | arch/arm64/kvm/arm.c                  |  10
-rw-r--r-- | arch/arm64/kvm/fpsimd.c               |   2
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c |   4
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/sys_regs.c    |  42
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v2.c    |   4
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v3.c    |  40
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.c       |  40
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.h       |   3
-rw-r--r-- | arch/arm64/kvm/vmid.c                 |   2
-rw-r--r-- | arch/riscv/kvm/vmid.c                 |   2
-rw-r--r-- | arch/x86/include/asm/kvm_host.h       |  67
-rw-r--r-- | arch/x86/kvm/lapic.c                  |  27
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c                |   2
-rw-r--r-- | arch/x86/kvm/svm/avic.c               | 171
-rw-r--r-- | arch/x86/kvm/svm/nested.c             |  39
-rw-r--r-- | arch/x86/kvm/svm/svm.c                |   8
-rw-r--r-- | arch/x86/kvm/svm/svm.h                |   4
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c                |   4
-rw-r--r-- | arch/x86/kvm/x86.c                    |   2
23 files changed, 293 insertions, 201 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 47a1e25e25bb..de32152cea04 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -363,11 +363,6 @@ struct kvm_vcpu_arch { struct kvm_pmu pmu; /* - * Anything that is not used directly from assembly code goes - * here. - */ - - /* * Guest registers we preserve during guest debugging. * * These shadow registers are updated by the kvm_handle_sys_reg diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 3c8af033a997..0e80db4327b6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void) /* * Code only run in VHE/NVHE hyp context can assume VHE is present or * absent. Otherwise fall back to caps. + * This allows the compiler to discard VHE-specific code from the + * nVHE object, reducing the number of external symbol references + * needed to link. */ if (is_vhe_hyp_code()) return true; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 42ea2bd856c6..79fac13ab2ef 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return false; - - if (is_kernel_in_hyp_mode()) { - pr_warn("Protected KVM not available with VHE\n"); - return false; - } - - return true; + return kvm_get_mode() == KVM_MODE_PROTECTED; } #endif /* CONFIG_KVM */ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 4e39ace073af..3b8d062e30ea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid) struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct arch_timer_context *timer; + if (WARN(!vcpu, "No vcpu context!\n")) + return false; + if (vintid == vcpu_vtimer(vcpu)->irq.irq) timer = vcpu_vtimer(vcpu); else if (vintid == vcpu_ptimer(vcpu)->irq.irq) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 400bb0fe2745..a0188144a122 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { + ret = -ENOMEM; goto out_free_stage2_pgd; + } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); kvm_vgic_early_init(kvm); @@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg) return -EINVAL; if (strcmp(arg, "protected") == 0) { - kvm_mode = KVM_MODE_PROTECTED; + if (!is_kernel_in_hyp_mode()) + kvm_mode = KVM_MODE_PROTECTED; + else + pr_warn_once("Protected KVM not available with VHE\n"); + return 0; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3d251a4d2cf7..6012b08ecb14 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags |= KVM_ARM64_FP_HOST; + vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; @@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * 
operations. Do this for ZA as well for now for simplicity. */ if (system_supports_sme()) { + vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 78edf077fa3b..1e78acf9662e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); } int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, addr, size, &host_s2_pool, owner_id); } diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index b6d86e423319..35a4331ba5f3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) case SYS_ID_AA64MMFR2_EL1: return get_pvm_id_aa64mmfr2(vcpu); default: - /* - * Should never happen because all cases are covered in - * pvm_sys_reg_descs[]. - */ - WARN_ON(1); - break; + /* Unhandled ID register, RAZ */ + return 0; } - - return 0; } static u64 read_id_reg(const struct kvm_vcpu *vcpu, @@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, /* Mark the specified system register as an AArch64 feature id register. */ #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } +/* + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 + * (1 <= crm < 8, 0 <= Op2 < 8). + */ +#define ID_UNALLOCATED(crm, op2) { \ + Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ + .access = pvm_access_id_aarch64, \ +} + /* Mark the specified system register as Read-As-Zero/Write-Ignored */ #define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } @@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { AARCH32(SYS_MVFR0_EL1), AARCH32(SYS_MVFR1_EL1), AARCH32(SYS_MVFR2_EL1), + ID_UNALLOCATED(3,3), AARCH32(SYS_ID_PFR2_EL1), AARCH32(SYS_ID_DFR1_EL1), AARCH32(SYS_ID_MMFR5_EL1), + ID_UNALLOCATED(3,7), /* AArch64 ID registers */ /* CRm=4 */ AARCH64(SYS_ID_AA64PFR0_EL1), AARCH64(SYS_ID_AA64PFR1_EL1), + ID_UNALLOCATED(4,2), + ID_UNALLOCATED(4,3), AARCH64(SYS_ID_AA64ZFR0_EL1), + ID_UNALLOCATED(4,5), + ID_UNALLOCATED(4,6), + ID_UNALLOCATED(4,7), AARCH64(SYS_ID_AA64DFR0_EL1), AARCH64(SYS_ID_AA64DFR1_EL1), + ID_UNALLOCATED(5,2), + ID_UNALLOCATED(5,3), AARCH64(SYS_ID_AA64AFR0_EL1), AARCH64(SYS_ID_AA64AFR1_EL1), + ID_UNALLOCATED(5,6), + ID_UNALLOCATED(5,7), AARCH64(SYS_ID_AA64ISAR0_EL1), AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64ISAR2_EL1), + ID_UNALLOCATED(6,3), + ID_UNALLOCATED(6,4), + ID_UNALLOCATED(6,5), + ID_UNALLOCATED(6,6), + ID_UNALLOCATED(6,7), AARCH64(SYS_ID_AA64MMFR0_EL1), AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1), + ID_UNALLOCATED(7,3), + ID_UNALLOCATED(7,4), + ID_UNALLOCATED(7,5), + ID_UNALLOCATED(7,6), + ID_UNALLOCATED(7,7), /* Scalable Vector Registers are restricted. 
*/ diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 77a67e9d3d14..e070cda86e12 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, vgic_mmio_read_pending, vgic_mmio_write_spending, - NULL, vgic_uaccess_write_spending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, vgic_mmio_read_pending, vgic_mmio_write_cpending, - NULL, vgic_uaccess_write_cpending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index f7aa7bcd6fb8..f15e29cc63ce 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } -static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* - * pending state of interrupt is latched in pending_latch variable. - * Userspace will save and restore pending state and line_level - * separately. - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst - * for handling of ISPENDR and ICPENDR. - */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - bool state = irq->pending_latch; - - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { - int err; - - err = irq_get_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - &state); - WARN_ON(err); - } - - if (state) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, vgic_mmio_read_pending, vgic_mmio_write_cpending, @@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, vgic_mmio_read_pending, vgic_mmio_write_cpending, diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 49837d3a3ef5..997d0fce2088 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, return 0; } -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) +static unsigned long __read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + bool 
is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 value = 0; @@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, unsigned long flags; bool val; + /* + * When used from userspace with a GICv3 model: + * + * Pending state of interrupt is latched in pending_latch + * variable. Userspace will save and restore pending state + * and line_level separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. + */ raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { int err; @@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - } else if (vgic_irq_is_mapped_level(irq)) { + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { val = vgic_get_phys_line_level(irq); } else { - val = irq_is_pending(irq); + switch (vcpu->kvm->arch.vgic.vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + if (is_user) { + val = irq->pending_latch; + break; + } + fallthrough; + default: + val = irq_is_pending(irq); + break; + } } value |= ((u32)val << i); @@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, false); +} + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, true); +} + static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { return (vgic_irq_is_sgi(irq->intid) && diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index 3fa696f198a3..6082d4b66d39 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 8d5f0506fd87..d78ae63d7c15 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -66,7 +66,7 @@ static void flush_context(void) * the next context-switch, we broadcast TLB flush + I-cache * invalidation over the inner shareable domain on rollover. */ - kvm_call_hyp(__kvm_flush_vm_context); + kvm_call_hyp(__kvm_flush_vm_context); } static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 9f764df125db..6cd93995fb65 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu) * We ran out of VMIDs so we increment vmid_version and * start assigning VMIDs from 1. * - * This also means existing VMIDs assignement to all Guest + * This also means existing VMIDs assignment to all Guest * instances is invalid and we have force VMID re-assignement * for all Guest instances. 
The Guest instances that were not * running will automatically pick-up new VMIDs because will diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a240a64ac68..9217bd6cf0d1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter { }; enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ APICV_INHIBIT_REASON_DISABLE, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * For simplicity, the APIC acceleration is inhibited + * first time either APIC ID or APIC base are changed by the guest + * from their reset values. + */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is inhibited because the guest has x2apic in its CPUID. + */ APICV_INHIBIT_REASON_X2APIC, - APICV_INHIBIT_REASON_BLOCKIRQ, - APICV_INHIBIT_REASON_ABSENT, + + /* + * AVIC is disabled because SEV doesn't support it. 
+ */ APICV_INHIBIT_REASON_SEV, }; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f1bdac3f5aa8..0e68b4c937fc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) } } +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) + return; + + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) + return; + + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); +} + static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) { int ret = 0; @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { kvm_apic_set_xapic_id(apic, val >> 24); - else + kvm_lapic_xapic_id_updated(apic); + } else { ret = 1; + } break; case APIC_TASKPRI: @@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) MSR_IA32_APICBASE_BASE; if ((value & MSR_IA32_APICBASE_ENABLE) && - apic->base_address != APIC_DEFAULT_PHYS_BASE) - pr_warn_once("APIC base relocation is unsupported by KVM"); + apic->base_address != APIC_DEFAULT_PHYS_BASE) { + kvm_set_apicv_inhibit(apic->vcpu->kvm, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); + } } void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) @@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } + } else { + kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e826ee9138fa..17252f39bd7c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, true); mmu->pae_root[i] = root | PT_PRESENT_MASK | - shadow_me_mask; + shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 54fe03714f8a..d1bc5820ea46 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu) static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh, u32 index) { - u32 dest, apic_id; - struct kvm_vcpu *vcpu; + u32 l1_physical_id, dest; + struct kvm_vcpu *target_vcpu; int dest_mode = icrl & APIC_DEST_MASK; int shorthand = icrl & APIC_SHORT_MASK; struct kvm_svm *kvm_svm = to_kvm_svm(kvm); - u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page); if (shorthand != APIC_DEST_NOSHORT) return -EINVAL; - /* - * The AVIC incomplete IPI #vmexit info provides index into - * the physical APIC ID table, which can be used to derive - * guest physical APIC ID. 
- */ + if (apic_x2apic_mode(source)) + dest = icrh; + else + dest = GET_APIC_DEST_FIELD(icrh); + if (dest_mode == APIC_DEST_PHYSICAL) { - apic_id = index; + /* broadcast destination, use slow path */ + if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST) + return -EINVAL; + if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST) + return -EINVAL; + + l1_physical_id = dest; + + if (WARN_ON_ONCE(l1_physical_id != index)) + return -EINVAL; + } else { - if (!apic_x2apic_mode(source)) { - /* For xAPIC logical mode, the index is for logical APIC table. */ - apic_id = avic_logical_id_table[index] & 0x1ff; + u32 bitmap, cluster; + int logid_index; + + if (apic_x2apic_mode(source)) { + /* 16 bit dest mask, 16 bit cluster id */ + bitmap = dest & 0xFFFF0000; + cluster = (dest >> 16) << 4; + } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) { + /* 8 bit dest mask*/ + bitmap = dest; + cluster = 0; } else { - return -EINVAL; + /* 4 bit desk mask, 4 bit cluster id */ + bitmap = dest & 0xF; + cluster = (dest >> 4) << 2; } - } - /* - * Assuming vcpu ID is the same as physical apic ID, - * and use it to retrieve the target vCPU. - */ - vcpu = kvm_get_vcpu_by_id(kvm, apic_id); - if (!vcpu) - return -EINVAL; + if (unlikely(!bitmap)) + /* guest bug: nobody to send the logical interrupt to */ + return 0; - if (apic_x2apic_mode(vcpu->arch.apic)) - dest = icrh; - else - dest = GET_APIC_DEST_FIELD(icrh); + if (!is_power_of_2(bitmap)) + /* multiple logical destinations, use slow path */ + return -EINVAL; - /* - * Try matching the destination APIC ID with the vCPU. - */ - if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) { - vcpu->arch.apic->irr_pending = true; - svm_complete_interrupt_delivery(vcpu, - icrl & APIC_MODE_MASK, - icrl & APIC_INT_LEVELTRIG, - icrl & APIC_VECTOR_MASK); - return 0; + logid_index = cluster + __ffs(bitmap); + + if (apic_x2apic_mode(source)) { + l1_physical_id = logid_index; + } else { + u32 *avic_logical_id_table = + page_address(kvm_svm->avic_logical_id_table_page); + + u32 logid_entry = avic_logical_id_table[logid_index]; + + if (WARN_ON_ONCE(index != logid_index)) + return -EINVAL; + + /* guest bug: non existing/reserved logical destination */ + if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK))) + return 0; + + l1_physical_id = logid_entry & + AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; + } } - return -EINVAL; + target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id); + if (unlikely(!target_vcpu)) + /* guest bug: non existing vCPU is a target of this IPI*/ + return 0; + + target_vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(target_vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + return 0; } static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, @@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) return ret; } -static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) -{ - u64 *old, *new; - struct vcpu_svm *svm = to_svm(vcpu); - u32 id = kvm_xapic_id(vcpu->arch.apic); - - if (vcpu->vcpu_id == id) - return 0; - - old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); - new = avic_get_physical_id_entry(vcpu, id); - if (!new || !old) - return 1; - - /* We need to move physical_id_entry to new offset */ - *new = *old; - *old = 0ULL; - to_svm(vcpu)->avic_physical_id_cache = new; - - /* - * Also update the guest physical APIC ID in the logical - * APIC ID table entry if already setup the LDR. 
- */ - if (svm->ldr_reg) - avic_handle_ldr_update(vcpu); - - return 0; -} - static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) AVIC_UNACCEL_ACCESS_OFFSET_MASK; switch (offset) { - case APIC_ID: - if (avic_handle_apic_id_update(vcpu)) - return 0; - break; case APIC_LDR: if (avic_handle_ldr_update(vcpu)) return 0; @@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm) void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) { - if (avic_handle_apic_id_update(vcpu) != 0) - return; avic_handle_dfr_update(vcpu); avic_handle_ldr_update(vcpu); } @@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | - BIT(APICV_INHIBIT_REASON_SEV); + BIT(APICV_INHIBIT_REASON_SEV) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -946,7 +946,7 @@ out: return ret; } -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); @@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } -void __avic_vcpu_put(struct kvm_vcpu *vcpu) +void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); @@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -static void avic_vcpu_load(struct kvm_vcpu *vcpu) -{ - int cpu = get_cpu(); - - WARN_ON(cpu != vcpu->cpu); - - __avic_vcpu_load(vcpu, cpu); - - put_cpu(); -} - -static void avic_vcpu_put(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - - __avic_vcpu_put(vcpu); - - preempt_enable(); -} void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { @@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmcb_mark_dirty(vmcb, VMCB_AVIC); if (activated) - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); else avic_vcpu_put(vcpu); @@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3361258640a2..ba7cd26f438f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + u32 pause_count12; + u32 pause_thresh12; /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, @@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; + pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { - /* use guest values since host doesn't use them */ - vmcb02->control.pause_filter_count = - svm->pause_filter_enabled ? 
- svm->nested.ctl.pause_filter_count : 0; + /* use guest values since host doesn't intercept PAUSE */ + vmcb02->control.pause_filter_count = pause_count12; + vmcb02->control.pause_filter_thresh = pause_thresh12; - vmcb02->control.pause_filter_thresh = - svm->pause_threshold_enabled ? - svm->nested.ctl.pause_filter_thresh : 0; - - } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { - /* use host values when guest doesn't use them */ + } else { + /* start from host values otherwise */ vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; - } else { - /* - * Intercept every PAUSE otherwise and - * ignore both host and guest values - */ - vmcb02->control.pause_filter_count = 0; - vmcb02->control.pause_filter_thresh = 0; + + /* ... but ensure filtering is disabled if so requested. */ + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { + if (!pause_count12) + vmcb02->control.pause_filter_count = 0; + if (!pause_thresh12) + vmcb02->control.pause_filter_thresh = 0; + } } nested_svm_transition_tlb_flush(vcpu); @@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count) + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); + + } nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1dc02cdf6960..87da90360bc7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = __grow_ple_window(old, @@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = @@ -1400,13 +1400,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_load(vcpu, cpu); + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_put(vcpu); + avic_vcpu_put(vcpu); svm_prepare_host_switch(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 500348c1cb35..1bddd336a27e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void __avic_vcpu_put(struct kvm_vcpu *vcpu); +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6df17ef81905..3a919e49129b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7779,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e78d5889e5e..1910e1e78b15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9853,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) return; down_read(&vcpu->kvm->arch.apicv_update_lock); + preempt_disable(); activate = kvm_vcpu_apicv_activated(vcpu); @@ -9873,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); out: + preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); |