diff options
author | Olof Johansson <olof@lixom.net> | 2020-02-27 10:07:47 -0800 |
---|---|---|
committer | Olof Johansson <olof@lixom.net> | 2020-02-27 10:07:49 -0800 |
commit | f9a15f39e50db14d2227083e8fe142b1262386f6 (patch) | |
tree | d07909f760cf247e5f4661a71221fe6e93a6ec03 /arch | |
parent | c689300b9c76c1cdf80547251721704421dbcd5a (diff) | |
parent | b83685bceedbeed33a6adc2d0579a011708d2b18 (diff) |
Merge tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee into arm/fixes
Fix AMDTEE memory leak in amdtee_open_session()
* tag 'tee-amdtee-fix-for-5.6' of https://git.linaro.org/people/jens.wiklander/linux-tee: (344 commits)
tee: amdtee: fix memory leak in amdtee_open_session()
Linux 5.6-rc2
ext4: improve explanation of a mount failure caused by a misconfigured kernel
Input: cyapa - replace zero-length array with flexible-array member
Input: tca6416-keypad - replace zero-length array with flexible-array member
Input: gpio_keys_polled - replace zero-length array with flexible-array member
IB/mlx5: Use div64_u64 for num_var_hw_entries calculation
nvme: fix the parameter order for nvme_get_log in nvme_get_fw_slot_info
nvme/pci: move cqe check after device shutdown
nvme: prevent warning triggered by nvme_stop_keep_alive
nvme/tcp: fix bug on double requeue when send fails
cifs: make sure we do not overflow the max EA buffer size
cifs: enable change notification for SMB2.1 dialect
netdevice.h: fix all kernel-doc and Sphinx warnings
net: dsa: tag_ar9331: Make sure there is headroom for tag
net: dsa: tag_qca: Make sure there is headroom for tag
net, ip6_tunnel: enhance tunnel locate with link check
net/smc: no peer ID in CLC decline for SMCD
net/smc: transfer fasync_list in case of fallback
net: hns3: fix a copying IPv6 address error in hclge_fd_get_flow_tuples()
...
Link: https://lore.kernel.org/r/20200227165205.GA7926@jade
Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/kernel/ftrace.c | 7 | ||||
-rw-r--r-- | arch/arm/kernel/patch.c | 19 | ||||
-rw-r--r-- | arch/arm64/include/asm/exception.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/spinlock.h | 6 | ||||
-rw-r--r-- | arch/arm64/kernel/kaslr.c | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 13 | ||||
-rw-r--r-- | arch/arm64/kernel/time.c | 2 | ||||
-rw-r--r-- | arch/s390/boot/uv.c | 3 | ||||
-rw-r--r-- | arch/s390/include/asm/timex.h | 2 | ||||
-rw-r--r-- | arch/x86/events/amd/core.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 22 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 2 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 16 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 13 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 33 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 42 |
23 files changed, 143 insertions, 70 deletions
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 2a5ff69c28e6..10499d44964a 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, { unsigned long replaced; - if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) old = __opcode_to_mem_thumb32(old); - new = __opcode_to_mem_thumb32(new); - } else { + else old = __opcode_to_mem_arm(old); - new = __opcode_to_mem_arm(new); - } if (validate) { if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index d0a05a3bdb96..e9e828b6bb30 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -16,10 +16,10 @@ struct patch { unsigned int insn; }; +#ifdef CONFIG_MMU static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) - __acquires(&patch_lock) { unsigned int uintaddr = (uintptr_t) addr; bool module = !core_kernel_text(uintaddr); @@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) if (flags) raw_spin_lock_irqsave(&patch_lock, *flags); - else - __acquire(&patch_lock); set_fixmap(fixmap, page_to_phys(page)); @@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) } static void __kprobes patch_unmap(int fixmap, unsigned long *flags) - __releases(&patch_lock) { clear_fixmap(fixmap); if (flags) raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); } +#else +static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) +{ + return addr; +} +static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { } +#endif void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) { @@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) if (remap) waddr = patch_map(addr, FIX_TEXT_POKE0, &flags); - else - __acquire(&patch_lock); if (thumb2 && __opcode_is_thumb16(insn)) { *(u16 *)waddr = __opcode_to_mem_thumb16(insn); @@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) if (waddr != addr) { flush_kernel_vmap_range(waddr, twopage ? size / 2 : size); patch_unmap(FIX_TEXT_POKE0, &flags); - } else - __release(&patch_lock); + } flush_icache_range((uintptr_t)(addr), (uintptr_t)(addr) + size); diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index b87c6e276ab1..7a6e81ca23a8 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); -void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, @@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); -void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs); - #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 102404dc1e13..9083d6992603 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -18,6 +18,10 @@ * See: * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net */ -#define vcpu_is_preempted(cpu) false +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return false; +} #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 53b8a4ee64ff..91a83104c6e8 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/types.h> +#include <asm/archrandom.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/kernel-pgtable.h> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index bbb0f0c145f6..00626057a384 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) @@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void) * only prevents the tagged address ABI enabling via prctl() and does not * disable it for tasks that already opted in to the relaxed ABI. */ -static int zero; -static int one = 1; static struct ctl_table tagged_addr_sysctl_table[] = { { @@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = { .data = &tagged_addr_disabled, .maxlen = sizeof(int), .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 73f06d4b3aae..eebbc8d7123e 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -23,7 +23,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/clocksource.h> -#include <linux/clk-provider.h> +#include <linux/of_clk.h> #include <linux/acpi.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index ed007f4a6444..3f501159ee9f 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -15,7 +15,8 @@ void uv_query_info(void) if (!test_facility(158)) return; - if (uv_call(0, (uint64_t)&uvcb)) + /* rc==0x100 means that there is additional data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) return; if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 670f14a228e5..6bf3a45ccfec 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 1f22b6bbda68..39eb276d0277 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3be51aa06e67..dff6623804c2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index e1daf4151e11..4814c964692c 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,17 +40,18 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL + * Available model: SLM,AMT,GLM,CNL,TNT * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, - * CNL,KBL,CML + * CNL,KBL,CML,TNT * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -60,17 +61,18 @@ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,TGL + * KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL,KBL,CML,ICL,TGL + * GLM,CNL,KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -87,7 +89,8 @@ * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL + * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, + * TNT * Scope: Package (physical package) * */ @@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 4b94ae4ae369..dc43cc124e09 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1714,6 +1714,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 6f86650b3f77..a949f6f55991 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_D: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4dffbc10d3f8..40a0c0fd95ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -781,9 +781,19 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; /* - * Indicate whether the access faults on its page table in guest - * which is set when fix page fault and used to detect unhandeable - * instruction. + * Indicates the guest is trying to write a gfn that contains one or + * more of the PTEs used to translate the write itself, i.e. the access + * is changing its own translation in the guest page tables. KVM exits + * to userspace if emulation of the faulting instruction fails and this + * flag is set, as KVM cannot make forward progress. + * + * If emulation fails for a write to guest page tables, KVM unprotects + * (zaps) the shadow page for the target gfn and resumes the guest to + * retry the non-emulatable instruction (on hardware). Unprotecting the + * gfn doesn't allow forward progress for a self-changing access because + * doing so also zaps the translation for the gfn, i.e. retrying the + * instruction will hit a !PRESENT fault, which results in a new shadow + * page and sends KVM back to square one. */ bool write_fault_to_shadow_pgtable; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index eafc631d305c..afcd30d44cbb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1080,9 +1080,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, result = 1; /* assumes that there are only KVM_APIC_INIT/SIPI */ apic->pending_events = (1UL << KVM_APIC_INIT); - /* make sure pending_events is visible before sending - * the request */ - smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d55674f44a18..a647601c9e1c 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -102,6 +102,19 @@ static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu) kvm_get_active_pcid(vcpu)); } +int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault); + +static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + u32 err, bool prefault) +{ +#ifdef CONFIG_RETPOLINE + if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault)) + return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault); +#endif + return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault); +} + /* * Currently, we have two sorts of write-protection, a) the first one * write-protects guest page to sync the guest modification, b) another one is diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7011a4e54866..87e9ba27ada1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4219,8 +4219,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); -static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, - bool prefault) +int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault) { int max_level; @@ -4925,7 +4925,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) return; context->mmu_role.as_u64 = new_role.as_u64; - context->page_fault = tdp_page_fault; + context->page_fault = kvm_tdp_page_fault; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; @@ -5436,9 +5436,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, - lower_32_bits(error_code), - false); + r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, + lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 4e1ef0473663..e4c8a4cbf407 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -33,7 +33,7 @@ #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 - #define PT_MAX_FULL_LEVELS 4 + #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #define CMPXCHG cmpxchg #else #define CMPXCHG cmpxchg64 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a3e32d61d60c..bef0ba35f121 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2175,7 +2175,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; - vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; @@ -2266,6 +2265,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) init_vmcb(svm); svm_init_osvw(vcpu); + vcpu->arch.microcode_version = 0x01000065; return 0; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 657c2eda357c..3589cd3c0fcc 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -544,7 +544,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { +static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) +{ int msr; for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { @@ -1981,7 +1982,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, } /* - * Clean fields data can't de used on VMLAUNCH and when we switch + * Clean fields data can't be used on VMLAUNCH and when we switch * between different L2 guests as KVM keeps a single VMCS12 per L1. */ if (from_launch || evmcs_gpa_changed) @@ -3575,6 +3576,33 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); } +/* + * Returns true if a debug trap is pending delivery. + * + * In KVM, debug traps bear an exception payload. As such, the class of a #DB + * exception may be inferred from the presence of an exception payload. + */ +static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.exception.pending && + vcpu->arch.exception.nr == DB_VECTOR && + vcpu->arch.exception.payload; +} + +/* + * Certain VM-exits set the 'pending debug exceptions' field to indicate a + * recognized #DB (data or single-step) that has yet to be delivered. Since KVM + * represents these debug traps with a payload that is said to be compatible + * with the 'pending debug exceptions' field, write the payload to the VMCS + * field if a VM-exit is delivered before the debug trap. + */ +static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) +{ + if (vmx_pending_dbg_trap(vcpu)) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vcpu->arch.exception.payload); +} + static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3587,6 +3615,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) test_bit(KVM_APIC_INIT, &apic->pending_events)) { if (block_nested_events) return -EBUSY; + nested_vmx_update_pending_dbg(vcpu); clear_bit(KVM_APIC_INIT, &apic->pending_events); nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); return 0; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9a6664886f2e..3be25ecae145 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2947,6 +2947,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; @@ -4238,7 +4241,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->msr_ia32_umwait_control = 0; - vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); @@ -6763,6 +6765,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fbabb2f06273..fb5d64ebc35d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -438,6 +438,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) * for #DB exceptions under VMX. */ vcpu->arch.dr6 ^= payload & DR6_RTM; + + /* + * The #DB payload is defined as compatible with the 'pending + * debug exceptions' field under VMX, not DR6. While bit 12 is + * defined in the 'pending debug exceptions' field (enabled + * breakpoint), it is reserved and must be zero in DR6. + */ + vcpu->arch.dr6 &= ~BIT(12); break; case PF_VECTOR: vcpu->arch.cr2 = payload; @@ -490,19 +498,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.error_code = error_code; vcpu->arch.exception.has_payload = has_payload; vcpu->arch.exception.payload = payload; - /* - * In guest mode, payload delivery should be deferred, - * so that the L1 hypervisor can intercept #PF before - * CR2 is modified (or intercept #DB before DR6 is - * modified under nVMX). However, for ABI - * compatibility with KVM_GET_VCPU_EVENTS and - * KVM_SET_VCPU_EVENTS, we can't delay payload - * delivery unless userspace has enabled this - * functionality via the per-VM capability, - * KVM_CAP_EXCEPTION_PAYLOAD. - */ - if (!vcpu->kvm->arch.exception_payload_enabled || - !is_guest_mode(vcpu)) + if (!is_guest_mode(vcpu)) kvm_deliver_exception_payload(vcpu); return; } @@ -2448,7 +2444,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; - WARN_ON(vcpu->hv_clock.system_time < 0); + WARN_ON((s64)vcpu->hv_clock.system_time < 0); /* If the host uses TSC clocksource, then it is stable */ pvclock_flags = 0; @@ -3796,6 +3792,21 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, process_nmi(vcpu); /* + * In guest mode, payload delivery should be deferred, + * so that the L1 hypervisor can intercept #PF before + * CR2 is modified (or intercept #DB before DR6 is + * modified under nVMX). Unless the per-VM capability, + * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of + * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we + * opportunistically defer the exception payload, deliver it if the + * capability hasn't been requested before processing a + * KVM_GET_VCPU_EVENTS. + */ + if (!vcpu->kvm->arch.exception_payload_enabled && + vcpu->arch.exception.pending && vcpu->arch.exception.has_payload) + kvm_deliver_exception_payload(vcpu); + + /* * The API doesn't provide the instruction length for software * exceptions, so don't report them. As long as the guest RIP * isn't advanced, we should expect to encounter the exception @@ -8942,7 +8953,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); return 1; } EXPORT_SYMBOL_GPL(kvm_task_switch); @@ -10182,7 +10192,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) |