From 9d609649bb29a882e7e6bf502adb45dcac178fe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:14:32 +0200 Subject: KVM: vmx: print more APICv fields in dump_vmcs The SVI, RVI, virtual-APIC page address and APIC-access page address fields were left out of dump_vmcs. Add them. KERN_CONT technically isn't SMP safe, but it's okay to use it here since the whole of dump_vmcs() is a single huge multi-line piece of output that isn't SMP-safe. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b4e7d645275a..92bd511076f6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5723,8 +5723,16 @@ void dump_vmcs(void) if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) pr_err("TSC Multiplier = 0x%016llx\n", vmcs_read64(TSC_MULTIPLIER)); - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) - pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { + if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { + u16 status = vmcs_read16(GUEST_INTR_STATUS); + pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); + } + pr_err(KERN_CONT "TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) + pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); + pr_err(KERN_CONT "virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); + } if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) -- cgit v1.2.3-58-ga151 From f16cb57be82b20c6a00a6bb1750cb77c08e56c1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Apr 2019 11:38:30 +0200 Subject: KVM: x86: clear VM_EXIT_SAVE_IA32_PAT This is not needed, PAT writes always take an MSR vmexit. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 92bd511076f6..a8a1e533d7fb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2288,7 +2288,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_CLEAR_BNDCFGS | -- cgit v1.2.3-58-ga151 From 674ea351cdeb01d2740edce31db7f2d79ce6095d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Apr 2019 11:41:40 +0200 Subject: KVM: x86: optimize check for valid PAT value This check will soon be done on every nested vmentry and vmexit, "parallelize" it using bitwise operations. 
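For reference, the helper introduced below reads as follows (the code is taken from the diff that follows; the comments are added here only to spell out the bit trick):

	static inline bool kvm_pat_valid(u64 data)
	{
		/* Each PAT entry is one byte; bits 3-7 of every byte must be 0. */
		if (data & 0xF8F8F8F8F8F8F8F8ull)
			return false;
		/*
		 * Of the remaining encodings only 2 and 3 are reserved, i.e. a
		 * byte is invalid iff bit 1 is set while bit 2 is clear.  OR-ing
		 * bit 2 into every byte that has bit 1 set therefore changes the
		 * value exactly when a reserved byte is present.
		 */
		return (data | ((data & 0x0202020202020202ull) << 1)) == data;
	}

For example, a byte of 6 (0b110) already has bit 2 set and is left unchanged, so it passes, while a byte of 2 (0b010) becomes 6 and the comparison fails.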
Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mtrr.c | 10 +--------- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index e9ea2d45ae66..9f72cc427158 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr) return false; } -static bool valid_pat_type(unsigned t) -{ - return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ -} - static bool valid_mtrr_type(unsigned t) { return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ @@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) return false; if (msr == MSR_IA32_CR_PAT) { - for (i = 0; i < 8; i++) - if (!valid_pat_type((data >> (i * 8)) & 0xff)) - return false; - return true; + return kvm_pat_valid(data); } else if (msr == MSR_MTRRdefType) { if (data & ~0xcff) return false; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a8a1e533d7fb..c40fb667002c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1891,7 +1891,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + if (!kvm_pat_valid(data)) return 1; vmcs_write64(GUEST_IA32_PAT, data); vcpu->arch.pat = data; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index aedc5d0d4989..eda954441213 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,6 +347,16 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } + +static inline bool kvm_pat_valid(u64 data) +{ + if (data & 0xF8F8F8F8F8F8F8F8ull) + return false; + /* 0, 1, 4, 5, 6, 7 are valid values. */ + return (data | ((data & 0x0202020202020202ull) << 1)) == data; +} + void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); + #endif -- cgit v1.2.3-58-ga151 From f6b0db1fdafad5fdbe10a4a3559da42d286435e6 Mon Sep 17 00:00:00 2001 From: Krish Sadhukhan Date: Mon, 8 Apr 2019 17:35:11 -0400 Subject: kvm: nVMX: Check "load IA32_PAT" VM-exit control on vmentry According to section "Checks on Host Control Registers and MSRs" in Intel SDM vol 3C, the following check is performed on vmentry: If the "load IA32_PAT" VM-exit control is 1, the value of the field for the IA32_PAT MSR must be one that could be written by WRMSR without fault at CPL 0. Specifically, each of the 8 bytes in the field must have one of the values 0 (UC), 1 (WC), 4 (WT), 5 (WP), 6 (WB), or 7 (UC-). 
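As a concrete example, the architectural power-on value 0x0007040600070406 (bytes 06h/04h/07h/00h, i.e. WB, WT, UC- and UC) satisfies this rule, whereas a field containing a byte of 02h or 03h must be rejected; with the check added below, such a vmcs12 makes the nested VM-entry fail with VMXERR_ENTRY_INVALID_HOST_STATE_FIELD instead of entering the guest with an illegal host PAT.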
Signed-off-by: Krish Sadhukhan Reviewed-by: Karl Heubaum Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6401eb7ef19c..37cd5ebac4fb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2606,6 +2606,10 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) return -EINVAL; + if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && + !kvm_pat_valid(vmcs12->host_ia32_pat)) + return -EINVAL; + /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the * IA32_EFER MSR must be 0 in the field for that register. In addition, -- cgit v1.2.3-58-ga151 From de2bc2bfdf419ad9078736d88cae72beae972a59 Mon Sep 17 00:00:00 2001 From: Krish Sadhukhan Date: Mon, 8 Apr 2019 17:35:12 -0400 Subject: kvm: nVMX: Check "load IA32_PAT" VM-entry control on vmentry According to section "Checking and Loading Guest State" in Intel SDM vol 3C, the following check is performed on vmentry: If the "load IA32_PAT" VM-entry control is 1, the value of the field for the IA32_PAT MSR must be one that could be written by WRMSR without fault at CPL 0. Specifically, each of the 8 bytes in the field must have one of the values 0 (UC), 1 (WC), 4 (WT), 5 (WP), 6 (WB), or 7 (UC-). Signed-off-by: Krish Sadhukhan Reviewed-by: Karl Heubaum Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 37cd5ebac4fb..8dc6a43cfdf3 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2696,6 +2696,10 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) return 1; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && + !kvm_pat_valid(vmcs12->guest_ia32_pat)) + return 1; + if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; return 1; -- cgit v1.2.3-58-ga151 From 9c3e922ba316a5d3d8cbe41e0db97888fca5c359 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Apr 2019 12:18:05 -0700 Subject: KVM: nVMX: Move guest non-reg state checks to VM-Exit path Per Intel's SDM, volume 3, section Checking and Loading Guest State: Because the checking and the loading occur concurrently, a failure may be discovered only after some state has been loaded. For this reason, the logical processor responds to such failures by loading state from the host-state area, as it would for a VM exit. In other words, a failed non-register state consistency check results in a VM-Exit, not VM-Fail. Moving the non-reg state checks also paves the way for renaming nested_vmx_check_vmentry_postreqs() to align with the SDM, i.e. nested_vmx_check_vmentry_guest_state(). 
Fixes: 26539bd0e446a ("KVM: nVMX: check vmcs12 for valid activity state") Signed-off-by: Sean Christopherson Reviewed-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8dc6a43cfdf3..a02d2e3ded33 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2628,18 +2628,6 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, return 0; } -/* - * Checks related to Guest Non-register State - */ -static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) -{ - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) - return -EINVAL; - - return 0; -} - static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -2651,9 +2639,6 @@ static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu, if (nested_check_host_control_regs(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; - if (nested_check_guest_non_reg_state(vmcs12)) - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - return 0; } @@ -2684,6 +2669,18 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, return r; } +/* + * Checks related to Guest Non-register State + */ +static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) +{ + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) + return -EINVAL; + + return 0; +} + static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 *exit_qual) @@ -2729,6 +2726,9 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) return 1; + if (nested_check_guest_non_reg_state(vmcs12)) + return 1; + return 0; } -- cgit v1.2.3-58-ga151 From 5478ba349f3f71f8d306cddaed33fa0527cc7b16 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Apr 2019 12:18:06 -0700 Subject: KVM: nVMX: Rename and split top-level consistency checks to match SDM Rename the top-level consistency check functions to (loosely) align with the SDM. Historically, KVM has used the terms "prereq" and "postreq" to differentiate between consistency checks that lead to VM-Fail and those that lead to VM-Exit. The terms are vague and potentially misleading, e.g. "postreq" might be interpreted as occurring after VM-Entry. Note, while the SDM lumps controls and host state into a single section, "Checks on VMX Controls and Host-State Area", split them into separate top-level functions as the two categories of checks result in different VM instruction errors. This split will allow for additional cleanup. Note #2, "vmentry" is intentionally dropped from the new function names to avoid confusion with nested_check_vm_entry_controls(), and to keep the length of the functions names somewhat manageable. 
Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Reviewed-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a02d2e3ded33..97cc17effce2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2589,6 +2589,17 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (nested_check_vm_execution_controls(vcpu, vmcs12) || + nested_check_vm_exit_controls(vcpu, vmcs12) || + nested_check_vm_entry_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + return 0; +} + /* * Checks related to Host Control Registers and MSRs */ @@ -2628,14 +2639,9 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, return 0; } -static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { - if (nested_check_vm_execution_controls(vcpu, vmcs12) || - nested_check_vm_exit_controls(vcpu, vmcs12) || - nested_check_vm_entry_controls(vcpu, vmcs12)) - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - if (nested_check_host_control_regs(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; @@ -2681,9 +2687,9 @@ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) return 0; } -static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12, - u32 *exit_qual) +static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12, + u32 *exit_qual) { bool ia32e; @@ -3008,7 +3014,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) return -1; } - if (nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) goto vmentry_fail_vmexit; } @@ -3153,7 +3159,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS : VMXERR_VMRESUME_NONLAUNCHED_VMCS); - ret = nested_vmx_check_vmentry_prereqs(vcpu, vmcs12); + ret = nested_vmx_check_controls(vcpu, vmcs12); + if (ret) + return nested_vmx_failValid(vcpu, ret); + + ret = nested_vmx_check_host_state(vcpu, vmcs12); if (ret) return nested_vmx_failValid(vcpu, ret); @@ -5488,8 +5498,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; } - if (nested_vmx_check_vmentry_prereqs(vcpu, vmcs12) || - nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + if (nested_vmx_check_controls(vcpu, vmcs12) || + nested_vmx_check_host_state(vcpu, vmcs12) || + nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) return -EINVAL; vmx->nested.dirty_vmcs12 = true; -- cgit v1.2.3-58-ga151 From 98d9e858fa966bd7132cc21d65e4c89a97f4fe2d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Apr 2019 10:19:57 +0200 Subject: KVM: nVMX: Return -EINVAL when signaling failure in pre-VM-Entry helpers Convert all top-level nested VM-Enter consistency check functions to return 0/-EINVAL instead of failure codes, since now they can only ever return one failure code. This also does not give the false impression that failure information is always consumed and/or relevant, e.g. vmx_set_nested_state() only cares whether or not the checks were successful. 
nested_check_host_control_regs() can also now be inlined into its caller, nested_vmx_check_host_state, since the two have effectively become the same function. Based on a patch by Sean Christopherson. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 97cc17effce2..56c22e5c96c3 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2595,16 +2595,13 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, if (nested_check_vm_execution_controls(vcpu, vmcs12) || nested_check_vm_exit_controls(vcpu, vmcs12) || nested_check_vm_entry_controls(vcpu, vmcs12)) - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + return -EINVAL; return 0; } -/* - * Checks related to Host Control Registers and MSRs - */ -static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { bool ia32e; @@ -2639,15 +2636,6 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, return 0; } -static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) -{ - if (nested_check_host_control_regs(vcpu, vmcs12)) - return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; - - return 0; -} - static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -3159,13 +3147,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS : VMXERR_VMRESUME_NONLAUNCHED_VMCS); - ret = nested_vmx_check_controls(vcpu, vmcs12); - if (ret) - return nested_vmx_failValid(vcpu, ret); + if (nested_vmx_check_controls(vcpu, vmcs12)) + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - ret = nested_vmx_check_host_state(vcpu, vmcs12); - if (ret) - return nested_vmx_failValid(vcpu, ret); + if (nested_vmx_check_host_state(vcpu, vmcs12)) + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); /* * We're finally done with prerequisite checking, and can start with -- cgit v1.2.3-58-ga151 From c80add0f487e28c48e855245189dc28bd3d5c250 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Apr 2019 12:18:09 -0700 Subject: KVM: nVMX: Return -EINVAL when signaling failure in VM-Entry helpers Most, but not all, helpers that are related to emulating consistency checks for nested VM-Entry return -EINVAL when a check fails. Convert the holdouts to have consistency throughout and to make it clear that the functions are signaling pass/fail as opposed to "resume guest" vs. "exit to userspace". Opportunistically fix bad indentation in nested_vmx_check_guest_state(). 
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 56c22e5c96c3..b46136b099b8 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -930,7 +930,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { if (!nested_cr3_valid(vcpu, cr3)) { *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; + return -EINVAL; } /* @@ -941,7 +941,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne !nested_ept) { if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { *entry_failure_code = ENTRY_FAIL_PDPTE; - return 1; + return -EINVAL; } } } @@ -2373,13 +2373,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, */ if (vmx->emulation_required) { *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; + return -EINVAL; } /* Shadow page tables on either EPT or shadow page tables. */ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) - return 1; + return -EINVAL; if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; @@ -2685,15 +2685,15 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) - return 1; + return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && !kvm_pat_valid(vmcs12->guest_ia32_pat)) - return 1; + return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; - return 1; + return -EINVAL; } /* @@ -2712,16 +2712,16 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || ((vmcs12->guest_cr0 & X86_CR0_PG) && ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) - return 1; + return -EINVAL; } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || - (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) - return 1; + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return -EINVAL; if (nested_check_guest_non_reg_state(vmcs12)) - return 1; + return -EINVAL; return 0; } -- cgit v1.2.3-58-ga151 From 11988499e62b310f3bf6f6d0a807a06d3f9ccc96 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:19:15 -0700 Subject: KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes KVM allows userspace to violate consistency checks related to the guest's CPUID model to some degree. Generally speaking, userspace has carte blanche when it comes to guest state so long as jamming invalid state won't negatively affect the host. Currently this is seems to be a non-issue as most of the interesting EFER checks are missing, e.g. NX and LME, but those will be added shortly. Proactively exempt userspace from the CPUID checks so as not to break userspace. Note, the efer_reserved_bits check still applies to userspace writes as that mask reflects the host's capabilities, e.g. KVM shouldn't allow a guest to run with NX=1 if it has been disabled in the host. 
Fixes: d80174745ba39 ("KVM: SVM: Only allow setting of EFER_SVME when CPUID SVM is set") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a0d1fc80ac5a..5d7dcd06d08a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1258,31 +1258,42 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return 0; } -bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (efer & efer_reserved_bits) - return false; - if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) - return false; + return false; if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - return false; + return false; return true; + +} +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + if (efer & efer_reserved_bits) + return false; + + return __kvm_valid_efer(vcpu, efer); } EXPORT_SYMBOL_GPL(kvm_valid_efer); -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 old_efer = vcpu->arch.efer; + u64 efer = msr_info->data; - if (!kvm_valid_efer(vcpu, efer)) - return 1; + if (efer & efer_reserved_bits) + return false; - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + if (!msr_info->host_initiated) { + if (!__kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) && + (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + } efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -2452,7 +2463,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.arch_capabilities = data; break; case MSR_EFER: - return set_efer(vcpu, data); + return set_efer(vcpu, msr_info); case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ -- cgit v1.2.3-58-ga151 From 0a62956312e9dcd0ce5c59be4f0a8d8292a62402 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:19:16 -0700 Subject: KVM: x86: Inject #GP if guest attempts to set unsupported EFER bits EFER.LME and EFER.NX are considered reserved if their respective feature bits are not advertised to the guest. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d7dcd06d08a..38440316a806 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1266,6 +1266,13 @@ static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) return false; + if (efer & (EFER_LME | EFER_LMA) && + !guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return false; + + if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX)) + return false; + return true; } -- cgit v1.2.3-58-ga151 From c110ae578ca0a10064dfbda3d786d6a733b9fe69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2019 17:24:03 +0100 Subject: kvm: move KVM_CAP_NR_MEMSLOTS to common code All architectures except MIPS were defining it in the same way, and memory slots are handled entirely by common code so there is no point in keeping the definition per-architecture. 
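For userspace the query itself is unchanged; a minimal sketch of how a VMM might read the limit (illustrative only, error handling omitted):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int query_nr_memslots(void)
	{
		int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

		/* Served by common KVM code on every architecture. */
		return ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
	}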
Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 ++--- arch/powerpc/kvm/powerpc.c | 3 --- arch/s390/kvm/kvm-s390.c | 3 --- arch/x86/kvm/x86.c | 3 --- virt/kvm/arm/arm.c | 3 --- virt/kvm/kvm_main.c | 2 ++ 6 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591..b62ad0d94234 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1117,9 +1117,8 @@ struct kvm_userspace_memory_region { This ioctl allows the user to create, modify or delete a guest physical memory slot. Bits 0-15 of "slot" specify the slot id and this value should be less than the maximum number of user memory slots supported per -VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, -if this capability is supported by the architecture. Slots may not -overlap in guest physical address space. +VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS. +Slots may not overlap in guest physical address space. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..92910b7c5bcc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -644,9 +644,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4638303ba6a8..28f35d2b06cb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -513,9 +513,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else if (sclp.has_esca && sclp.has_64bscao) r = KVM_S390_ESCA_CPU_SLOTS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 38440316a806..6c27d224f744 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3093,9 +3093,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..be4ec5f3ba5f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba85..684b67252cd5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_MAX_VCPU_ID: return KVM_MAX_VCPU_ID; + case KVM_CAP_NR_MEMSLOTS: + return KVM_USER_MEM_SLOTS; default: break; } -- cgit v1.2.3-58-ga151 From 8479e04e7d6b1974629a0f657afa8ec5f17d2e90 Mon Sep 17 00:00:00 2001 From: Luwei Kang Date: Mon, 18 Feb 2019 19:26:07 -0500 Subject: KVM: x86: Inject PMI for KVM guest Inject a PMI for KVM guest when Intel PT working in Host-Guest mode and Guest ToPA entry memory buffer was completely filled. 
Signed-off-by: Luwei Kang Signed-off-by: Paolo Bonzini --- arch/x86/events/intel/core.c | 6 +++++- arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kvm/x86.c | 10 ++++++++++ include/linux/perf_event.h | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8baa441d8000..386151b2c62f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2307,7 +2307,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(55, (unsigned long *)&status)) { handled++; - intel_pt_interrupt(); + if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && + perf_guest_cbs->handle_intel_pt_intr)) + perf_guest_cbs->handle_intel_pt_intr(); + else + intel_pt_interrupt(); } /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95..be40c094bc49 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -781,6 +781,10 @@ #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +/* PERF_GLOBAL_OVF_CTL bits */ +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) + /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c27d224f744..cedd396e3003 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6839,10 +6839,20 @@ static unsigned long kvm_get_guest_ip(void) return ip; } +static void kvm_handle_intel_pt_intr(void) +{ + struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu); + + kvm_make_request(KVM_REQ_PMI, vcpu); + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, + (unsigned long *)&vcpu->arch.pmu.global_status); +} + static struct perf_guest_info_callbacks kvm_guest_cbs = { .is_in_guest = kvm_is_in_guest, .is_user_mode = kvm_is_user_mode, .get_guest_ip = kvm_get_guest_ip, + .handle_intel_pt_intr = kvm_handle_intel_pt_intr, }; static void kvm_set_mmio_spte_mask(void) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ef764f613..820c4ff31bc5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -30,6 +30,7 @@ struct perf_guest_info_callbacks { int (*is_in_guest)(void); int (*is_user_mode)(void); unsigned long (*get_guest_ip)(void); + void (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT -- cgit v1.2.3-58-ga151 From c715eb9fe9027ed118630adb0d59acf36b848d4f Mon Sep 17 00:00:00 2001 From: Luwei Kang Date: Mon, 18 Feb 2019 19:26:08 -0500 Subject: KVM: x86: Add support of clear Trace_ToPA_PMI status Let guests clear the Intel PT ToPA PMI status (bit 55 of MSR_CORE_PERF_GLOBAL_OVF_CTRL). 
Signed-off-by: Luwei Kang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kvm/vmx/pmu_intel.c | 8 +++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a9d03af34030..275990e3415b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -469,6 +469,7 @@ struct kvm_pmu { u64 global_ovf_ctrl; u64 counter_bitmask[2]; u64 global_ctrl_mask; + u64 global_ovf_ctrl_mask; u64 reserved_bits; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index be40c094bc49..3bf5d84bb64f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -784,6 +784,10 @@ /* PERF_GLOBAL_OVF_CTL bits */ #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 5ab4a364348e..f8502c376b37 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -227,7 +227,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { + if (!(data & pmu->global_ovf_ctrl_mask)) { if (!msr_info->host_initiated) pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; @@ -297,6 +297,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; + pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask + & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF | + MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD); + if (kvm_x86_ops->pt_supported()) + pmu->global_ovf_ctrl_mask &= + ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; entry = kvm_find_cpuid_entry(vcpu, 7, 0); if (entry && -- cgit v1.2.3-58-ga151 From 6c6a2ab962af8f197984c45d585814f9839e86d5 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 15 Apr 2019 18:45:26 +0300 Subject: KVM: VMX: Nop emulation of MSR_IA32_POWER_CTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commits 668fffa3f838 ("kvm: better MWAIT emulation for guests") and 4d5422cea3b6 ("KVM: X86: Provide a capability to disable MWAIT intercepts"), KVM was modified to allow an admin to configure certain guests to execute MONITOR/MWAIT inside guest without being intercepted by host. This is useful in case admin wishes to allocate a dedicated logical processor for each vCPU thread. Thus, making it safe for guest to completely control the power-state of the logical processor. The ability to use this new KVM capability was introduced to QEMU by commits 6f131f13e68d ("kvm: support -overcommit cpu-pm=on|off") and 2266d4431132 ("i386/cpu: make -cpu host support monitor/mwait").
However, exposing MONITOR/MWAIT to a Linux guest may cause its intel_idle kernel module to execute c1e_promotion_disable() which will attempt to RDMSR/WRMSR from/to MSR_IA32_POWER_CTL to manipulate the "C1E Enable" bit. This behaviour was introduced by commit 32e9518005c8 ("intel_idle: export both C1 and C1E"). Because KVM doesn't emulate this MSR, running KVM with ignore_msrs=0 will cause the above guest behaviour to raise a #GP which will cause guest to kernel panic. Therefore, add support for nop emulation of MSR_IA32_POWER_CTL to avoid #GP in guest in this scenario. Future commits can optimise emulation further by reflecting guest MSR changes to host MSR to provide guest with the ability to fine-tune the dedicated logical processor power-state. Reviewed-by: Boris Ostrovsky Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/vmx/vmx.h | 2 ++ arch/x86/kvm/x86.c | 1 + 3 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c40fb667002c..3fe2020e3bc4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1692,6 +1692,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; + case MSR_IA32_POWER_CTL: + msr_info->data = vmx->msr_ia32_power_ctl; + break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && @@ -1822,6 +1825,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: vmcs_writel(GUEST_SYSENTER_ESP, data); break; + case MSR_IA32_POWER_CTL: + vmx->msr_ia32_power_ctl = data; + break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f879529906b4..1e42f983e0f1 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -257,6 +257,8 @@ struct vcpu_vmx { unsigned long host_debugctlmsr; + u64 msr_ia32_power_ctl; + /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cedd396e3003..c09507057743 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1170,6 +1170,7 @@ static u32 emulated_msrs[] = { MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, MSR_AMD64_VIRT_SPEC_CTRL, + MSR_IA32_POWER_CTL, }; static unsigned num_emulated_msrs; -- cgit v1.2.3-58-ga151 From ee66e453db13d4837a0dcf9d43efa7a88603161b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:44 -0700 Subject: KVM: lapic: Busy wait for timer to expire when using hv_timer ...now that VMX's preemption timer, i.e. the hv_timer, also adjusts its programmed time based on lapic_timer_advance_ns. Without the delay, a guest can see a timer interrupt arrive before the requested time when KVM is using the hv_timer to emulate the guest's interrupt.
Fixes: c5ce8235cffa0 ("KVM: VMX: Optimize tscdeadline timer latency") Cc: Cc: Wanpeng Li Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9bf70cf84564..7eef076bc5ee 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1455,7 +1455,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (swait_active(q)) swake_up_one(q); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; } -- cgit v1.2.3-58-ga151 From f1ba5cfbe47a90f801598a908fd2157bbab2ce1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:45 -0700 Subject: KVM: lapic: Explicitly cancel the hv timer if it's pre-expired Explicitly call cancel_hv_timer() instead of returning %false to coerce restart_apic_timer() into canceling it by way of start_sw_timer(). Functionally, the existing code is correct in the sense that it doesn't do anything visibly wrong, e.g. generate spurious interrupts or miss an interrupt. But it's extremely confusing and inefficient, e.g. there are multiple extraneous calls to apic_timer_expired() that effectively get dropped due to @timer_pending being %true. Cc: Wanpeng Li Cc: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7eef076bc5ee..61d1cbab877e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1693,17 +1693,25 @@ static bool start_hv_timer(struct kvm_lapic *apic) hrtimer_cancel(&ktimer->timer); /* - * Also recheck ktimer->pending, in case the sw timer triggered in - * the window. For periodic timer, leave the hv timer running for - * simplicity, and the deadline will be recomputed on the next vmexit. + * To simplify handling the periodic timer, leave the hv timer running + * even if the deadline timer has expired, i.e. rely on the resulting + * VM-Exit to recompute the periodic timer's target expiration. */ - if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { - if (r) + if (!apic_lvtt_period(apic)) { + /* + * Cancel the hv timer if the sw timer fired while the hv timer + * was being programmed, or if the hv timer itself expired. + */ + if (atomic_read(&ktimer->pending)) { + cancel_hv_timer(apic); + } else if (r) { apic_timer_expired(apic); - return false; + cancel_hv_timer(apic); + } } - trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, ktimer->hv_timer_in_use); + return true; } -- cgit v1.2.3-58-ga151 From f99279825ee30b829da9d3b7cf0b9d1b9b2596e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:46 -0700 Subject: KVM: lapic: Refactor ->set_hv_timer to use an explicit expired param Refactor kvm_x86_ops->set_hv_timer to use an explicit parameter for stating that the timer has expired. Overloading the return value is unnecessarily clever, e.g. can lead to confusion over the proper return value from start_hv_timer() when r==1.
Cc: Wanpeng Li Cc: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/lapic.c | 10 +++++----- arch/x86/kvm/vmx/vmx.c | 6 ++++-- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 275990e3415b..8d68ba0cba0c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1168,7 +1168,8 @@ struct kvm_x86_ops { uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); - int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc); + int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 61d1cbab877e..0fd58571c453 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1673,7 +1673,8 @@ static void cancel_hv_timer(struct kvm_lapic *apic) static bool start_hv_timer(struct kvm_lapic *apic) { struct kvm_timer *ktimer = &apic->lapic_timer; - int r; + struct kvm_vcpu *vcpu = apic->vcpu; + bool expired; WARN_ON(preemptible()); if (!kvm_x86_ops->set_hv_timer) @@ -1685,8 +1686,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) if (!ktimer->tscdeadline) return false; - r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); - if (r < 0) + if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) return false; ktimer->hv_timer_in_use = true; @@ -1704,13 +1704,13 @@ static bool start_hv_timer(struct kvm_lapic *apic) */ if (atomic_read(&ktimer->pending)) { cancel_hv_timer(apic); - } else if (r) { + } else if (expired) { apic_timer_expired(apic); cancel_hv_timer(apic); } } - trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, ktimer->hv_timer_in_use); + trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3fe2020e3bc4..60e2f0afa4d6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7041,7 +7041,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift, return 0; } -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) +static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired) { struct vcpu_vmx *vmx; u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; @@ -7078,7 +7079,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) return -ERANGE; vmx->hv_deadline_tsc = tscl + delta_tsc; - return delta_tsc == 0; + *expired = !delta_tsc; + return 0; } static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) -- cgit v1.2.3-58-ga151 From 4ca88b3f86cd03deecd48ca9880a7c2e7c6fb788 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:47 -0700 Subject: KVM: lapic: Check for a pending timer intr prior to start_hv_timer() Checking for a pending non-periodic interrupt in start_hv_timer() leads to restart_apic_timer() making an unnecessary call to start_sw_timer() due to start_hv_timer() returning false. Alternatively, start_hv_timer() could return %true when there is a pending non-periodic interrupt, but that approach is less intuitive, i.e. would require a beefy comment to explain an otherwise simple check. 
Cc: Liran Alon Cc: Wanpeng Li Suggested-by: Liran Alon Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fd58571c453..4e000712cb82 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1680,9 +1680,6 @@ static bool start_hv_timer(struct kvm_lapic *apic) if (!kvm_x86_ops->set_hv_timer) return false; - if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) - return false; - if (!ktimer->tscdeadline) return false; @@ -1735,8 +1732,13 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { preempt_disable(); + + if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) + goto out; + if (!start_hv_timer(apic)) start_sw_timer(apic); +out: preempt_enable(); } -- cgit v1.2.3-58-ga151 From 0967fa1cd3769207cf74ae84144ce47fec65a317 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:48 -0700 Subject: KVM: VMX: Skip delta_tsc shift-and-divide if the dividend is zero Ten percent of nothin' is... let me do the math here. Nothin' into nothin', carry the nothin'... Cc: Wanpeng Li Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 60e2f0afa4d6..d8f101b58ab8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7063,10 +7063,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, /* Convert to host delta tsc if tsc scaling is enabled */ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && - u64_shl_div_u64(delta_tsc, + delta_tsc && u64_shl_div_u64(delta_tsc, kvm_tsc_scaling_ratio_frac_bits, - vcpu->arch.tsc_scaling_ratio, - &delta_tsc)) + vcpu->arch.tsc_scaling_ratio, &delta_tsc)) return -ERANGE; /* -- cgit v1.2.3-58-ga151 From d6a85c322348aac02eeb11681c1f623145e66697 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Apr 2019 13:15:08 +0300 Subject: KVM: vmx: clean up some debug output Smatch complains about this: arch/x86/kvm/vmx/vmx.c:5730 dump_vmcs() warn: KERN_* level not at start of string The code should be using pr_cont() instead of pr_err(). 
Fixes: 9d609649bb29 ("KVM: vmx: print more APICv fields in dump_vmcs") Signed-off-by: Dan Carpenter Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d8f101b58ab8..835f642e70e2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5733,10 +5733,10 @@ void dump_vmcs(void) u16 status = vmcs_read16(GUEST_INTR_STATUS); pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); } - pr_err(KERN_CONT "TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); - pr_err(KERN_CONT "virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); + pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); } if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); -- cgit v1.2.3-58-ga151 From 19e38336d71d22e27b60e70334ad572d4e3d0042 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Apr 2019 08:07:40 -0700 Subject: KVM: VMX: Include architectural defs header in capabilities.h The capabilities header depends on asm/vmx.h but doesn't explicitly include said file. This currently doesn't cause problems as all users of capabilities.h first include asm/vmx.h, but the issue often results in build errors if someone starts moving things around the VMX files. Fixes: 3077c1910882 ("KVM: VMX: Move capabilities structs and helpers to dedicated file") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 854e144131c6..d6664ee3d127 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_VMX_CAPS_H #define __KVM_X86_VMX_CAPS_H +#include <asm/vmx.h> + #include "lapic.h" extern bool __read_mostly enable_vpid; -- cgit v1.2.3-58-ga151 From 191c8137a93989825f0e9f78a198367dde677216 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 18 Apr 2019 18:32:50 +0200 Subject: x86/kvm: Implement HWCR support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware configuration register has some useful bits which can be used by guests. Implement McStatusWrEn which can be used by guests when injecting MCEs with the in-kernel mce-inject module. For that, we need to set bit 18 - McStatusWrEn - first, before writing the MCi_STATUS registers (otherwise we #GP). Add the required machinery to do so.
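Illustratively, the guest-side sequence this enables looks roughly like the sketch below (not part of this patch; the status value is purely illustrative):

	/* Guest: allow writes to MCi_STATUS, then seed an error for mce-inject. */
	wrmsrl(MSR_K7_HWCR, BIT_ULL(18));		/* McStatusWrEn = 1 */
	wrmsrl(MSR_IA32_MCx_STATUS(0), fake_status);	/* no longer #GPs   */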
Signed-off-by: Borislav Petkov Cc: Jim Mattson Cc: Joerg Roedel Cc: KVM Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Tom Lendacky Cc: Tony Luck Cc: Yazen Ghannam Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8d68ba0cba0c..2532bfb09d8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -781,6 +781,9 @@ struct kvm_vcpu_arch { /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ bool l1tf_flush_l1d; + + /* AMD MSRC001_0015 Hardware Configuration */ + u64 msr_hwcr; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c09507057743..a7ea346fa41e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1171,6 +1171,8 @@ static u32 emulated_msrs[] = { MSR_MISC_FEATURES_ENABLES, MSR_AMD64_VIRT_SPEC_CTRL, MSR_IA32_POWER_CTL, + + MSR_K7_HWCR, }; static unsigned num_emulated_msrs; @@ -2294,6 +2296,18 @@ static void kvmclock_sync_fn(struct work_struct *work) KVMCLOCK_SYNC_PERIOD); } +/* + * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP. + */ +static bool can_set_mci_status(struct kvm_vcpu *vcpu) +{ + /* McStatusWrEn enabled? */ + if (guest_cpuid_is_amd(vcpu)) + return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); + + return false; +} + static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 mcg_cap = vcpu->arch.mcg_cap; @@ -2325,9 +2339,14 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((offset & 0x3) == 0 && data != 0 && (data | (1 << 10)) != ~(u64)0) return -1; + + /* MCi_STATUS */ if (!msr_info->host_initiated && - (offset & 0x3) == 1 && data != 0) - return -1; + (offset & 0x3) == 1 && data != 0) { + if (!can_set_mci_status(vcpu)) + return -1; + } + vcpu->arch.mce_banks[offset] = data; break; } @@ -2476,8 +2495,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ data &= ~(u64)0x8; /* ignore TLB cache disable */ - data &= ~(u64)0x40000; /* ignore Mc status write enable */ - if (data != 0) { + + /* Handle McStatusWrEn */ + if (data == BIT_ULL(18)) { + vcpu->arch.msr_hwcr = data; + } else if (data != 0) { vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", data); return 1; @@ -2751,7 +2773,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K8_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: - case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K8_INT_PENDING_MSG: case MSR_AMD64_NB_CFG: @@ -2915,6 +2936,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_MISC_FEATURES_ENABLES: msr_info->data = vcpu->arch.msr_misc_features_enables; break; + case MSR_K7_HWCR: + msr_info->data = vcpu->arch.msr_hwcr; + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); -- cgit v1.2.3-58-ga151 From 2e408936b6de5a7540880ae40bc7ce43102053ba Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:31 +0100 Subject: X86/nVMX: handle_vmon: Read 4 bytes from guest memory Read the data directly from guest memory instead of the map->read->unmap sequence. 
This also avoids using kvm_vcpu_gpa_to_page() and kmap() which assumes that there is a "struct page" for guest memory. Suggested-by: Jim Mattson Signed-off-by: KarimAllah Ahmed Reviewed-by: Jim Mattson Reviewed-by: David Hildenbrand Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b46136b099b8..9b1973cc1e75 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4264,7 +4264,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; gpa_t vmptr; - struct page *page; + uint32_t revision; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -4313,18 +4313,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) return nested_vmx_failInvalid(vcpu); - page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) + if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || + revision != VMCS12_REVISION) return nested_vmx_failInvalid(vcpu); - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - kvm_release_page_clean(page); - return nested_vmx_failInvalid(vcpu); - } - kunmap(page); - kvm_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) -- cgit v1.2.3-58-ga151 From 3d5f6beb74a00bcc308612ee007e3e0c6126c397 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:32 +0100 Subject: X86/nVMX: Update the PML table without mapping and unmapping the page Update the PML table without mapping and unmapping the page. This also avoids using kvm_vcpu_gpa_to_page(..) which assumes that there is a "struct page" for guest memory. As a side-effect of using kvm_write_guest_page the page is also properly marked as dirty. 
Signed-off-by: KarimAllah Ahmed Reviewed-by: David Hildenbrand Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 835f642e70e2..afe45a28020c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7116,9 +7116,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa; - struct page *page = NULL; - u64 *pml_address; + gpa_t gpa, dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7138,15 +7136,13 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) } gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); - if (is_error_page(page)) + if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, + offset_in_page(dst), sizeof(gpa))) return 0; - pml_address = kmap(page); - pml_address[vmcs12->guest_pml_index--] = gpa; - kunmap(page); - kvm_release_page_clean(page); + vmcs12->guest_pml_index--; } return 0; -- cgit v1.2.3-58-ga151 From bd53cb35a3e9adb73a834a36586e9ad80e877767 Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Thu, 31 Jan 2019 21:24:33 +0100 Subject: X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs cmpxchg_gpte() calls get_user_pages_fast() to retrieve the number of pages and the respective struct page to map in the kernel virtual address space. This doesn't work if get_user_pages_fast() is invoked with a userspace virtual address that's backed by PFNs outside of kernel reach (e.g., when limiting the kernel memory with mem= in the command line and using /dev/mem to map memory). If get_user_pages_fast() fails, look up the VMA that back the userspace virtual address, compute the PFN and the physical address, and map it in the kernel virtual address space with memremap(). Signed-off-by: Filippo Sironi Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- arch/x86/kvm/paging_tmpl.h | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bdca39829bc..c40af67d0f44 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct page *page; npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); - /* Check if the user is doing something meaningless. 
*/ - if (unlikely(npages != 1)) - return -EFAULT; - - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); + if (likely(npages == 1)) { + table = kmap_atomic(page); + ret = CMPXCHG(&table[index], orig_pte, new_pte); + kunmap_atomic(table); + + kvm_release_page_dirty(page); + } else { + struct vm_area_struct *vma; + unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; + unsigned long pfn; + unsigned long paddr; + + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); + if (!vma || !(vma->vm_flags & VM_PFNMAP)) { + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + paddr = pfn << PAGE_SHIFT; + table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); + if (!table) { + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + ret = CMPXCHG(&table[index], orig_pte, new_pte); + memunmap(table); + up_read(¤t->mm->mmap_sem); + } return (ret != orig_pte); } -- cgit v1.2.3-58-ga151 From b146b839282ca98927a109d6bb0351c7380b70e4 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:35 +0100 Subject: X86/nVMX: handle_vmptrld: Use kvm_vcpu_map when copying VMCS12 from guest memory Use kvm_vcpu_map to the map the VMCS12 from guest memory because kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 9b1973cc1e75..fb9953b8f3f1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4593,11 +4593,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return 1; if (vmx->nested.current_vmptr != vmptr) { + struct kvm_host_map map; struct vmcs12 *new_vmcs12; - struct page *page; - page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) { + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { /* * Reads from an unbacked page return all 1s, * which means that the 32 bits located at the @@ -4607,12 +4606,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } - new_vmcs12 = kmap(page); + + new_vmcs12 = map.hva; + if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || (new_vmcs12->hdr.shadow_vmcs && !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &map, false); return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } @@ -4624,8 +4624,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) * cached. */ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &map, false); set_current_vmptr(vmx, vmptr); } -- cgit v1.2.3-58-ga151 From 31f0b6c4ba7da19192492b988f06f27bbe259082 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:36 +0100 Subject: KVM/nVMX: Use kvm_vcpu_map when mapping the L1 MSR bitmap Use kvm_vcpu_map when mapping the L1 MSR bitmap since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". 
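The conversion follows the same map/use/unmap idiom used throughout this series; a minimal sketch (the kvm_host_map usage and the gfn conversion mirror the diff below):

struct kvm_host_map map;
unsigned long *msr_bitmap_l1;

/* Map the L1 MSR bitmap; works even without a "struct page". */
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), &map))
	return false;				/* mapping failed */

msr_bitmap_l1 = (unsigned long *)map.hva;
/* ... merge L1's bitmap into the vmcs02 bitmap ... */

kvm_vcpu_unmap(vcpu, &map, false);		/* read-only access */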
Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 11 +++++------ arch/x86/kvm/vmx/vmx.h | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index fb9953b8f3f1..15d08413683e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -519,9 +519,10 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { int msr; - struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; + /* * pred_cmd & spec_ctrl are trying to verify two things: * @@ -547,11 +548,10 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, !pred_cmd && !spec_ctrl) return false; - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); - if (is_error_page(page)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) return false; - msr_bitmap_l1 = (unsigned long *)kmap(page); + msr_bitmap_l1 = (unsigned long *)map->hva; /* * To keep the control flow simple, pay eight 8-byte writes (sixteen @@ -604,8 +604,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W); - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); return true; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1e42f983e0f1..b07d4b1d63cf 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -144,6 +144,9 @@ struct nested_vmx { struct page *apic_access_page; struct page *virtual_apic_page; struct page *pi_desc_page; + + struct kvm_host_map msr_bitmap_map; + struct pi_desc *pi_desc; bool pi_pending; u16 posted_intr_nv; -- cgit v1.2.3-58-ga151 From 96c66e87deeeb3cc78a3b82a1de8e365eec206c1 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:37 +0100 Subject: KVM/nVMX: Use kvm_vcpu_map when mapping the virtual APIC page Use kvm_vcpu_map when mapping the virtual APIC page since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". One additional semantic change is that the virtual host mapping lifecycle has changed a bit. It now has the same lifetime of the pinning of the virtual APIC page on the host side. 
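Roughly, the mapping is now established when the vmcs12 pages are set up for vmentry and torn down on vmexit or free_nested(), mirroring the pin lifetime; a sketch using the new virtual_apic_map field from this patch:

/* Nested vmentry: map once and program the physical address. */
if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr),
		  &vmx->nested.virtual_apic_map))
	vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
		     pfn_to_hpa(vmx->nested.virtual_apic_map.pfn));

/* Nested vmexit / free_nested: drop the mapping (and the pin). */
kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);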
Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 30 ++++++++++-------------------- arch/x86/kvm/vmx/vmx.c | 5 ++--- arch/x86/kvm/vmx/vmx.h | 2 +- 3 files changed, 13 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 15d08413683e..a00c4ded272d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -229,10 +229,7 @@ static void free_nested(struct kvm_vcpu *vcpu) kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } - if (vmx->nested.virtual_apic_page) { - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); kvm_release_page_dirty(vmx->nested.pi_desc_page); @@ -2833,6 +2830,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_host_map *map; struct page *page; u64 hpa; @@ -2865,20 +2863,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); + map = &vmx->nested.virtual_apic_map; /* * If translation failed, VM entry will fail because * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. */ - if (!is_error_page(page)) { - vmx->nested.virtual_apic_page = page; - hpa = page_to_phys(vmx->nested.virtual_apic_page); - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { @@ -3313,11 +3305,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr != 256) { - vapic_page = kmap(vmx->nested.virtual_apic_page); + vapic_page = vmx->nested.virtual_apic_map.hva; + if (!vapic_page) + return; + __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page, &max_irr); - kunmap(vmx->nested.virtual_apic_page); - status = vmcs_read16(GUEST_INTR_STATUS); if ((u8)max_irr > ((u8)status & 0xff)) { status &= ~0xff; @@ -3958,10 +3951,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } - if (vmx->nested.virtual_apic_page) { - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); kvm_release_page_dirty(vmx->nested.pi_desc_page); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index afe45a28020c..8641c99a3255 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3624,14 +3624,13 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || !nested_cpu_has_vid(get_vmcs12(vcpu)) || - 
WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) + WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) return false; rvi = vmx_get_rvi(); - vapic_page = kmap(vmx->nested.virtual_apic_page); + vapic_page = vmx->nested.virtual_apic_map.hva; vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); - kunmap(vmx->nested.virtual_apic_page); return ((rvi & 0xf0) > (vppr & 0xf0)); } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b07d4b1d63cf..b03b18cf9b6b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -142,7 +142,7 @@ struct nested_vmx { * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; - struct page *virtual_apic_page; + struct kvm_host_map virtual_apic_map; struct page *pi_desc_page; struct kvm_host_map msr_bitmap_map; -- cgit v1.2.3-58-ga151 From 3278e0492554895509530d493fbfa9a9f1b27a41 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:38 +0100 Subject: KVM/nVMX: Use kvm_vcpu_map when mapping the posted interrupt descriptor table Use kvm_vcpu_map when mapping the posted interrupt descriptor table since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". One additional semantic change is that the virtual host mapping lifecycle has changed a bit. It now has the same lifetime of the pinning of the interrupt descriptor table page on the host side. Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 43 ++++++++++++------------------------------- arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 13 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a00c4ded272d..69c505fcaad7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); @@ -2891,26 +2887,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } if (nested_cpu_has_posted_intr(vmcs12)) { - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); + map = &vmx->nested.pi_desc_map; + + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { + vmx->nested.pi_desc = + (struct pi_desc *)(((void *)map->hva) + + offset_in_page(vmcs12->posted_intr_desc_addr)); + vmcs_write64(POSTED_INTR_DESC_ADDR, + pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); } - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); - if (is_error_page(page)) - return; - vmx->nested.pi_desc_page = page; - vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); - vmx->nested.pi_desc = - (struct pi_desc *)((void *)vmx->nested.pi_desc + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); - vmcs_write64(POSTED_INTR_DESC_ADDR, - page_to_phys(vmx->nested.pi_desc_page) + - (unsigned long)(vmcs12->posted_intr_desc_addr & 
- (PAGE_SIZE - 1))); } if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, @@ -3952,12 +3937,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; /* * We are now running in L2, mmu_notifier will force to reload the diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b03b18cf9b6b..c0ff305d59f7 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -143,7 +143,7 @@ struct nested_vmx { */ struct page *apic_access_page; struct kvm_host_map virtual_apic_map; - struct page *pi_desc_page; + struct kvm_host_map pi_desc_map; struct kvm_host_map msr_bitmap_map; -- cgit v1.2.3-58-ga151 From 42e35f8072c339cbdcaf4de95dc6e1815c2797cf Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:39 +0100 Subject: KVM/X86: Use kvm_vcpu_map in emulator_cmpxchg_emulated Use kvm_vcpu_map in emulator_cmpxchg_emulated since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a7ea346fa41e..044ece3a28a4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5557,9 +5557,9 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned int bytes, struct x86_exception *exception) { + struct kvm_host_map map; struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; - struct page *page; char *kaddr; bool exchanged; @@ -5576,12 +5576,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) goto emul_write; - page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); - if (is_error_page(page)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) goto emul_write; - kaddr = kmap_atomic(page); - kaddr += offset_in_page(gpa); + kaddr = map.hva + offset_in_page(gpa); + switch (bytes) { case 1: exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); @@ -5598,13 +5597,12 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, default: BUG(); } - kunmap_atomic(kaddr); - kvm_release_page_dirty(page); + + kvm_vcpu_unmap(vcpu, &map, true); if (!exchanged) return X86EMUL_CMPXCHG_FAILED; - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); kvm_page_track_write(vcpu, gpa, new, bytes); return X86EMUL_CONTINUE; -- cgit v1.2.3-58-ga151 From 8c5fbf1a723107814c20c3f4d6343ab9d694a705 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:40 +0100 Subject: KVM/nSVM: Use the new mapping API for mapping guest memory Use the new mapping API for mapping guest memory to avoid depending on "struct page". 
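Besides switching to kvm_host_map, the SVM conversion moves error handling into the callers: nested_svm_map() used to inject #GP on any failure, whereas the callers now check the return value of kvm_vcpu_map() and inject #GP only for -EINVAL. A sketch of the resulting pattern, using names from the diff below:

struct kvm_host_map map;
struct vmcb *nested_vmcb;
int rc;

rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
if (rc) {
	if (rc == -EINVAL)
		kvm_inject_gp(&svm->vcpu, 0);
	return 1;
}

nested_vmcb = map.hva;
/* ... operate on the guest's VMCB ... */
kvm_vcpu_unmap(&svm->vcpu, &map, true);	/* VMCB may have been written */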
Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 94 ++++++++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..6e374a0e3bc3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3071,32 +3071,6 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm) return false; } -static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) -{ - struct page *page; - - might_sleep(); - - page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT); - if (is_error_page(page)) - goto error; - - *_page = page; - - return kmap(page); - -error: - kvm_inject_gp(&svm->vcpu, 0); - - return NULL; -} - -static void nested_svm_unmap(struct page *page) -{ - kunmap(page); - kvm_release_page_dirty(page); -} - static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { unsigned port, size, iopm_len; @@ -3299,10 +3273,11 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr static int nested_svm_vmexit(struct vcpu_svm *svm) { + int rc; struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; - struct page *page; + struct kvm_host_map map; trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, vmcb->control.exit_info_1, @@ -3311,9 +3286,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_int_info_err, KVM_ISA_SVM); - nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); - if (!nested_vmcb) + rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map); + if (rc) { + if (rc == -EINVAL) + kvm_inject_gp(&svm->vcpu, 0); return 1; + } + + nested_vmcb = map.hva; /* Exit Guest-Mode */ leave_guest_mode(&svm->vcpu); @@ -3417,7 +3397,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) mark_all_dirty(svm->vmcb); - nested_svm_unmap(page); + kvm_vcpu_unmap(&svm->vcpu, &map, true); nested_svm_uninit_mmu_context(&svm->vcpu); kvm_mmu_reset_context(&svm->vcpu); @@ -3483,7 +3463,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb) } static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct page *page) + struct vmcb *nested_vmcb, struct kvm_host_map *map) { if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; @@ -3567,7 +3547,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->control.pause_filter_thresh = nested_vmcb->control.pause_filter_thresh; - nested_svm_unmap(page); + kvm_vcpu_unmap(&svm->vcpu, map, true); /* Enter Guest-Mode */ enter_guest_mode(&svm->vcpu); @@ -3587,17 +3567,23 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, static bool nested_svm_vmrun(struct vcpu_svm *svm) { + int rc; struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; - struct page *page; + struct kvm_host_map map; u64 vmcb_gpa; vmcb_gpa = svm->vmcb->save.rax; - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); - if (!nested_vmcb) + rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map); + if (rc) { + if (rc == -EINVAL) + kvm_inject_gp(&svm->vcpu, 0); return false; + } + + nested_vmcb = map.hva; if (!nested_vmcb_checks(nested_vmcb)) { nested_vmcb->control.exit_code = SVM_EXIT_ERR; @@ -3605,7 +3591,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) nested_vmcb->control.exit_info_1 = 0; nested_vmcb->control.exit_info_2 
= 0; - nested_svm_unmap(page); + kvm_vcpu_unmap(&svm->vcpu, &map, true); return false; } @@ -3649,7 +3635,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page); + enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); return true; } @@ -3673,21 +3659,26 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; - struct page *page; + struct kvm_host_map map; int ret; if (nested_svm_check_permissions(svm)) return 1; - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); - if (!nested_vmcb) + ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map); + if (ret) { + if (ret == -EINVAL) + kvm_inject_gp(&svm->vcpu, 0); return 1; + } + + nested_vmcb = map.hva; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(nested_vmcb, svm->vmcb); - nested_svm_unmap(page); + kvm_vcpu_unmap(&svm->vcpu, &map, true); return ret; } @@ -3695,21 +3686,26 @@ static int vmload_interception(struct vcpu_svm *svm) static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; - struct page *page; + struct kvm_host_map map; int ret; if (nested_svm_check_permissions(svm)) return 1; - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); - if (!nested_vmcb) + ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map); + if (ret) { + if (ret == -EINVAL) + kvm_inject_gp(&svm->vcpu, 0); return 1; + } + + nested_vmcb = map.hva; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(svm->vmcb, nested_vmcb); - nested_svm_unmap(page); + kvm_vcpu_unmap(&svm->vcpu, &map, true); return ret; } @@ -6236,7 +6232,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *nested_vmcb; - struct page *page; + struct kvm_host_map map; u64 guest; u64 vmcb; @@ -6244,10 +6240,10 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); if (guest) { - nested_vmcb = nested_svm_map(svm, vmcb, &page); - if (!nested_vmcb) + if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) return 1; - enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); + nested_vmcb = map.hva; + enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map); } return 0; } -- cgit v1.2.3-58-ga151 From 8892530598dd6333c2ca6316e486708824d587ea Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:41 +0100 Subject: KVM/nVMX: Use kvm_vcpu_map for accessing the shadow VMCS Use kvm_vcpu_map for accessing the shadow VMCS since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". 
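Note the final argument of kvm_vcpu_unmap(): it tells KVM whether to mark the backing page dirty, so purely read-side users such as the shadow-VMCS copy pass false. A condensed sketch of the new code path below:

struct kvm_host_map map;

if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
	return;

memcpy(get_shadow_vmcs12(vcpu), map.hva, VMCS12_SIZE);
kvm_vcpu_unmap(vcpu, &map, false);	/* read-only: no need to dirty */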
Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzessutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 69c505fcaad7..e11c9f9c9dda 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -605,20 +605,20 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + struct kvm_host_map map; struct vmcs12 *shadow; - struct page *page; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == -1ull) return; shadow = get_shadow_vmcs12(vcpu); - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); - memcpy(shadow, kmap(page), VMCS12_SIZE); + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + return; - kunmap(page); - kvm_release_page_clean(page); + memcpy(shadow, map.hva, VMCS12_SIZE); + kvm_vcpu_unmap(vcpu, &map, false); } static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, @@ -2631,9 +2631,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int r; - struct page *page; + int r = 0; struct vmcs12 *shadow; + struct kvm_host_map map; if (vmcs12->vmcs_link_pointer == -1ull) return 0; @@ -2641,17 +2641,16 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) return -EINVAL; - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); - if (is_error_page(page)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) return -EINVAL; - r = 0; - shadow = kmap(page); + shadow = map.hva; + if (shadow->hdr.revision_id != VMCS12_REVISION || shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) r = -EINVAL; - kunmap(page); - kvm_release_page_clean(page); + + kvm_vcpu_unmap(vcpu, &map, false); return r; } -- cgit v1.2.3-58-ga151 From dee9c0493108b36e89d289c8fd6f4c90321d0d5e Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:42 +0100 Subject: KVM/nVMX: Use kvm_vcpu_map for accessing the enlightened VMCS Use kvm_vcpu_map for accessing the enlightened VMCS since using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory that has a "struct page". 
Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 14 +++++--------- arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e11c9f9c9dda..52347dcc252c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) if (!vmx->nested.hv_evmcs) return; - kunmap(vmx->nested.hv_evmcs_page); - kvm_release_page_dirty(vmx->nested.hv_evmcs_page); + kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); vmx->nested.hv_evmcs_vmptr = -1ull; - vmx->nested.hv_evmcs_page = NULL; vmx->nested.hv_evmcs = NULL; } @@ -1786,13 +1784,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, nested_release_evmcs(vcpu); - vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page( - vcpu, assist_page.current_nested_vmcs); - - if (unlikely(is_error_page(vmx->nested.hv_evmcs_page))) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs), + &vmx->nested.hv_evmcs_map)) return 0; - vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); + vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva; /* * Currently, KVM only supports eVMCS version 1 @@ -4350,7 +4346,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); - if (vmx->nested.hv_evmcs_page) { + if (vmx->nested.hv_evmcs_map.hva) { if (vmptr == vmx->nested.hv_evmcs_vmptr) nested_release_evmcs(vcpu); } else { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c0ff305d59f7..63d37ccce3dc 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -172,7 +172,7 @@ struct nested_vmx { } smm; gpa_t hv_evmcs_vmptr; - struct page *hv_evmcs_page; + struct kvm_host_map hv_evmcs_map; struct hv_enlightened_vmcs *hv_evmcs; }; -- cgit v1.2.3-58-ga151 From e0bf2665ca26c1f6d6909de29bd382e7a5ee41e7 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:43 +0100 Subject: KVM/nVMX: Use page_address_valid in a few more locations Use page_address_valid in a few more locations that is already checking for a page aligned address that does not cross the maximum physical address. 
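For reference, the helper being reused is a thin wrapper around the same two checks; its definition (paraphrased from the existing arch/x86/kvm header, not part of this patch) is roughly:

static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}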
Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 52347dcc252c..877593573b52 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4275,7 +4275,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; * which replaces physical address width with 32 */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failInvalid(vcpu); if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || @@ -4338,7 +4338,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); @@ -4545,7 +4545,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); -- cgit v1.2.3-58-ga151 From 0c55671f84fffe591e8435c93a8c83286fd6b8eb Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:44 +0100 Subject: kvm, x86: Properly check whether a pfn is an MMIO or not pfn_valid check is not sufficient because it only checks if a page has a struct page or not, if "mem=" was passed to the kernel some valid pages won't have a struct page. This means that if guests were assigned valid memory that lies after the mem= boundary it will be passed uncached to the guest no matter what the guest caching attributes are for this memory. Introduce a new function e820__mapped_raw_any which is equivalent to e820__mapped_any but uses the original e820 unmodified and use it to identify real *RAM*. Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/e820/api.h | 1 + arch/x86/kernel/e820.c | 18 +++++++++++++++--- arch/x86/kvm/mmu.c | 5 ++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 62be73b23d5c..e8f58ddd06d9 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -10,6 +10,7 @@ extern struct e820_table *e820_table_firmware; extern unsigned long pci_mem_start; +extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2879e234e193..76dd605ee2a3 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -73,12 +73,13 @@ EXPORT_SYMBOL(pci_mem_start); * This function checks if any part of the range is mapped * with type. 
*/ -bool e820__mapped_any(u64 start, u64 end, enum e820_type type) +static bool _e820__mapped_any(struct e820_table *table, + u64 start, u64 end, enum e820_type type) { int i; - for (i = 0; i < e820_table->nr_entries; i++) { - struct e820_entry *entry = &e820_table->entries[i]; + for (i = 0; i < table->nr_entries; i++) { + struct e820_entry *entry = &table->entries[i]; if (type && entry->type != type) continue; @@ -88,6 +89,17 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type) } return 0; } + +bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type) +{ + return _e820__mapped_any(e820_table_firmware, start, end, type); +} +EXPORT_SYMBOL_GPL(e820__mapped_raw_any); + +bool e820__mapped_any(u64 start, u64 end, enum e820_type type) +{ + return _e820__mapped_any(e820_table, start, end, type); +} EXPORT_SYMBOL_GPL(e820__mapped_any); /* diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e10962dfc203..b1f6451022e5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -2892,7 +2893,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) */ (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); - return true; + return !e820__mapped_raw_any(pfn_to_hpa(pfn), + pfn_to_hpa(pfn + 1) - 1, + E820_TYPE_RAM); } /* Bits which may be returned by set_spte() */ -- cgit v1.2.3-58-ga151 From de3cd117ed2f6cb3317212f242a87ffca56c27ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Apr 2019 10:36:17 -0700 Subject: KVM: x86: Omit caching logic for always-available GPRs Except for RSP and RIP, which are held in VMX's VMCS, GPRs are always treated "available and dirtly" on both VMX and SVM, i.e. are unconditionally loaded/saved immediately before/after VM-Enter/VM-Exit. Eliminating the unnecessary caching code reduces the size of KVM by a non-trivial amount, much of which comes from the most common code paths. E.g. on x86_64, kvm_emulate_cpuid() is reduced from 342 to 182 bytes and kvm_emulate_hypercall() from 1362 to 1143, with the total size of KVM dropping by ~1000 bytes. With CONFIG_RETPOLINE=y, the numbers are even more pronounced, e.g.: 353->182, 1418->1172 and well over 2000 bytes. 
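The per-register helpers are generated by the BUILD_KVM_GPR_ACCESSORS() macro added in the diff below; for a single register the expansion is roughly the following, which is why the cache bookkeeping disappears from the common paths:

/* Approximate expansion of BUILD_KVM_GPR_ACCESSORS(rax, RAX): */
static __always_inline unsigned long kvm_rax_read(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.regs[VCPU_REGS_RAX];	/* no availability check */
}

static __always_inline void kvm_rax_write(struct kvm_vcpu *vcpu,
					  unsigned long val)
{
	vcpu->arch.regs[VCPU_REGS_RAX] = val;	/* no dirty tracking needed */
}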
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 12 +++--- arch/x86/kvm/hyperv.c | 24 ++++++------ arch/x86/kvm/kvm_cache_regs.h | 32 +++++++++++++++- arch/x86/kvm/svm.c | 26 ++++++------- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 89 +++++++++++++++++++++---------------------- 6 files changed, 105 insertions(+), 80 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fd3951638ae4..fc8ccf596624 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -962,13 +962,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) return 1; - eax = kvm_register_read(vcpu, VCPU_REGS_RAX); - ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); + eax = kvm_rax_read(vcpu); + ecx = kvm_rcx_read(vcpu); kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); - kvm_register_write(vcpu, VCPU_REGS_RAX, eax); - kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); - kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); - kvm_register_write(vcpu, VCPU_REGS_RDX, edx); + kvm_rax_write(vcpu, eax); + kvm_rbx_write(vcpu, ebx); + kvm_rcx_write(vcpu, ecx); + kvm_rdx_write(vcpu, edx); return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 421899f6ad7b..7868daceb25b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1526,10 +1526,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) longmode = is_64_bit_mode(vcpu); if (longmode) - kvm_register_write(vcpu, VCPU_REGS_RAX, result); + kvm_rax_write(vcpu, result); else { - kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); - kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); + kvm_rdx_write(vcpu, result >> 32); + kvm_rax_write(vcpu, result & 0xffffffff); } } @@ -1602,18 +1602,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) longmode = is_64_bit_mode(vcpu); if (!longmode) { - param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); - ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); - outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); + param = ((u64)kvm_rdx_read(vcpu) << 32) | + (kvm_rax_read(vcpu) & 0xffffffff); + ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | + (kvm_rcx_read(vcpu) & 0xffffffff); + outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | + (kvm_rsi_read(vcpu) & 0xffffffff); } #ifdef CONFIG_X86_64 else { - param = kvm_register_read(vcpu, VCPU_REGS_RCX); - ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); - outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); + param = kvm_rcx_read(vcpu); + ingpa = kvm_rdx_read(vcpu); + outgpa = kvm_r8_read(vcpu); } #endif diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f8f56a93358b..d179b7d7860d 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,6 +9,34 @@ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) +#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ +static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ +{ \ + return vcpu->arch.regs[VCPU_REGS_##uname]; \ +} \ +static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ + unsigned long val) \ +{ \ + vcpu->arch.regs[VCPU_REGS_##uname] = val; \ +} 
+BUILD_KVM_GPR_ACCESSORS(rax, RAX) +BUILD_KVM_GPR_ACCESSORS(rbx, RBX) +BUILD_KVM_GPR_ACCESSORS(rcx, RCX) +BUILD_KVM_GPR_ACCESSORS(rdx, RDX) +BUILD_KVM_GPR_ACCESSORS(rbp, RBP) +BUILD_KVM_GPR_ACCESSORS(rsi, RSI) +BUILD_KVM_GPR_ACCESSORS(rdi, RDI) +#ifdef CONFIG_X86_64 +BUILD_KVM_GPR_ACCESSORS(r8, R8) +BUILD_KVM_GPR_ACCESSORS(r9, R9) +BUILD_KVM_GPR_ACCESSORS(r10, R10) +BUILD_KVM_GPR_ACCESSORS(r11, R11) +BUILD_KVM_GPR_ACCESSORS(r12, R12) +BUILD_KVM_GPR_ACCESSORS(r13, R13) +BUILD_KVM_GPR_ACCESSORS(r14, R14) +BUILD_KVM_GPR_ACCESSORS(r15, R15) +#endif + static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg) { @@ -83,8 +111,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) { - return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) - | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); + return (kvm_rax_read(vcpu) & -1u) + | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32); } static inline void enter_guest_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6e374a0e3bc3..38aef3439799 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2091,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) init_vmcb(svm); kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); - kvm_register_write(vcpu, VCPU_REGS_RDX, eax); + kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); @@ -3388,7 +3388,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) } else { (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); } - kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); + kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); svm->vmcb->save.dr7 = 0; @@ -3496,7 +3496,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, kvm_mmu_reset_context(&svm->vcpu); svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; - kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); + kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); @@ -3787,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), - kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); + trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu), + kvm_rax_read(&svm->vcpu)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
*/ - kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); + kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3799,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); + trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu)); kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -3813,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm) static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(&svm->vcpu); - u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u32 index = kvm_rcx_read(&svm->vcpu); if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; @@ -4209,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u32 ecx = kvm_rcx_read(&svm->vcpu); struct msr_data msr_info; msr_info.index = ecx; @@ -4221,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, msr_info.data); - kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, - msr_info.data & 0xffffffff); - kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, - msr_info.data >> 32); + kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff); + kvm_rdx_write(&svm->vcpu, msr_info.data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; return kvm_skip_emulated_instruction(&svm->vcpu); } @@ -4418,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u32 ecx = kvm_rcx_read(&svm->vcpu); u64 data = kvm_read_edx_eax(&svm->vcpu); msr.data = data; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8641c99a3255..62924c5d1c06 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4926,7 +4926,7 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu) static int handle_xsetbv(struct kvm_vcpu *vcpu) { u64 new_bv = kvm_read_edx_eax(vcpu); - u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); + u32 index = kvm_rcx_read(vcpu); if (kvm_set_xcr(vcpu, index, new_bv) == 0) return kvm_skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 044ece3a28a4..b352a7c137cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1096,15 +1096,15 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); bool kvm_rdpmc(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); + u32 ecx = kvm_rcx_read(vcpu); u64 data; int err; err = kvm_pmu_rdpmc(vcpu, ecx, &data); if (err) return err; - kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); - kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); + kvm_rax_write(vcpu, (u32)data); + kvm_rdx_write(vcpu, data >> 32); return err; } EXPORT_SYMBOL_GPL(kvm_rdpmc); @@ -6586,7 +6586,7 @@ static int complete_fast_pio_out(struct kvm_vcpu *vcpu) static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { - unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); + unsigned long val = kvm_rax_read(vcpu); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); @@ -6610,8 +6610,7 @@ static int complete_fast_pio_in(struct kvm_vcpu 
*vcpu) } /* For size less than 4 we merge, else we zero extend */ - val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) - : 0; + val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0; /* * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform @@ -6619,7 +6618,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) */ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1); - kvm_register_write(vcpu, VCPU_REGS_RAX, val); + kvm_rax_write(vcpu, val); return kvm_skip_emulated_instruction(vcpu); } @@ -6631,12 +6630,12 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, int ret; /* For size less than 4 we merge, else we zero extend */ - val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0; + val = (size < 4) ? kvm_rax_read(vcpu) : 0; ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); if (ret) { - kvm_register_write(vcpu, VCPU_REGS_RAX, val); + kvm_rax_write(vcpu, val); return ret; } @@ -7151,11 +7150,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); - nr = kvm_register_read(vcpu, VCPU_REGS_RAX); - a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); - a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); - a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); - a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); + nr = kvm_rax_read(vcpu); + a0 = kvm_rbx_read(vcpu); + a1 = kvm_rcx_read(vcpu); + a2 = kvm_rdx_read(vcpu); + a3 = kvm_rsi_read(vcpu); trace_kvm_hypercall(nr, a0, a1, a2, a3); @@ -7196,7 +7195,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) out: if (!op_64_bit) ret = (u32)ret; - kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + kvm_rax_write(vcpu, ret); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); @@ -8285,23 +8284,23 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; } - regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); - regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); - regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); - regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); - regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); + regs->rax = kvm_rax_read(vcpu); + regs->rbx = kvm_rbx_read(vcpu); + regs->rcx = kvm_rcx_read(vcpu); + regs->rdx = kvm_rdx_read(vcpu); + regs->rsi = kvm_rsi_read(vcpu); + regs->rdi = kvm_rdi_read(vcpu); regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); + regs->rbp = kvm_rbp_read(vcpu); #ifdef CONFIG_X86_64 - regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); - regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); - regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); - regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); - regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); - regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); - regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); - regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); + regs->r8 = kvm_r8_read(vcpu); + regs->r9 = kvm_r9_read(vcpu); + regs->r10 = kvm_r10_read(vcpu); + regs->r11 = kvm_r11_read(vcpu); + regs->r12 = kvm_r12_read(vcpu); + regs->r13 = kvm_r13_read(vcpu); + regs->r14 = kvm_r14_read(vcpu); + regs->r15 = kvm_r15_read(vcpu); #endif regs->rip = kvm_rip_read(vcpu); @@ -8321,23 +8320,23 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs 
*regs) vcpu->arch.emulate_regs_need_sync_from_vcpu = true; vcpu->arch.emulate_regs_need_sync_to_vcpu = false; - kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); - kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); - kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); - kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); - kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); - kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); + kvm_rax_write(vcpu, regs->rax); + kvm_rbx_write(vcpu, regs->rbx); + kvm_rcx_write(vcpu, regs->rcx); + kvm_rdx_write(vcpu, regs->rdx); + kvm_rsi_write(vcpu, regs->rsi); + kvm_rdi_write(vcpu, regs->rdi); kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); - kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); + kvm_rbp_write(vcpu, regs->rbp); #ifdef CONFIG_X86_64 - kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); - kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); - kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); - kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); - kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); - kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); - kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); - kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); + kvm_r8_write(vcpu, regs->r8); + kvm_r9_write(vcpu, regs->r9); + kvm_r10_write(vcpu, regs->r10); + kvm_r11_write(vcpu, regs->r11); + kvm_r12_write(vcpu, regs->r12); + kvm_r13_write(vcpu, regs->r13); + kvm_r14_write(vcpu, regs->r14); + kvm_r15_write(vcpu, regs->r15); #endif kvm_rip_write(vcpu, regs->rip); -- cgit v1.2.3-58-ga151 From 2b3eaf815ca930b1dbc11fb1ebd773e510947c0f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Apr 2019 10:36:19 -0700 Subject: KVM: VMX: Use accessors for GPRs outside of dedicated caching logic ... now that there is no overhead when using dedicated accessors. Opportunistically remove a bogus "FIXME" in handle_rdmsr() regarding the upper 32 bits of RAX and RDX. Zeroing the upper 32 bits is architecturally correct as 32-bit writes in 64-bit mode unconditionally clear the upper 32 bits. 
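As a concrete example of the architectural point above (the values are illustrative, not from the patch): an emulated RDMSR returns a 64-bit value split across EAX and EDX, and because 32-bit writes in 64-bit mode zero-extend, storing the masked halves is already correct:

u64 data = 0x1122334455667788ull;		/* hypothetical MSR value */

kvm_rax_write(vcpu, data & -1u);		/* guest sees RAX = 0x55667788 */
kvm_rdx_write(vcpu, (data >> 32) & -1u);	/* guest sees RDX = 0x11223344 */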
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 +++--- arch/x86/kvm/vmx/vmx.c | 12 +++++------- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 877593573b52..d97dbea150ba 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4764,7 +4764,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; + u32 index = kvm_rcx_read(vcpu); u64 address; bool accessed_dirty; struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -4810,7 +4810,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; + u32 function = kvm_rax_read(vcpu); /* * VMFUNC is only supported for nested guests, but we always enable the @@ -4896,7 +4896,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 exit_reason) { - u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; + u32 msr_index = kvm_rcx_read(vcpu); gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 62924c5d1c06..60306f19105d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4831,7 +4831,7 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) static int handle_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_rcx_read(vcpu); struct msr_data msr_info; msr_info.index = ecx; @@ -4844,18 +4844,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) trace_kvm_msr_read(ecx, msr_info.data); - /* FIXME: handling of bits 32:63 of rax, rdx */ - vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; - vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; + kvm_rax_write(vcpu, msr_info.data & -1u); + kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u); return kvm_skip_emulated_instruction(vcpu); } static int handle_wrmsr(struct kvm_vcpu *vcpu) { struct msr_data msr; - u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; - u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) - | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); + u32 ecx = kvm_rcx_read(vcpu); + u64 data = kvm_read_edx_eax(vcpu); msr.data = data; msr.index = ecx; -- cgit v1.2.3-58-ga151 From e9c16c78503dd0482b876761d60a3d2f50ac4d86 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Apr 2019 22:07:26 +0200 Subject: KVM: x86: use direct accessors for RIP and RSP Use specific inline functions for RIP and RSP instead of going through kvm_register_read and kvm_register_write, which are quite a mouthful. kvm_rsp_read and kvm_rsp_write did not exist, so add them. 
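Unlike the plain GPR accessors above, RSP (like RIP) still goes through the register cache because VMX keeps it in the VMCS; the new helpers are therefore simple wrappers, roughly:

static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu)
{
	return kvm_register_read(vcpu, VCPU_REGS_RSP);
}

static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	kvm_register_write(vcpu, VCPU_REGS_RSP, val);
}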
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 10 ++++++++++ arch/x86/kvm/svm.c | 8 ++++---- arch/x86/kvm/vmx/nested.c | 12 ++++++------ arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 22 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index d179b7d7860d..1cc6c47dc77e 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -65,6 +65,16 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) kvm_register_write(vcpu, VCPU_REGS_RIP, val); } +static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) +{ + return kvm_register_read(vcpu, VCPU_REGS_RSP); +} + +static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) +{ + kvm_register_write(vcpu, VCPU_REGS_RSP, val); +} + static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 38aef3439799..893686cb0044 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3389,8 +3389,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); } kvm_rax_write(&svm->vcpu, hsave->save.rax); - kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); - kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); + kvm_rsp_write(&svm->vcpu, hsave->save.rsp); + kvm_rip_write(&svm->vcpu, hsave->save.rip); svm->vmcb->save.dr7 = 0; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; @@ -3497,8 +3497,8 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); - kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); - kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); + kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp); + kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip); /* In case we don't even reach vcpu_run, the fields are not updated */ svm->vmcb->save.rax = nested_vmcb->save.rax; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d97dbea150ba..04b40a98f60b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2372,8 +2372,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; - kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); - kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); + kvm_rsp_write(vcpu, vmcs12->guest_rsp); + kvm_rip_write(vcpu, vmcs12->guest_rip); return 0; } @@ -3401,8 +3401,8 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); - vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); + vmcs12->guest_rsp = kvm_rsp_read(vcpu); + vmcs12->guest_rip = kvm_rip_read(vcpu); vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); @@ -3585,8 +3585,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); vmx_set_efer(vcpu, vcpu->arch.efer); - kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); - kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); + kvm_rsp_write(vcpu, vmcs12->host_rsp); + 
kvm_rip_write(vcpu, vmcs12->host_rip); vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); vmx_set_interrupt_shadow(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b352a7c137cd..dc621f73e96b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8290,7 +8290,7 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->rdx = kvm_rdx_read(vcpu); regs->rsi = kvm_rsi_read(vcpu); regs->rdi = kvm_rdi_read(vcpu); - regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); + regs->rsp = kvm_rsp_read(vcpu); regs->rbp = kvm_rbp_read(vcpu); #ifdef CONFIG_X86_64 regs->r8 = kvm_r8_read(vcpu); @@ -8326,7 +8326,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvm_rdx_write(vcpu, regs->rdx); kvm_rsi_write(vcpu, regs->rsi); kvm_rdi_write(vcpu, regs->rdi); - kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); + kvm_rsp_write(vcpu, regs->rsp); kvm_rbp_write(vcpu, regs->rbp); #ifdef CONFIG_X86_64 kvm_r8_write(vcpu, regs->r8); -- cgit v1.2.3-58-ga151 From 332d079735f5add26f4443cec2991ee03ed2ae19 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 2 May 2019 11:31:33 -0700 Subject: KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state Move call to nested_enable_evmcs until after free_nested() is complete. Signed-off-by: Aaron Lewis Reviewed-by: Marc Orr Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 04b40a98f60b..cec77f30f61c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5333,9 +5333,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->format != 0) return -EINVAL; - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) - nested_enable_evmcs(vcpu, NULL); - if (!nested_vmx_allowed(vcpu)) return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; @@ -5377,6 +5374,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->vmx.vmxon_pa == -1ull) return 0; + if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) + nested_enable_evmcs(vcpu, NULL); + vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; ret = enter_vmx_operation(vcpu); if (ret) -- cgit v1.2.3-58-ga151 From 9b5db6c762adcca120667b683af55700ffd7ae29 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 2 May 2019 11:31:25 -0700 Subject: kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete nested_run_pending=1 implies we have successfully entered guest mode. Move setting from external state in vmx_set_nested_state() until after all other checks are complete. Based on a patch by Aaron Lewis. 
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/nested.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cec77f30f61c..e58caff92694 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5420,9 +5420,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
 		return 0;
 
-	vmx->nested.nested_run_pending =
-		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
-
 	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
 	    vmcs12->vmcs_link_pointer != -1ull) {
 		struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
@@ -5446,9 +5443,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 			return -EINVAL;
 
 	vmx->nested.dirty_vmcs12 = true;
+	vmx->nested.nested_run_pending =
+		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
 	ret = nested_vmx_enter_non_root_mode(vcpu, false);
-	if (ret)
+	if (ret) {
+		vmx->nested.nested_run_pending = 0;
 		return -EINVAL;
+	}
 
 	return 0;
 }
--
cgit v1.2.3-58-ga151

From d69129b4e46a7b61dc956af038d143eb791f22c7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 8 May 2019 07:32:15 -0700
Subject: KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when
 possible

If L1 is using an MSR bitmap, unconditionally merge the MSR bitmaps from
L0 and L1 for MSR_{KERNEL,}_{FS,GS}_BASE. KVM unconditionally exposes the
MSRs to L1. If KVM is also running in L1, then it's highly likely L1 is
also exposing the MSRs to L2, i.e. KVM doesn't need to intercept L2
accesses.

Based on code from Jintack Lim.

Cc: Jintack Lim
Signed-off-by: Sean Christopherson
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/nested.c | 47 +++++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e58caff92694..f4b1ae439763 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -514,31 +514,11 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
 
-	/*
-	 * pred_cmd & spec_ctrl are trying to verify two things:
-	 *
-	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
-	 *    ensures that we do not accidentally generate an L02 MSR bitmap
-	 *    from the L12 MSR bitmap that is too permissive.
-	 * 2. That L1 or L2s have actually used the MSR. This avoids
-	 *    unnecessarily merging of the bitmap if the MSR is unused. This
-	 *    works properly because we only update the L01 MSR bitmap lazily.
-	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
-	 *    updated to reflect this when L1 (or its L2s) actually write to
-	 *    the MSR.
-	 */
-	bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
-	bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
-
 	/* Nothing to do if the MSR bitmap is not in use. */
 	if (!cpu_has_vmx_msr_bitmap() ||
 	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return false;
 
-	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
-	    !pred_cmd && !spec_ctrl)
-		return false;
-
 	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
 		return false;
 
@@ -583,13 +563,36 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 		}
 	}
 
-	if (spec_ctrl)
+	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+					     MSR_FS_BASE, MSR_TYPE_RW);
+
+	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+					     MSR_GS_BASE, MSR_TYPE_RW);
+
+	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+
+	/*
+	 * Checking the L0->L1 bitmap is trying to verify two things:
+	 *
+	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+	 *    ensures that we do not accidentally generate an L02 MSR bitmap
+	 *    from the L12 MSR bitmap that is too permissive.
+	 * 2. That L1 or L2s have actually used the MSR. This avoids
+	 *    unnecessarily merging of the bitmap if the MSR is unused. This
+	 *    works properly because we only update the L01 MSR bitmap lazily.
+	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+	 *    updated to reflect this when L1 (or its L2s) actually write to
+	 *    the MSR.
+	 */
+	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
 		nested_vmx_disable_intercept_for_msr(
 					msr_bitmap_l1, msr_bitmap_l0,
 					MSR_IA32_SPEC_CTRL,
 					MSR_TYPE_R | MSR_TYPE_W);
 
-	if (pred_cmd)
+	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
 		nested_vmx_disable_intercept_for_msr(
 					msr_bitmap_l1, msr_bitmap_l0,
 					MSR_IA32_PRED_CMD,
--
cgit v1.2.3-58-ga151

From 61455bf26236e7f3d72705382a6437fdfd1bd0af Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Fri, 3 May 2019 01:40:25 -0700
Subject: kvm: x86: Fix L1TF mitigation for shadow MMU

Currently KVM sets the 5 most significant bits of the physical address
width reported by CPUID (boot_cpu_data.x86_phys_bits) in nonpresent or
reserved-bit SPTEs to mitigate L1TF attacks from the guest when using
the shadow MMU. However, for some particular Intel CPUs the physical
address width of the internal cache is greater than the physical
address width reported by CPUID.

Use the kernel's existing boot_cpu_data.x86_cache_bits to determine the
five most significant bits. Doing so improves KVM's L1TF mitigation in
the unlikely scenario that system RAM overlaps the high order bits of
the "real" physical address space as reported by CPUID. This aligns
with the kernel's warnings regarding L1TF mitigation, e.g. in the above
scenario the kernel won't warn the user about lack of L1TF mitigation
if x86_cache_bits is greater than x86_phys_bits.

Also initialize shadow_nonpresent_or_rsvd_mask explicitly to make it
consistent with the other 'shadow_{xxx}_mask' values, and
opportunistically add a WARN once if KVM's L1TF mitigation cannot be
applied on a system that is marked as being susceptible to L1TF.

Reviewed-by: Sean Christopherson
Signed-off-by: Kai Huang
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1f6451022e5..f7c2dee452a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -488,16 +488,24 @@ static void kvm_mmu_reset_all_pte_masks(void)
 	 * If the CPU has 46 or less physical address bits, then set an
 	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
 	 * assumed that the CPU is not vulnerable to L1TF.
+	 *
+	 * Some Intel CPUs address the L1 cache using more PA bits than are
+	 * reported by CPUID. Use the PA width of the L1 cache when possible
+	 * to achieve more effective mitigation, e.g. if system RAM overlaps
+	 * the most significant bits of legal physical address space.
 	 */
-	low_phys_bits = boot_cpu_data.x86_phys_bits;
-	if (boot_cpu_data.x86_phys_bits <
+	shadow_nonpresent_or_rsvd_mask = 0;
+	low_phys_bits = boot_cpu_data.x86_cache_bits;
+	if (boot_cpu_data.x86_cache_bits <
 	    52 - shadow_nonpresent_or_rsvd_mask_len) {
 		shadow_nonpresent_or_rsvd_mask =
-			rsvd_bits(boot_cpu_data.x86_phys_bits -
+			rsvd_bits(boot_cpu_data.x86_cache_bits -
 				  shadow_nonpresent_or_rsvd_mask_len,
-				  boot_cpu_data.x86_phys_bits - 1);
+				  boot_cpu_data.x86_cache_bits - 1);
 		low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
-	}
+	} else
+		WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+
 	shadow_nonpresent_or_rsvd_lower_gfn_mask =
 		GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
 }
--
cgit v1.2.3-58-ga151

From f93f7ede087f2edcc18e4b02310df5749a6b5a61 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 8 May 2019 09:08:19 -0700
Subject: Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU"

The RDPMC-exiting control is dependent on the existence of the RDPMC
instruction itself, i.e. is not tied to the "Architectural Performance
Monitoring" feature. For all intents and purposes, the control exists
on all CPUs with VMX support, since RDPMC also exists on all CPUs with
VMX support.

Per Intel's SDM:

  The RDPMC instruction was introduced into the IA-32 Architecture in
  the Pentium Pro processor and the Pentium processor with MMX
  technology. The earlier Pentium processors have performance-monitoring
  counters, but they must be read with the RDMSR instruction.

Because RDPMC-exiting always exists, KVM requires the control and
refuses to load if it's not available. As a result, hiding the PMU from
a guest breaks nested virtualization if the guest attempts to use KVM.

While it's not explicitly stated in the RDPMC pseudocode, the VM-Exit
check for RDPMC-exiting follows standard fault vs. VM-Exit
prioritization for privileged instructions, e.g. occurs after the
CPL/CR0.PE/CR4.PCE checks, but before the counter referenced in ECX is
checked for validity. In other words, the original KVM behavior of
injecting a #GP was correct, and the KVM unit test needs to be adjusted
accordingly, e.g. eat the #GP when the unit test guest (L3 in this case)
executes RDPMC without RDPMC-exiting set in the unit test host (L2).

This reverts commit e51bfdb68725dc052d16241ace40ea3140f938aa.
Fixes: e51bfdb68725 ("KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU")
Reported-by: David Hill
Cc: Saar Amar
Cc: Mihai Carabas
Cc: Jim Mattson
Cc: Liran Alon
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/vmx.c | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 60306f19105d..0db7ded18951 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6866,30 +6866,6 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *entry;
-	union cpuid10_eax eax;
-
-	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
-	if (!entry)
-		return false;
-
-	eax.full = entry->eax;
-	return (eax.split.version_id > 0);
-}
-
-static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
-
-	if (pmu_enabled)
-		vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
-	else
-		vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
-}
-
 static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6978,7 +6954,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	if (nested_vmx_allowed(vcpu)) {
 		nested_vmx_cr_fixed1_bits_update(vcpu);
 		nested_vmx_entry_exit_ctls_update(vcpu);
-		nested_vmx_procbased_ctls_update(vcpu);
 	}
 
 	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
--
cgit v1.2.3-58-ga151
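To make the prioritization described in the revert's changelog concrete, the
following is a minimal, self-contained sketch of the check ordering for RDPMC
in a nested guest. It is illustrative only: the enum, struct and helper names
are hypothetical and the counter-index check is simplified; this is neither
KVM nor kvm-unit-tests code.

	#include <stdbool.h>

	/*
	 * Sketch of the ordering: privilege checks that raise #GP come first,
	 * then the RDPMC-exiting VM-exit, and only then is the counter index
	 * in ECX validated.
	 */
	enum rdpmc_outcome { RDPMC_OK, RDPMC_INJECT_GP, RDPMC_VMEXIT };

	struct rdpmc_ctx {
		unsigned int cpl;	/* current privilege level */
		bool cr0_pe;		/* protected mode enabled */
		bool cr4_pce;		/* RDPMC allowed at CPL > 0 */
		bool rdpmc_exiting;	/* RDPMC-exiting set by the hypervisor */
		unsigned int ecx;	/* counter index requested by the guest */
		unsigned int nr_counters;
	};

	static enum rdpmc_outcome rdpmc_check_order(const struct rdpmc_ctx *c)
	{
		/* 1. CPL/CR0.PE/CR4.PCE checks: a #GP beats everything else. */
		if (c->cpl != 0 && c->cr0_pe && !c->cr4_pce)
			return RDPMC_INJECT_GP;

		/* 2. Only now is the RDPMC-exiting control consulted. */
		if (c->rdpmc_exiting)
			return RDPMC_VMEXIT;

		/* 3. The counter referenced in ECX is validated last
		 * (simplified: fixed-counter encodings are ignored). */
		if (c->ecx >= c->nr_counters)
			return RDPMC_INJECT_GP;

		return RDPMC_OK;
	}

Under this ordering, a guest that executes RDPMC with an invalid counter while
its hypervisor has left RDPMC-exiting clear receives a #GP rather than causing
a VM-exit, which is the original KVM behavior the revert restores.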