diff options
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 365 |
1 files changed, 168 insertions, 197 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6401eb7ef19c..1032f068f0b9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) if (!vmx->nested.hv_evmcs) return; - kunmap(vmx->nested.hv_evmcs_page); - kvm_release_page_dirty(vmx->nested.hv_evmcs_page); + kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); vmx->nested.hv_evmcs_vmptr = -1ull; - vmx->nested.hv_evmcs_page = NULL; vmx->nested.hv_evmcs = NULL; } @@ -229,16 +227,9 @@ static void free_nested(struct kvm_vcpu *vcpu) kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } - if (vmx->nested.virtual_apic_page) { - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); @@ -519,39 +510,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { int msr; - struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; - /* - * pred_cmd & spec_ctrl are trying to verify two things: - * - * 1. L0 gave a permission to L1 to actually passthrough the MSR. This - * ensures that we do not accidentally generate an L02 MSR bitmap - * from the L12 MSR bitmap that is too permissive. - * 2. That L1 or L2s have actually used the MSR. This avoids - * unnecessarily merging of the bitmap if the MSR is unused. This - * works properly because we only update the L01 MSR bitmap lazily. - * So even if L0 should pass L1 these MSRs, the L01 bitmap is only - * updated to reflect this when L1 (or its L2s) actually write to - * the MSR. - */ - bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; /* Nothing to do if the MSR bitmap is not in use. */ if (!cpu_has_vmx_msr_bitmap() || !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return false; - if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && - !pred_cmd && !spec_ctrl) - return false; - - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); - if (is_error_page(page)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) return false; - msr_bitmap_l1 = (unsigned long *)kmap(page); + msr_bitmap_l1 = (unsigned long *)map->hva; /* * To keep the control flow simple, pay eight 8-byte writes (sixteen @@ -592,20 +563,42 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, } } - if (spec_ctrl) + /* KVM unconditionally exposes the FS/GS base MSRs to L1. */ + nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, + MSR_FS_BASE, MSR_TYPE_RW); + + nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, + MSR_GS_BASE, MSR_TYPE_RW); + + nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, + MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + + /* + * Checking the L0->L1 bitmap is trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_SPEC_CTRL, MSR_TYPE_R | MSR_TYPE_W); - if (pred_cmd) + if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_PRED_CMD, MSR_TYPE_W); - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); return true; } @@ -613,20 +606,20 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + struct kvm_host_map map; struct vmcs12 *shadow; - struct page *page; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == -1ull) return; shadow = get_shadow_vmcs12(vcpu); - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); - memcpy(shadow, kmap(page), VMCS12_SIZE); + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + return; - kunmap(page); - kvm_release_page_clean(page); + memcpy(shadow, map.hva, VMCS12_SIZE); + kvm_vcpu_unmap(vcpu, &map, false); } static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, @@ -930,7 +923,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { if (!nested_cr3_valid(vcpu, cr3)) { *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; + return -EINVAL; } /* @@ -941,7 +934,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne !nested_ept) { if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { *entry_failure_code = ENTRY_FAIL_PDPTE; - return 1; + return -EINVAL; } } } @@ -1794,13 +1787,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, nested_release_evmcs(vcpu); - vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page( - vcpu, assist_page.current_nested_vmcs); - - if (unlikely(is_error_page(vmx->nested.hv_evmcs_page))) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs), + &vmx->nested.hv_evmcs_map)) return 0; - vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); + vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva; /* * Currently, KVM only supports eVMCS version 1 @@ -2373,19 +2364,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, */ if (vmx->emulation_required) { *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; + return -EINVAL; } /* Shadow page tables on either EPT or shadow page tables. */ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) - return 1; + return -EINVAL; if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; - kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); - kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); + kvm_rsp_write(vcpu, vmcs12->guest_rsp); + kvm_rip_write(vcpu, vmcs12->guest_rip); return 0; } @@ -2589,11 +2580,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, return 0; } -/* - * Checks related to Host Control Registers and MSRs - */ -static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (nested_check_vm_execution_controls(vcpu, vmcs12) || + nested_check_vm_exit_controls(vcpu, vmcs12) || + nested_check_vm_entry_controls(vcpu, vmcs12)) + return -EINVAL; + + return 0; +} + +static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { bool ia32e; @@ -2606,6 +2605,10 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) return -EINVAL; + if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && + !kvm_pat_valid(vmcs12->host_ia32_pat)) + return -EINVAL; + /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the * IA32_EFER MSR must be 0 in the field for that register. In addition, @@ -2624,41 +2627,12 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, return 0; } -/* - * Checks related to Guest Non-register State - */ -static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) -{ - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) - return -EINVAL; - - return 0; -} - -static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) -{ - if (nested_check_vm_execution_controls(vcpu, vmcs12) || - nested_check_vm_exit_controls(vcpu, vmcs12) || - nested_check_vm_entry_controls(vcpu, vmcs12)) - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - - if (nested_check_host_control_regs(vcpu, vmcs12)) - return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; - - if (nested_check_guest_non_reg_state(vmcs12)) - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - - return 0; -} - static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int r; - struct page *page; + int r = 0; struct vmcs12 *shadow; + struct kvm_host_map map; if (vmcs12->vmcs_link_pointer == -1ull) return 0; @@ -2666,23 +2640,34 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) return -EINVAL; - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); - if (is_error_page(page)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) return -EINVAL; - r = 0; - shadow = kmap(page); + shadow = map.hva; + if (shadow->hdr.revision_id != VMCS12_REVISION || shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) r = -EINVAL; - kunmap(page); - kvm_release_page_clean(page); + + kvm_vcpu_unmap(vcpu, &map, false); return r; } -static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12, - u32 *exit_qual) +/* + * Checks related to Guest Non-register State + */ +static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) +{ + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) + return -EINVAL; + + return 0; +} + +static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12, + u32 *exit_qual) { bool ia32e; @@ -2690,11 +2675,15 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) - return 1; + return -EINVAL; + + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && + !kvm_pat_valid(vmcs12->guest_ia32_pat)) + return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; - return 1; + return -EINVAL; } /* @@ -2713,13 +2702,16 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || ((vmcs12->guest_cr0 & X86_CR0_PG) && ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) - return 1; + return -EINVAL; } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || - (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) - return 1; + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return -EINVAL; + + if (nested_check_guest_non_reg_state(vmcs12)) + return -EINVAL; return 0; } @@ -2792,14 +2784,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) : "cc", "memory" ); - preempt_enable(); - if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + preempt_enable(); WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -2811,6 +2802,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + preempt_enable(); /* * A non-failing VMEntry means we somehow entered guest mode with @@ -2832,6 +2824,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_host_map *map; struct page *page; u64 hpa; @@ -2864,20 +2857,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); + map = &vmx->nested.virtual_apic_map; /* * If translation failed, VM entry will fail because * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. */ - if (!is_error_page(page)) { - vmx->nested.virtual_apic_page = page; - hpa = page_to_phys(vmx->nested.virtual_apic_page); - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { @@ -2898,26 +2885,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } if (nested_cpu_has_posted_intr(vmcs12)) { - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); + map = &vmx->nested.pi_desc_map; + + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { + vmx->nested.pi_desc = + (struct pi_desc *)(((void *)map->hva) + + offset_in_page(vmcs12->posted_intr_desc_addr)); + vmcs_write64(POSTED_INTR_DESC_ADDR, + pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); } - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); - if (is_error_page(page)) - return; - vmx->nested.pi_desc_page = page; - vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); - vmx->nested.pi_desc = - (struct pi_desc *)((void *)vmx->nested.pi_desc + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); - vmcs_write64(POSTED_INTR_DESC_ADDR, - page_to_phys(vmx->nested.pi_desc_page) + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); } if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, @@ -3000,7 +2976,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) return -1; } - if (nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) goto vmentry_fail_vmexit; } @@ -3145,9 +3121,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS : VMXERR_VMRESUME_NONLAUNCHED_VMCS); - ret = nested_vmx_check_vmentry_prereqs(vcpu, vmcs12); - if (ret) - return nested_vmx_failValid(vcpu, ret); + if (nested_vmx_check_controls(vcpu, vmcs12)) + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + if (nested_vmx_check_host_state(vcpu, vmcs12)) + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); /* * We're finally done with prerequisite checking, and can start with @@ -3310,11 +3288,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr != 256) { - vapic_page = kmap(vmx->nested.virtual_apic_page); + vapic_page = vmx->nested.virtual_apic_map.hva; + if (!vapic_page) + return; + __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page, &max_irr); - kunmap(vmx->nested.virtual_apic_page); - status = vmcs_read16(GUEST_INTR_STATUS); if ((u8)max_irr > ((u8)status & 0xff)) { status &= ~0xff; @@ -3425,8 +3404,8 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); - vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); + vmcs12->guest_rsp = kvm_rsp_read(vcpu); + vmcs12->guest_rip = kvm_rip_read(vcpu); vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); @@ -3609,8 +3588,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); vmx_set_efer(vcpu, vcpu->arch.efer); - kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); - kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); + kvm_rsp_write(vcpu, vmcs12->host_rsp); + kvm_rip_write(vcpu, vmcs12->host_rip); vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); vmx_set_interrupt_shadow(vcpu, 0); @@ -3955,16 +3934,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } - if (vmx->nested.virtual_apic_page) { - kvm_release_page_dirty(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = NULL; - } - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; /* * We are now running in L2, mmu_notifier will force to reload the @@ -4260,7 +4232,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; gpa_t vmptr; - struct page *page; + uint32_t revision; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -4306,21 +4278,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; * which replaces physical address width with 32 */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failInvalid(vcpu); - page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) + if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || + revision != VMCS12_REVISION) return nested_vmx_failInvalid(vcpu); - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - kvm_release_page_clean(page); - return nested_vmx_failInvalid(vcpu); - } - kunmap(page); - kvm_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) @@ -4377,7 +4341,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); @@ -4385,7 +4349,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); - if (vmx->nested.hv_evmcs_page) { + if (vmx->nested.hv_evmcs_map.hva) { if (vmptr == vmx->nested.hv_evmcs_vmptr) nested_release_evmcs(vcpu); } else { @@ -4584,7 +4548,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); @@ -4597,11 +4561,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return 1; if (vmx->nested.current_vmptr != vmptr) { + struct kvm_host_map map; struct vmcs12 *new_vmcs12; - struct page *page; - page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) { + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { /* * Reads from an unbacked page return all 1s, * which means that the 32 bits located at the @@ -4611,12 +4574,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } - new_vmcs12 = kmap(page); + + new_vmcs12 = map.hva; + if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || (new_vmcs12->hdr.shadow_vmcs && !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &map, false); return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } @@ -4628,8 +4592,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) * cached. */ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); - kunmap(page); - kvm_release_page_clean(page); + kvm_vcpu_unmap(vcpu, &map, false); set_current_vmptr(vmx, vmptr); } @@ -4804,7 +4767,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; + u32 index = kvm_rcx_read(vcpu); u64 address; bool accessed_dirty; struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -4850,7 +4813,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; + u32 function = kvm_rax_read(vcpu); /* * VMFUNC is only supported for nested guests, but we always enable the @@ -4936,7 +4899,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 exit_reason) { - u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; + u32 msr_index = kvm_rcx_read(vcpu); gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) @@ -5373,9 +5336,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->format != 0) return -EINVAL; - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) - nested_enable_evmcs(vcpu, NULL); - if (!nested_vmx_allowed(vcpu)) return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; @@ -5417,13 +5377,16 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->vmx.vmxon_pa == -1ull) return 0; + if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) + nested_enable_evmcs(vcpu, NULL); + vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; ret = enter_vmx_operation(vcpu); if (ret) return ret; /* Empty 'VMXON' state is permitted */ - if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) return 0; if (kvm_state->vmx.vmcs_pa != -1ull) { @@ -5463,33 +5426,41 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmx->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) - return -EINVAL; + if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) + goto error_guest_mode; if (copy_from_user(shadow_vmcs12, user_kvm_nested_state->data + VMCS12_SIZE, - sizeof(*vmcs12))) - return -EFAULT; + sizeof(*vmcs12))) { + ret = -EFAULT; + goto error_guest_mode; + } if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || !shadow_vmcs12->hdr.shadow_vmcs) - return -EINVAL; + goto error_guest_mode; } - if (nested_vmx_check_vmentry_prereqs(vcpu, vmcs12) || - nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) - return -EINVAL; + if (nested_vmx_check_controls(vcpu, vmcs12) || + nested_vmx_check_host_state(vcpu, vmcs12) || + nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) + goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; ret = nested_vmx_enter_non_root_mode(vcpu, false); if (ret) - return -EINVAL; + goto error_guest_mode; return 0; + +error_guest_mode: + vmx->nested.nested_run_pending = 0; + return ret; } void nested_vmx_vcpu_setup(void) |