author     Linus Torvalds <torvalds@linux-foundation.org>    2011-03-17 18:40:35 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-03-17 18:40:35 -0700
commit     ec0afc9311adcfb10b90e547c23250f63939f990 (patch)
tree       2093d2668898a8a03f30acbfd5568e65b8c086b9 /arch/x86/kvm/x86.c
parent     804f18536984939622ddca60ab6b25743e0ec68d (diff)
parent     776e58ea3d3735f85678155398241d2513afa67a (diff)
Merge branch 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (55 commits)
  KVM: unbreak userspace that does not sets tss address
  KVM: MMU: cleanup pte write path
  KVM: MMU: introduce a common function to get no-dirty-logged slot
  KVM: fix rcu usage in init_rmode_* functions
  KVM: fix kvmclock regression due to missing clock update
  KVM: emulator: Fix permission checking in io permission bitmap
  KVM: emulator: Fix io permission checking for 64bit guest
  KVM: SVM: Load %gs earlier if CONFIG_X86_32_LAZY_GS=n
  KVM: x86: Remove useless regs_page pointer from kvm_lapic
  KVM: improve comment on rcu use in irqfd_deassign
  KVM: MMU: remove unused macros
  KVM: MMU: cleanup page alloc and free
  KVM: MMU: do not record gfn in kvm_mmu_pte_write
  KVM: MMU: move mmu pages calculated out of mmu lock
  KVM: MMU: set spte accessed bit properly
  KVM: MMU: fix kvm_mmu_slot_remove_write_access dropping intermediate W bits
  KVM: Start lock documentation
  KVM: better readability of efer_reserved_bits
  KVM: Clear async page fault hash after switching to real mode
  KVM: VMX: Initialize vm86 TSS only once.
  ...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  153
1 file changed, 81 insertions, 72 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bcc0efce85bf..f1e4025f1ae2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -81,9 +81,10 @@
* - enable LME and LMA per default on 64 bit KVM
*/
#ifdef CONFIG_X86_64
-static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
+static
+u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
-static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
+static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
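
As an aside, the named-flag form can be sanity-checked against the old magic numbers: EFER_SCE, EFER_LME and EFER_LMA are bits 0, 8 and 10 of the EFER MSR (the kernel's definitions live in asm/msr-index.h), so a tiny standalone sketch (not part of the patch) reproduces both literals:

#include <stdint.h>
#include <stdio.h>

/* EFER bit positions per the architecture manuals */
#define EFER_SCE (1ULL << 0)
#define EFER_LME (1ULL << 8)
#define EFER_LMA (1ULL << 10)

int main(void)
{
        /* prints 0xfffffffffffffafe, the old 64-bit literal */
        printf("%#llx\n", (unsigned long long)~(EFER_SCE | EFER_LME | EFER_LMA));
        /* prints 0xfffffffffffffffe, the old 32-bit literal */
        printf("%#llx\n", (unsigned long long)~EFER_SCE);
        return 0;
}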
@@ -360,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
+ kvm_make_request(KVM_REQ_NMI, vcpu);
kvm_make_request(KVM_REQ_EVENT, vcpu);
- vcpu->arch.nmi_pending = 1;
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);
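
The NMI is now only latched as a request bit here; it becomes nmi_pending in vcpu_enter_guest() (see the KVM_REQ_NMI hunk further down). A minimal sketch of the request-bit pattern this relies on, assuming the usual kvm_make_request()/kvm_check_request() semantics of set_bit() and test_and_clear_bit() on vcpu->requests:

/* Illustrative only; the real helpers are in include/linux/kvm_host.h. */
static inline void make_request_sketch(unsigned long *requests, int req)
{
        set_bit(req, requests);                   /* producer: mark work pending */
}

static inline bool check_request_sketch(unsigned long *requests, int req)
{
        return test_and_clear_bit(req, requests); /* consumer: claim it once */
}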
@@ -525,8 +526,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0);
- if ((cr0 ^ old_cr0) & X86_CR0_PG)
+ if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_async_pf_hash_reset(vcpu);
+ }
if ((cr0 ^ old_cr0) & update_bits)
kvm_mmu_reset_context(vcpu);
@@ -1017,7 +1020,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
unsigned long flags;
s64 sdiff;
- spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+ raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = data - native_read_tsc();
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
@@ -1050,7 +1053,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
kvm->arch.last_tsc_write = data;
kvm->arch.last_tsc_offset = offset;
kvm_x86_ops->write_tsc_offset(vcpu, offset);
- spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
/* Reset of TSC must disable overshoot protection below */
vcpu->arch.hv_clock.tsc_timestamp = 0;
@@ -1453,6 +1456,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
+static void kvmclock_reset(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.time_page) {
+ kvm_release_page_dirty(vcpu->arch.time_page);
+ vcpu->arch.time_page = NULL;
+ }
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
@@ -1510,10 +1521,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
- if (vcpu->arch.time_page) {
- kvm_release_page_dirty(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ kvmclock_reset(vcpu);
vcpu->arch.time = data;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -1592,6 +1600,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
} else
return set_msr_hyperv(vcpu, msr, data);
break;
+ case MSR_IA32_BBL_CR_CTL3:
+ /* Drop writes to this legacy MSR -- see rdmsr
+ * counterpart for further detail.
+ */
+ pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -1846,6 +1860,19 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
} else
return get_msr_hyperv(vcpu, msr, pdata);
break;
+ case MSR_IA32_BBL_CR_CTL3:
+ /* This legacy MSR exists but isn't fully documented in current
+ * silicon. It is however accessed by winxp in very narrow
+ * scenarios where it sets bit #19, itself documented as
+ * a "reserved" bit. Best effort attempt to source coherent
+ * read data here should the balance of the register be
+ * interpreted by the guest:
+ *
+ * L2 cache control register 3: 64GB range, 256KB size,
+ * enabled, latency 0x1, configured
+ */
+ data = 0xbe702111;
+ break;
default:
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -2100,8 +2127,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (check_tsc_unstable()) {
kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
vcpu->arch.tsc_catchup = 1;
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != cpu)
kvm_migrate_timers(vcpu);
vcpu->cpu = cpu;
@@ -2575,9 +2602,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
if (mce->status & MCI_STATUS_UC) {
if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
!kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
- printk(KERN_DEBUG "kvm: set_mce: "
- "injects mce exception while "
- "previous one is in progress!\n");
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return 0;
}
@@ -2648,8 +2672,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.pending = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
- if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
- kvm_pic_clear_isr_ack(vcpu->kvm);
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
kvm_x86_ops->set_interrupt_shadow(vcpu,
events->interrupt.shadow);
@@ -4140,8 +4162,8 @@ static unsigned long emulator_get_cached_segment_base(int seg,
return get_segment_base(vcpu, seg);
}
-static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
- struct kvm_vcpu *vcpu)
+static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
+ int seg, struct kvm_vcpu *vcpu)
{
struct kvm_segment var;
@@ -4154,6 +4176,10 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
var.limit >>= 12;
set_desc_limit(desc, var.limit);
set_desc_base(desc, (unsigned long)var.base);
+#ifdef CONFIG_X86_64
+ if (base3)
+ *base3 = var.base >> 32;
+#endif
desc->type = var.type;
desc->s = var.s;
desc->dpl = var.dpl;
@@ -4166,8 +4192,8 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
return true;
}
-static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
- struct kvm_vcpu *vcpu)
+static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
+ int seg, struct kvm_vcpu *vcpu)
{
struct kvm_segment var;
@@ -4175,6 +4201,9 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
kvm_get_segment(vcpu, &var, seg);
var.base = get_desc_base(desc);
+#ifdef CONFIG_X86_64
+ var.base |= ((u64)base3) << 32;
+#endif
var.limit = get_desc_limit(desc);
if (desc->g)
var.limit = (var.limit << 12) | 0xfff;
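
In long mode the TSS and LDT descriptors are 16 bytes wide and keep bits 63:32 of the base in a separate dword, which is what the new base3 parameter carries between the emulator and kvm_segment. A small sketch of the split and reassembly (the helper names here are made up):

/* Sketch only: move a 64-bit base into desc_struct + base3 and back. */
static void split_base(u64 base, struct desc_struct *desc, u32 *base3)
{
        set_desc_base(desc, (unsigned long)base);  /* descriptor holds bits 31:0 */
        *base3 = base >> 32;                       /* bits 63:32 travel separately */
}

static u64 join_base(const struct desc_struct *desc, u32 base3)
{
        return get_desc_base(desc) | ((u64)base3 << 32);
}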
@@ -4390,41 +4419,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
vcpu->arch.emulate_ctxt.have_exception = false;
vcpu->arch.emulate_ctxt.perm_ok = false;
+ vcpu->arch.emulate_ctxt.only_vendor_specific_insn
+ = emulation_type & EMULTYPE_TRAP_UD;
+
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
- if (r == X86EMUL_PROPAGATE_FAULT)
- goto done;
trace_kvm_emulate_insn_start(vcpu);
-
- /* Only allow emulation of specific instructions on #UD
- * (namely VMMCALL, sysenter, sysexit, syscall)*/
- if (emulation_type & EMULTYPE_TRAP_UD) {
- if (!c->twobyte)
- return EMULATE_FAIL;
- switch (c->b) {
- case 0x01: /* VMMCALL */
- if (c->modrm_mod != 3 || c->modrm_rm != 1)
- return EMULATE_FAIL;
- break;
- case 0x34: /* sysenter */
- case 0x35: /* sysexit */
- if (c->modrm_mod != 0 || c->modrm_rm != 0)
- return EMULATE_FAIL;
- break;
- case 0x05: /* syscall */
- if (c->modrm_mod != 0 || c->modrm_rm != 0)
- return EMULATE_FAIL;
- break;
- default:
- return EMULATE_FAIL;
- }
-
- if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
- return EMULATE_FAIL;
- }
-
++vcpu->stat.insn_emulation;
if (r) {
+ if (emulation_type & EMULTYPE_TRAP_UD)
+ return EMULATE_FAIL;
if (reexecute_instruction(vcpu, cr2))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
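
The open-coded #UD whitelist (VMMCALL, SYSENTER/SYSEXIT, SYSCALL) moves into the decoder: with only_vendor_specific_insn set, x86_decode_insn() is expected to fail for anything the opcode tables do not mark as vendor specific, so this path only has to map a decode failure back to EMULATE_FAIL. Roughly the shape of such a gate, with hypothetical names since the emulator's internal flag is not visible in this diff:

/* Illustrative decode-time gate, not the emulator's literal code. */
struct decode_ctxt_sketch {
        bool only_vendor_specific_insn;
        unsigned long attrs;                    /* per-opcode attribute flags */
};

#define ATTR_VENDOR_SPECIFIC (1UL << 0)         /* hypothetical attribute bit */

static int decode_gate(const struct decode_ctxt_sketch *ctxt)
{
        if (ctxt->only_vendor_specific_insn &&
            !(ctxt->attrs & ATTR_VENDOR_SPECIFIC))
                return 1;       /* decode fails; caller returns EMULATE_FAIL */
        return 0;
}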
@@ -4452,7 +4456,6 @@ restart:
return handle_emulation_failure(vcpu);
}
-done:
if (vcpu->arch.emulate_ctxt.have_exception) {
inject_emulated_exception(vcpu);
r = EMULATE_DONE;
@@ -4562,7 +4565,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -4572,7 +4575,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -5185,6 +5188,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 1;
goto out;
}
+ if (kvm_check_request(KVM_REQ_NMI, vcpu))
+ vcpu->arch.nmi_pending = true;
}
r = kvm_mmu_reload(vcpu);
@@ -5213,14 +5218,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_load_guest_fpu(vcpu);
kvm_load_guest_xcr0(vcpu);
- atomic_set(&vcpu->guest_mode, 1);
- smp_wmb();
+ vcpu->mode = IN_GUEST_MODE;
+
+ /* We should set ->mode before checking ->requests;
+ * see the comment in make_all_cpus_request.
+ */
+ smp_mb();
local_irq_disable();
- if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
+ if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
|| need_resched() || signal_pending(current)) {
- atomic_set(&vcpu->guest_mode, 0);
+ vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
local_irq_enable();
preempt_enable();
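
The ordering comment above is the vCPU half of a store/load pairing with make_all_cpus_request(): each side stores its flag, issues a full barrier, then reads the other side's flag, so at least one of them is guaranteed to notice the other. A condensed two-sided sketch (cancel_entry() is a hypothetical stand-in for the bail-out path above):

static void vcpu_entry_side(struct kvm_vcpu *vcpu)
{
        vcpu->mode = IN_GUEST_MODE;
        smp_mb();                               /* order the ->mode store ... */
        if (vcpu->requests)                     /* ... against this ->requests load */
                cancel_entry(vcpu);             /* hypothetical: back out and service */
}

static void requester_side(struct kvm_vcpu *vcpu, int req)
{
        set_bit(req, &vcpu->requests);
        smp_mb();                               /* order the ->requests store ... */
        if (vcpu->mode == IN_GUEST_MODE)        /* ... against this ->mode load */
                smp_send_reschedule(vcpu->cpu); /* force a VM exit */
}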
@@ -5256,7 +5265,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
- atomic_set(&vcpu->guest_mode, 0);
+ vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
local_irq_enable();
@@ -5574,7 +5583,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int mmu_reset_needed = 0;
- int pending_vec, max_bits;
+ int pending_vec, max_bits, idx;
struct desc_ptr dt;
dt.size = sregs->idt.limit;
@@ -5603,10 +5612,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (sregs->cr4 & X86_CR4_OSXSAVE)
update_cpuid(vcpu);
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
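
load_pdptrs() reads the guest's PDPTEs through the memslots, and memslot lookups are protected by kvm->srcu, hence the new read-side critical section. The shape of such a section, sketched outside the full ioctl context:

/* Sketch: SRCU read-side section around a memslot-dereferencing call. */
static void read_guest_pdptrs(struct kvm_vcpu *vcpu)
{
        int idx;

        idx = srcu_read_lock(&vcpu->kvm->srcu); /* pin the current memslot view */
        load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
}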
@@ -5617,8 +5629,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (pending_vec < max_bits) {
kvm_queue_interrupt(vcpu, pending_vec, false);
pr_debug("Set back pending irq %d\n", pending_vec);
- if (irqchip_in_kernel(vcpu->kvm))
- kvm_pic_clear_isr_ack(vcpu->kvm);
}
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@@ -5814,10 +5824,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.time_page) {
- kvm_release_page_dirty(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ kvmclock_reset(vcpu);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fx_free(vcpu);
@@ -5878,6 +5885,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.apf.msr_val = 0;
+ kvmclock_reset(vcpu);
+
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
@@ -6005,7 +6014,7 @@ int kvm_arch_init_vm(struct kvm *kvm)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
- spin_lock_init(&kvm->arch.tsc_write_lock);
+ raw_spin_lock_init(&kvm->arch.tsc_write_lock);
return 0;
}
@@ -6103,7 +6112,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
int user_alloc)
{
- int npages = mem->memory_size >> PAGE_SHIFT;
+ int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
int ret;
@@ -6118,12 +6127,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
"failed to munmap memory\n");
}
+ if (!kvm->arch.n_requested_mmu_pages)
+ nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
+
spin_lock(&kvm->mmu_lock);
- if (!kvm->arch.n_requested_mmu_pages) {
- unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
+ if (nr_mmu_pages)
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
- }
-
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
spin_unlock(&kvm->mmu_lock);
}
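
The reshuffle above is the usual compute-then-apply pattern: kvm_mmu_calculate_mmu_pages() walks the memslots and is better done outside mmu_lock, leaving only the cheap kvm_mmu_change_mmu_pages() call under the lock. Condensed into a standalone sketch:

/* Sketch of the path above: slow calculation unlocked, apply under the lock. */
static void resize_mmu_pages(struct kvm *kvm)
{
        unsigned int nr_mmu_pages = 0;

        if (!kvm->arch.n_requested_mmu_pages)
                nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); /* no lock held */

        spin_lock(&kvm->mmu_lock);
        if (nr_mmu_pages)
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
        spin_unlock(&kvm->mmu_lock);
}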
@@ -6157,7 +6166,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (atomic_xchg(&vcpu->guest_mode, 0))
+ if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
smp_send_reschedule(cpu);
put_cpu();
}
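
kvm_vcpu_exiting_guest_mode() replaces the atomic_xchg(): the intent is to atomically move ->mode from IN_GUEST_MODE to EXITING_GUEST_MODE and report the old value, so the reschedule IPI is only sent when the vCPU really was running guest code. A sketch of that contract, assuming a cmpxchg-based helper (the real one lives in include/linux/kvm_host.h):

/* Illustrative; returns the previous ->mode value seen by this CPU. */
static int vcpu_exiting_guest_mode_sketch(struct kvm_vcpu *vcpu)
{
        return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}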