diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-06 10:49:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-06 10:49:01 -0700 |
commit | 6218590bcb452c3da7517d02b588d4d0a8628f73 (patch) | |
tree | 8b6a285052ac999e0e36e04f0c1e6bbfb46e84c4 /arch/x86/kvm/x86.c | |
parent | 14986a34e1289424811443a524cdd9e1688c7913 (diff) | |
parent | d9ab710b85310e4ba9295f2b494eda54cf1a355a (diff) |
Merge tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"All architectures:
- move `make kvmconfig` stubs from x86
- use 64 bits for debugfs stats
ARM:
- Important fixes for not using an in-kernel irqchip
- handle SError exceptions and present them to guests if appropriate
- proxying of GICV access at EL2 if guest mappings are unsafe
- GICv3 on AArch32 on ARMv8
- preparations for GICv3 save/restore, including ABI docs
- cleanups and a bit of optimizations
MIPS:
- A couple of fixes in preparation for supporting MIPS EVA host
kernels
- MIPS SMP host & TLB invalidation fixes
PPC:
- Fix the bug which caused guests to falsely report lockups
- other minor fixes
- a small optimization
s390:
- Lazy enablement of runtime instrumentation
- up to 255 CPUs for nested guests
- rework of machine check deliver
- cleanups and fixes
x86:
- IOMMU part of AMD's AVIC for vmexit-less interrupt delivery
- Hyper-V TSC page
- per-vcpu tsc_offset in debugfs
- accelerated INS/OUTS in nVMX
- cleanups and fixes"
* tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (140 commits)
KVM: MIPS: Drop dubious EntryHi optimisation
KVM: MIPS: Invalidate TLB by regenerating ASIDs
KVM: MIPS: Split kernel/user ASID regeneration
KVM: MIPS: Drop other CPU ASIDs on guest MMU changes
KVM: arm/arm64: vgic: Don't flush/sync without a working vgic
KVM: arm64: Require in-kernel irqchip for PMU support
KVM: PPC: Book3s PR: Allow access to unprivileged MMCR2 register
KVM: PPC: Book3S PR: Support 64kB page size on POWER8E and POWER8NVL
KVM: PPC: Book3S: Remove duplicate setting of the B field in tlbie
KVM: PPC: BookE: Fix a sanity check
KVM: PPC: Book3S HV: Take out virtual core piggybacking code
KVM: PPC: Book3S: Treat VTB as a per-subcore register, not per-thread
ARM: gic-v3: Work around definition of gic_write_bpr1
KVM: nVMX: Fix the NMI IDT-vectoring handling
KVM: VMX: Enable MSR-BASED TPR shadow even if APICv is inactive
KVM: nVMX: Fix reload apic access page warning
kvmconfig: add virtio-gpu to config fragment
config: move x86 kvm_guest.config to a common location
arm64: KVM: Remove duplicating init code for setting VMID
ARM: KVM: Support vgic-v3
...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 171 |
1 files changed, 105 insertions, 66 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 699f8726539a..6c633de84dd7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1367,7 +1367,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) { - u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu); + u64 curr_offset = vcpu->arch.tsc_offset; vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } @@ -1413,6 +1413,12 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); +static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +{ + kvm_x86_ops->write_tsc_offset(vcpu, offset); + vcpu->arch.tsc_offset = offset; +} + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1425,7 +1431,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = get_kernel_ns(); + ns = ktime_get_boot_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -1522,7 +1528,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated) update_ia32_tsc_adjust_msr(vcpu, offset); - kvm_x86_ops->write_tsc_offset(vcpu, offset); + kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); spin_lock(&kvm->arch.pvclock_gtod_sync_lock); @@ -1716,6 +1722,88 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } +static u64 __get_kvmclock_ns(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); + struct kvm_arch *ka = &kvm->arch; + s64 ns; + + if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { + u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); + } else { + ns = ktime_get_boot_ns() + ka->kvmclock_offset; + } + + return ns; +} + +u64 get_kvmclock_ns(struct kvm *kvm) +{ + unsigned long flags; + s64 ns; + + local_irq_save(flags); + ns = __get_kvmclock_ns(kvm); + local_irq_restore(flags); + + return ns; +} + +static void kvm_setup_pvclock_page(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + struct pvclock_vcpu_time_info guest_hv_clock; + + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return; + + /* This VCPU is paused, but it's legal for a guest to read another + * VCPU's kvmclock, so we really have to follow the specification where + * it says that version is odd if data is being modified, and even after + * it is consistent. + * + * Version field updates must be kept separate. This is because + * kvm_write_guest_cached might use a "rep movs" instruction, and + * writes within a string instruction are weakly ordered. So there + * are three writes overall. + * + * As a small optimization, only write the version field in the first + * and third write. The vcpu->pv_time cache is still valid, because the + * version field is the first in the struct. + */ + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + + vcpu->hv_clock.version = guest_hv_clock.version + 1; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); + + smp_wmb(); + + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ + vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); + + if (vcpu->pvclock_set_guest_stopped_request) { + vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED; + vcpu->pvclock_set_guest_stopped_request = false; + } + + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); + + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); + + smp_wmb(); + + vcpu->hv_clock.version++; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); +} + static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags, tgt_tsc_khz; @@ -1723,7 +1811,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; u64 tsc_timestamp, host_tsc; - struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; bool use_master_clock; @@ -1752,7 +1839,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = get_kernel_ns(); + kernel_ns = ktime_get_boot_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -1777,8 +1864,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->pv_time_enabled) - return 0; + /* With all the info we got, fill in the values */ if (kvm_has_tsc_control) tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); @@ -1790,64 +1876,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hw_tsc_khz = tgt_tsc_khz; } - /* With all the info we got, fill in the values */ vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, - &guest_hv_clock, sizeof(guest_hv_clock)))) - return 0; - - /* This VCPU is paused, but it's legal for a guest to read another - * VCPU's kvmclock, so we really have to follow the specification where - * it says that version is odd if data is being modified, and even after - * it is consistent. - * - * Version field updates must be kept separate. This is because - * kvm_write_guest_cached might use a "rep movs" instruction, and - * writes within a string instruction are weakly ordered. So there - * are three writes overall. - * - * As a small optimization, only write the version field in the first - * and third write. The vcpu->pv_time cache is still valid, because the - * version field is the first in the struct. - */ - BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); - - vcpu->hv_clock.version = guest_hv_clock.version + 1; - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); - - smp_wmb(); - - /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ - pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); - - if (vcpu->pvclock_set_guest_stopped_request) { - pvclock_flags |= PVCLOCK_GUEST_STOPPED; - vcpu->pvclock_set_guest_stopped_request = false; - } - /* If the host uses TSC clocksource, then it is stable */ + pvclock_flags = 0; if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; vcpu->hv_clock.flags = pvclock_flags; - trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); - - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - smp_wmb(); - - vcpu->hv_clock.version++; - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + if (vcpu->pv_time_enabled) + kvm_setup_pvclock_page(v); + if (v == kvm_get_vcpu(v->kvm, 0)) + kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); return 0; } @@ -2746,7 +2789,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); - kvm_x86_ops->write_tsc_offset(vcpu, offset); + kvm_vcpu_write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } if (kvm_lapic_hv_timer_in_use(vcpu) && @@ -4039,7 +4082,6 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_CLOCK: { struct kvm_clock_data user_ns; u64 now_ns; - s64 delta; r = -EFAULT; if (copy_from_user(&user_ns, argp, sizeof(user_ns))) @@ -4051,10 +4093,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; local_irq_disable(); - now_ns = get_kernel_ns(); - delta = user_ns.clock - now_ns; + now_ns = __get_kvmclock_ns(kvm); + kvm->arch.kvmclock_offset += user_ns.clock - now_ns; local_irq_enable(); - kvm->arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); break; } @@ -4062,10 +4103,8 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - local_irq_disable(); - now_ns = get_kernel_ns(); - user_ns.clock = kvm->arch.kvmclock_offset + now_ns; - local_irq_enable(); + now_ns = get_kvmclock_ns(kvm); + user_ns.clock = now_ns; user_ns.flags = 0; memset(&user_ns.pad, 0, sizeof(user_ns.pad)); @@ -6700,7 +6739,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); - /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); ++vcpu->stat.exits; @@ -7530,7 +7568,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, get_kernel_ns() will be using boot + * elapsed; our helper function, ktime_get_boot_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -7765,6 +7803,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); + kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); pvclock_update_vm_gtod_copy(kvm); INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); |