summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-10-06 10:49:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-10-06 10:49:01 -0700
commit6218590bcb452c3da7517d02b588d4d0a8628f73 (patch)
tree8b6a285052ac999e0e36e04f0c1e6bbfb46e84c4 /arch/x86/kvm/x86.c
parent14986a34e1289424811443a524cdd9e1688c7913 (diff)
parentd9ab710b85310e4ba9295f2b494eda54cf1a355a (diff)
Merge tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář: "All architectures: - move `make kvmconfig` stubs from x86 - use 64 bits for debugfs stats ARM: - Important fixes for not using an in-kernel irqchip - handle SError exceptions and present them to guests if appropriate - proxying of GICV access at EL2 if guest mappings are unsafe - GICv3 on AArch32 on ARMv8 - preparations for GICv3 save/restore, including ABI docs - cleanups and a bit of optimizations MIPS: - A couple of fixes in preparation for supporting MIPS EVA host kernels - MIPS SMP host & TLB invalidation fixes PPC: - Fix the bug which caused guests to falsely report lockups - other minor fixes - a small optimization s390: - Lazy enablement of runtime instrumentation - up to 255 CPUs for nested guests - rework of machine check deliver - cleanups and fixes x86: - IOMMU part of AMD's AVIC for vmexit-less interrupt delivery - Hyper-V TSC page - per-vcpu tsc_offset in debugfs - accelerated INS/OUTS in nVMX - cleanups and fixes" * tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (140 commits) KVM: MIPS: Drop dubious EntryHi optimisation KVM: MIPS: Invalidate TLB by regenerating ASIDs KVM: MIPS: Split kernel/user ASID regeneration KVM: MIPS: Drop other CPU ASIDs on guest MMU changes KVM: arm/arm64: vgic: Don't flush/sync without a working vgic KVM: arm64: Require in-kernel irqchip for PMU support KVM: PPC: Book3s PR: Allow access to unprivileged MMCR2 register KVM: PPC: Book3S PR: Support 64kB page size on POWER8E and POWER8NVL KVM: PPC: Book3S: Remove duplicate setting of the B field in tlbie KVM: PPC: BookE: Fix a sanity check KVM: PPC: Book3S HV: Take out virtual core piggybacking code KVM: PPC: Book3S: Treat VTB as a per-subcore register, not per-thread ARM: gic-v3: Work around definition of gic_write_bpr1 KVM: nVMX: Fix the NMI IDT-vectoring handling KVM: VMX: Enable MSR-BASED TPR shadow even if APICv is inactive KVM: nVMX: Fix reload apic access page warning kvmconfig: add virtio-gpu to config fragment config: move x86 kvm_guest.config to a common location arm64: KVM: Remove duplicating init code for setting VMID ARM: KVM: Support vgic-v3 ...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c171
1 files changed, 105 insertions, 66 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 699f8726539a..6c633de84dd7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1367,7 +1367,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+ u64 curr_offset = vcpu->arch.tsc_offset;
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1413,6 +1413,12 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
+static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ vcpu->arch.tsc_offset = offset;
+}
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
@@ -1425,7 +1431,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = get_kernel_ns();
+ ns = ktime_get_boot_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
@@ -1522,7 +1528,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
update_ia32_tsc_adjust_msr(vcpu, offset);
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
@@ -1716,6 +1722,88 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}
+static u64 __get_kvmclock_ns(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
+ struct kvm_arch *ka = &kvm->arch;
+ s64 ns;
+
+ if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
+ u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
+ } else {
+ ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+ }
+
+ return ns;
+}
+
+u64 get_kvmclock_ns(struct kvm *kvm)
+{
+ unsigned long flags;
+ s64 ns;
+
+ local_irq_save(flags);
+ ns = __get_kvmclock_ns(kvm);
+ local_irq_restore(flags);
+
+ return ns;
+}
+
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+ struct pvclock_vcpu_time_info guest_hv_clock;
+
+ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+ &guest_hv_clock, sizeof(guest_hv_clock))))
+ return;
+
+ /* This VCPU is paused, but it's legal for a guest to read another
+ * VCPU's kvmclock, so we really have to follow the specification where
+ * it says that version is odd if data is being modified, and even after
+ * it is consistent.
+ *
+ * Version field updates must be kept separate. This is because
+ * kvm_write_guest_cached might use a "rep movs" instruction, and
+ * writes within a string instruction are weakly ordered. So there
+ * are three writes overall.
+ *
+ * As a small optimization, only write the version field in the first
+ * and third write. The vcpu->pv_time cache is still valid, because the
+ * version field is the first in the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+ vcpu->hv_clock.version = guest_hv_clock.version + 1;
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
+
+ smp_wmb();
+
+ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+ vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+
+ if (vcpu->pvclock_set_guest_stopped_request) {
+ vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+ vcpu->pvclock_set_guest_stopped_request = false;
+ }
+
+ trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
+
+ smp_wmb();
+
+ vcpu->hv_clock.version++;
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
+}
+
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
unsigned long flags, tgt_tsc_khz;
@@ -1723,7 +1811,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
- struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
@@ -1752,7 +1839,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
if (!use_master_clock) {
host_tsc = rdtsc();
- kernel_ns = get_kernel_ns();
+ kernel_ns = ktime_get_boot_ns();
}
tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -1777,8 +1864,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_restore(flags);
- if (!vcpu->pv_time_enabled)
- return 0;
+ /* With all the info we got, fill in the values */
if (kvm_has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
@@ -1790,64 +1876,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = tgt_tsc_khz;
}
- /* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
- if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
- &guest_hv_clock, sizeof(guest_hv_clock))))
- return 0;
-
- /* This VCPU is paused, but it's legal for a guest to read another
- * VCPU's kvmclock, so we really have to follow the specification where
- * it says that version is odd if data is being modified, and even after
- * it is consistent.
- *
- * Version field updates must be kept separate. This is because
- * kvm_write_guest_cached might use a "rep movs" instruction, and
- * writes within a string instruction are weakly ordered. So there
- * are three writes overall.
- *
- * As a small optimization, only write the version field in the first
- * and third write. The vcpu->pv_time cache is still valid, because the
- * version field is the first in the struct.
- */
- BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
- vcpu->hv_clock.version = guest_hv_clock.version + 1;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
-
- smp_wmb();
-
- /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
-
- if (vcpu->pvclock_set_guest_stopped_request) {
- pvclock_flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
-
/* If the host uses TSC clocksource, then it is stable */
+ pvclock_flags = 0;
if (use_master_clock)
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
vcpu->hv_clock.flags = pvclock_flags;
- trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
-
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- smp_wmb();
-
- vcpu->hv_clock.version++;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
+ if (vcpu->pv_time_enabled)
+ kvm_setup_pvclock_page(v);
+ if (v == kvm_get_vcpu(v->kvm, 0))
+ kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
}
@@ -2746,7 +2789,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (check_tsc_unstable()) {
u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ kvm_vcpu_write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
@@ -4039,7 +4082,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_SET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;
- s64 delta;
r = -EFAULT;
if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
@@ -4051,10 +4093,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
local_irq_disable();
- now_ns = get_kernel_ns();
- delta = user_ns.clock - now_ns;
+ now_ns = __get_kvmclock_ns(kvm);
+ kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
local_irq_enable();
- kvm->arch.kvmclock_offset = delta;
kvm_gen_update_masterclock(kvm);
break;
}
@@ -4062,10 +4103,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;
- local_irq_disable();
- now_ns = get_kernel_ns();
- user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
- local_irq_enable();
+ now_ns = get_kvmclock_ns(kvm);
+ user_ns.clock = now_ns;
user_ns.flags = 0;
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
@@ -6700,7 +6739,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_put_guest_xcr0(vcpu);
- /* Interrupt is enabled by handle_external_intr() */
kvm_x86_ops->handle_external_intr(vcpu);
++vcpu->stat.exits;
@@ -7530,7 +7568,7 @@ int kvm_arch_hardware_enable(void)
* before any KVM threads can be running. Unfortunately, we can't
* bring the TSCs fully up to date with real time, as we aren't yet far
* enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, get_kernel_ns() will be using boot
+ * elapsed; our helper function, ktime_get_boot_ns() will be using boot
* variables that haven't been updated yet.
*
* So we simply find the maximum observed TSC above, then record the
@@ -7765,6 +7803,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+ kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
pvclock_update_vm_gtod_copy(kvm);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);