diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-12 14:34:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-12 14:34:06 -0800 |
commit | 3370b69eb0c1f6a05f9051e8fc3e8768461a80f7 (patch) | |
tree | 722a75d9d3cc605bfd513752054dea1df806385d /arch/x86/kvm/x86.c | |
parent | be23c9d20b341a58ad7107f9e9aa5735cea3da13 (diff) | |
parent | 370289756beccdc2a6ef9c7998fe67a7ca68bfc2 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second batch of kvm updates from Paolo Bonzini:
"Four changes:
- x86: work around two nasty cases where a benign exception occurs
while another is being delivered. The endless stream of exceptions
causes an infinite loop in the processor, which not even NMIs or
SMIs can interrupt; in the virt case, there is no possibility to
exit to the host either.
- x86: support for Skylake per-guest TSC rate. Long supported by
AMD, the patches mostly move things from there to common
arch/x86/kvm/ code.
- generic: remove local_irq_save/restore from the guest entry and
exit paths when context tracking is enabled. The patches are a few
months old, but we discussed them again at kernel summit. Andy
will pick up from here and, in 4.5, try to remove it from the user
entry/exit paths.
- PPC: Two bug fixes, see merge commit 370289756becc for details"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
KVM: x86: rename update_db_bp_intercept to update_bp_intercept
KVM: svm: unconditionally intercept #DB
KVM: x86: work around infinite loop in microcode when #AC is delivered
context_tracking: avoid irq_save/irq_restore on guest entry and exit
context_tracking: remove duplicate enabled check
KVM: VMX: Dump TSC multiplier in dump_vmcs()
KVM: VMX: Use a scaled host TSC for guest readings of MSR_IA32_TSC
KVM: VMX: Setup TSC scaling ratio when a vcpu is loaded
KVM: VMX: Enable and initialize VMX TSC scaling
KVM: x86: Use the correct vcpu's TSC rate to compute time scale
KVM: x86: Move TSC scaling logic out of call-back read_l1_tsc()
KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()
KVM: x86: Replace call-back compute_tsc_offset() with a common function
KVM: x86: Replace call-back set_tsc_khz() with a common function
KVM: x86: Add a common TSC scaling function
KVM: x86: Add a common TSC scaling ratio field in kvm_vcpu_arch
KVM: x86: Collect information for setting TSC scaling ratio
KVM: x86: declare a few variables as __read_mostly
KVM: x86: merge handle_mmio_page_fault and handle_mmio_page_fault_common
KVM: PPC: Book3S HV: Don't dynamically split core when already split
...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 159 |
1 files changed, 137 insertions, 22 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a6eff166fc6..00462bd63129 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -struct kvm_x86_ops *kvm_x86_ops; +struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); -static bool ignore_msrs = 0; +static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); unsigned int min_timer_period_us = 500; @@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; module_param(kvmclock_periodic_sync, bool, S_IRUGO); -bool kvm_has_tsc_control; +bool __read_mostly kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); -u32 kvm_max_guest_tsc_khz; +u32 __read_mostly kvm_max_guest_tsc_khz; EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); +u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); +u64 __read_mostly kvm_max_tsc_scaling_ratio; +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); +static u64 __read_mostly kvm_default_tsc_scaling_ratio; /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ -static u32 tsc_tolerance_ppm = 250; +static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int lapic_timer_advance_ns = 0; +unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -static bool backwards_tsc_observed = false; +static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -1249,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) return v; } -static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + u64 ratio; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return 0; + } + + /* TSC scaling supported? */ + if (!kvm_has_tsc_control) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + return 0; + } else { + WARN(1, "user requested TSC rate below hardware speed\n"); + return -1; + } + } + + /* TSC scaling required - calculate ratio */ + ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, + user_tsc_khz, tsc_khz); + + if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { + WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return -1; + } + + vcpu->arch.tsc_scaling_ratio = ratio; + return 0; +} + +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) - return; + if (this_tsc_khz == 0) { + /* set tsc_scaling_ratio to a safe value */ + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return -1; + } /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, @@ -1276,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1322,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } +/* + * Multiply tsc by a fixed point number represented by ratio. + * + * The most significant 64-N bits (mult) of ratio represent the + * integral part of the fixed point number; the remaining N bits + * (frac) represent the fractional part, ie. ratio represents a fixed + * point number (mult + frac * 2^(-N)). + * + * N equals to kvm_tsc_scaling_ratio_frac_bits. + */ +static inline u64 __scale_tsc(u64 ratio, u64 tsc) +{ + return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); +} + +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + u64 _tsc = tsc; + u64 ratio = vcpu->arch.tsc_scaling_ratio; + + if (ratio != kvm_default_tsc_scaling_ratio) + _tsc = __scale_tsc(ratio, tsc); + + return _tsc; +} +EXPORT_SYMBOL_GPL(kvm_scale_tsc); + +static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = kvm_scale_tsc(vcpu, rdtsc()); + + return target_tsc - tsc; +} + +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +{ + return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); +} +EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1333,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; @@ -1390,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) } else { u64 delta = nsec_to_cycles(vcpu, elapsed); data += delta; - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; @@ -1447,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) EXPORT_SYMBOL_GPL(kvm_write_tsc); +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) + WARN_ON(adjustment < 0); + adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + #ifdef CONFIG_X86_64 static cycle_t read_tsc(void) @@ -1608,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz; + unsigned long flags, this_tsc_khz, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1645,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = get_kernel_ns(); } - tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); + tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); /* * We may have to catch up the TSC to match elapsed wall clock @@ -1671,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 0; if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, + tgt_tsc_khz = kvm_has_tsc_control ? + vcpu->virtual_tsc_khz : this_tsc_khz; + kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = this_tsc_khz; @@ -2617,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { - u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, + u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; @@ -3319,9 +3421,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (user_tsc_khz == 0) user_tsc_khz = tsc_khz; - kvm_set_tsc_khz(vcpu, user_tsc_khz); + if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) + r = 0; - r = 0; goto out; } case KVM_GET_TSC_KHZ: { @@ -6452,8 +6554,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); - vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - rdtsc()); + vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7015,7 +7116,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->update_db_bp_intercept(vcpu); + kvm_x86_ops->update_bp_intercept(vcpu); r = 0; @@ -7364,6 +7465,20 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); + kvm_max_guest_tsc_khz = max; + + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; + } + kvm_init_msr_list(); return 0; } |