Diffstat (limited to 'virt')
-rw-r--r--   virt/kvm/coalesced_mmio.c    31
-rw-r--r--   virt/kvm/kvm_main.c         281
2 files changed, 152 insertions(+), 160 deletions(-)
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 1b90acb6e3fe..375d6285475e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -40,27 +40,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
return 1;
}
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
-{
- struct kvm_coalesced_mmio_ring *ring;
- unsigned avail;
-
- /* Are we able to batch it ? */
-
- /* last is the first free entry
- * check if we don't meet the first used entry
- * there is always one unused entry in the buffer
- */
- ring = dev->kvm->coalesced_mmio_ring;
- avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
- if (avail == 0) {
- /* full */
- return 0;
- }
-
- return 1;
-}
-
static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
struct kvm_io_device *this, gpa_t addr,
int len, const void *val)
@@ -74,9 +53,15 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
spin_lock(&dev->kvm->ring_lock);
+ /*
+ * last is the index of the entry to fill. Verify userspace hasn't
+ * set last to be out of range, and that there is room in the ring.
+ * Leave one entry free in the ring so that userspace can differentiate
+ * between an empty ring and a full ring.
+ */
insert = READ_ONCE(ring->last);
- if (!coalesced_mmio_has_room(dev, insert) ||
- insert >= KVM_COALESCED_MMIO_MAX) {
+ if (insert >= KVM_COALESCED_MMIO_MAX ||
+ (insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
spin_unlock(&dev->kvm->ring_lock);
return -EOPNOTSUPP;
}
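
The open-coded check above replaces coalesced_mmio_has_room(): the ring deliberately keeps one entry unused, so first == last can only mean "empty" and (last + 1) % size == first can only mean "full". A minimal stand-alone sketch of that invariant (ring size and field names are assumed for illustration, not KVM's actual structures):

/* Hypothetical ring using the "one slot always free" convention. */
#define RING_MAX 64			/* assumed size for the sketch */

struct ring {
	unsigned int first;		/* oldest used entry (consumer side) */
	unsigned int last;		/* next entry to fill (producer side) */
};

static int ring_is_empty(const struct ring *r)
{
	return r->first == r->last;
}

static int ring_is_full(const struct ring *r)
{
	/* Filling 'last' now would make it collide with 'first'. */
	return (r->last + 1) % RING_MAX == r->first;
}
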
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f81366f8b61..05cbb2548d99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -136,8 +136,8 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
.open = kvm_no_compat_open
#endif
-static int hardware_enable_all(void);
-static void hardware_disable_all(void);
+static int kvm_enable_virtualization(void);
+static void kvm_disable_virtualization(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
@@ -1220,7 +1220,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (r)
goto out_err_no_arch_destroy_vm;
- r = hardware_enable_all();
+ r = kvm_enable_virtualization();
if (r)
goto out_err_no_disable;
@@ -1263,7 +1263,7 @@ out_no_coalesced_mmio:
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
#endif
out_err_no_mmu_notifier:
- hardware_disable_all();
+ kvm_disable_virtualization();
out_err_no_disable:
kvm_arch_destroy_vm(kvm);
out_err_no_arch_destroy_vm:
@@ -1360,7 +1360,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
kvm_arch_free_vm(kvm);
preempt_notifier_dec();
- hardware_disable_all();
+ kvm_disable_virtualization();
mmdrop(mm);
}
@@ -3270,6 +3270,9 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
@@ -3343,6 +3346,9 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
@@ -3373,6 +3379,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
int r;
unsigned long addr;
+ if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+ return -EFAULT;
+
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
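
The three hunks above add the same guard to the single-page accessors: an (offset, len) pair handed in by a higher layer must not spill past the end of one page. A rough stand-alone equivalent of the check (the helper name and the overflow-safe formulation are illustrative, not KVM's code):

#include <stddef.h>

#define PAGE_SIZE 4096u

/* True if [offset, offset + len) stays within a single page. */
static int span_fits_in_page(size_t offset, size_t len)
{
	return offset <= PAGE_SIZE && len <= PAGE_SIZE - offset;
}
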
@@ -3576,7 +3585,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
int ret;
while ((seg = next_segment(len, offset)) != 0) {
- ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
+ ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
if (ret < 0)
return ret;
offset = 0;
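
This one-word fix matters because the loop clears the range one page segment at a time: each iteration must write only 'seg' bytes, not the full remaining 'len', or the first call would be asked to write past its page. A reduced sketch of the same segment walk (helper name hypothetical):

#define PAGE_SIZE 4096UL

/* Hypothetical helper: zero 'n' bytes of page 'page' starting at 'offset'. */
int zero_page_bytes(unsigned long page, unsigned long offset, unsigned long n);

/* Walk [start, start + len) page by page, clearing 'seg' bytes per step. */
int clear_range(unsigned long start, unsigned long len)
{
	unsigned long page = start / PAGE_SIZE;
	unsigned long offset = start % PAGE_SIZE;
	unsigned long seg;

	while (len) {
		seg = len < PAGE_SIZE - offset ? len : PAGE_SIZE - offset;
		if (zero_page_bytes(page, offset, seg))
			return -1;
		len -= seg;
		offset = 0;	/* only the first segment can start mid-page */
		page++;
	}
	return 0;
}
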
@@ -5566,137 +5575,67 @@ static struct miscdevice kvm_dev = {
};
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+static bool enable_virt_at_load = true;
+module_param(enable_virt_at_load, bool, 0444);
+
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-static DEFINE_PER_CPU(bool, hardware_enabled);
+static DEFINE_PER_CPU(bool, virtualization_enabled);
+static DEFINE_MUTEX(kvm_usage_lock);
static int kvm_usage_count;
-static int __hardware_enable_nolock(void)
+__weak void kvm_arch_enable_virtualization(void)
+{
+
+}
+
+__weak void kvm_arch_disable_virtualization(void)
+{
+
+}
+
+static int kvm_enable_virtualization_cpu(void)
{
- if (__this_cpu_read(hardware_enabled))
+ if (__this_cpu_read(virtualization_enabled))
return 0;
- if (kvm_arch_hardware_enable()) {
+ if (kvm_arch_enable_virtualization_cpu()) {
pr_info("kvm: enabling virtualization on CPU%d failed\n",
raw_smp_processor_id());
return -EIO;
}
- __this_cpu_write(hardware_enabled, true);
+ __this_cpu_write(virtualization_enabled, true);
return 0;
}
-static void hardware_enable_nolock(void *failed)
-{
- if (__hardware_enable_nolock())
- atomic_inc(failed);
-}
-
static int kvm_online_cpu(unsigned int cpu)
{
- int ret = 0;
-
/*
* Abort the CPU online process if hardware virtualization cannot
* be enabled. Otherwise running VMs would encounter unrecoverable
* errors when scheduled to this CPU.
*/
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- ret = __hardware_enable_nolock();
- mutex_unlock(&kvm_lock);
- return ret;
+ return kvm_enable_virtualization_cpu();
}
-static void hardware_disable_nolock(void *junk)
+static void kvm_disable_virtualization_cpu(void *ign)
{
- /*
- * Note, hardware_disable_all_nolock() tells all online CPUs to disable
- * hardware, not just CPUs that successfully enabled hardware!
- */
- if (!__this_cpu_read(hardware_enabled))
+ if (!__this_cpu_read(virtualization_enabled))
return;
- kvm_arch_hardware_disable();
+ kvm_arch_disable_virtualization_cpu();
- __this_cpu_write(hardware_enabled, false);
+ __this_cpu_write(virtualization_enabled, false);
}
static int kvm_offline_cpu(unsigned int cpu)
{
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- mutex_unlock(&kvm_lock);
+ kvm_disable_virtualization_cpu(NULL);
return 0;
}
-static void hardware_disable_all_nolock(void)
-{
- BUG_ON(!kvm_usage_count);
-
- kvm_usage_count--;
- if (!kvm_usage_count)
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static void hardware_disable_all(void)
-{
- cpus_read_lock();
- mutex_lock(&kvm_lock);
- hardware_disable_all_nolock();
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-}
-
-static int hardware_enable_all(void)
-{
- atomic_t failed = ATOMIC_INIT(0);
- int r;
-
- /*
- * Do not enable hardware virtualization if the system is going down.
- * If userspace initiated a forced reboot, e.g. reboot -f, then it's
- * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
- * after kvm_reboot() is called. Note, this relies on system_state
- * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
- * hook instead of registering a dedicated reboot notifier (the latter
- * runs before system_state is updated).
- */
- if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
- system_state == SYSTEM_RESTART)
- return -EBUSY;
-
- /*
- * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
- * is called, and so on_each_cpu() between them includes the CPU that
- * is being onlined. As a result, hardware_enable_nolock() may get
- * invoked before kvm_online_cpu(), which also enables hardware if the
- * usage count is non-zero. Disable CPU hotplug to avoid attempting to
- * enable hardware multiple times.
- */
- cpus_read_lock();
- mutex_lock(&kvm_lock);
-
- r = 0;
-
- kvm_usage_count++;
- if (kvm_usage_count == 1) {
- on_each_cpu(hardware_enable_nolock, &failed, 1);
-
- if (atomic_read(&failed)) {
- hardware_disable_all_nolock();
- r = -EBUSY;
- }
- }
-
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-
- return r;
-}
-
static void kvm_shutdown(void)
{
/*
@@ -5712,34 +5651,32 @@ static void kvm_shutdown(void)
*/
pr_info("kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
- on_each_cpu(hardware_disable_nolock, NULL, 1);
+ on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1);
}
static int kvm_suspend(void)
{
/*
* Secondary CPUs and CPU hotplug are disabled across the suspend/resume
- * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
- * is stable. Assert that kvm_lock is not held to ensure the system
- * isn't suspended while KVM is enabling hardware. Hardware enabling
- * can be preempted, but the task cannot be frozen until it has dropped
- * all locks (userspace tasks are frozen via a fake signal).
+ * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage
+ * count is stable. Assert that kvm_usage_lock is not held to ensure
+ * the system isn't suspended while KVM is enabling hardware. Hardware
+ * enabling can be preempted, but the task cannot be frozen until it has
+ * dropped all locks (userspace tasks are frozen via a fake signal).
*/
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
+ kvm_disable_virtualization_cpu(NULL);
return 0;
}
static void kvm_resume(void)
{
- lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_not_held(&kvm_usage_lock);
lockdep_assert_irqs_disabled();
- if (kvm_usage_count)
- WARN_ON_ONCE(__hardware_enable_nolock());
+ WARN_ON_ONCE(kvm_enable_virtualization_cpu());
}
static struct syscore_ops kvm_syscore_ops = {
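
Both this hunk and the larger one above rely on the per-CPU flag making kvm_enable_virtualization_cpu()/kvm_disable_virtualization_cpu() idempotent: the hotplug, suspend, resume and shutdown paths can call them unconditionally, with no usage-count check. A stand-alone sketch of that pattern (a plain array stands in for the kernel's per-CPU variables, and the arch hooks are hypothetical):

#include <stdbool.h>

#define NR_CPUS_SKETCH 256		/* assumed bound for the sketch */

static bool virt_enabled[NR_CPUS_SKETCH];

int enable_on_cpu(int cpu)
{
	if (virt_enabled[cpu])
		return 0;		/* already on: calling twice is harmless */
	if (arch_enable_virt())		/* hypothetical arch hook */
		return -1;
	virt_enabled[cpu] = true;
	return 0;
}

void disable_on_cpu(int cpu)
{
	if (!virt_enabled[cpu])
		return;			/* never enabled, or already off */
	arch_disable_virt();		/* hypothetical arch hook */
	virt_enabled[cpu] = false;
}
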
@@ -5747,13 +5684,95 @@ static struct syscore_ops kvm_syscore_ops = {
.resume = kvm_resume,
.shutdown = kvm_shutdown,
};
+
+static int kvm_enable_virtualization(void)
+{
+ int r;
+
+ guard(mutex)(&kvm_usage_lock);
+
+ if (kvm_usage_count++)
+ return 0;
+
+ kvm_arch_enable_virtualization();
+
+ r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
+ kvm_online_cpu, kvm_offline_cpu);
+ if (r)
+ goto err_cpuhp;
+
+ register_syscore_ops(&kvm_syscore_ops);
+
+ /*
+ * Undo virtualization enabling and bail if the system is going down.
+ * If userspace initiated a forced reboot, e.g. reboot -f, then it's
+ * possible for an in-flight operation to enable virtualization after
+ * syscore_shutdown() is called, i.e. without kvm_shutdown() being
+ * invoked. Note, this relies on system_state being set _before_
+ * kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked
+ * or this CPU observes the impending shutdown. Which is why KVM uses
+ * a syscore ops hook instead of registering a dedicated reboot
+ * notifier (the latter runs before system_state is updated).
+ */
+ if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
+ system_state == SYSTEM_RESTART) {
+ r = -EBUSY;
+ goto err_rebooting;
+ }
+
+ return 0;
+
+err_rebooting:
+ unregister_syscore_ops(&kvm_syscore_ops);
+ cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+err_cpuhp:
+ kvm_arch_disable_virtualization();
+ --kvm_usage_count;
+ return r;
+}
+
+static void kvm_disable_virtualization(void)
+{
+ guard(mutex)(&kvm_usage_lock);
+
+ if (--kvm_usage_count)
+ return;
+
+ unregister_syscore_ops(&kvm_syscore_ops);
+ cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+ kvm_arch_disable_virtualization();
+}
+
+static int kvm_init_virtualization(void)
+{
+ if (enable_virt_at_load)
+ return kvm_enable_virtualization();
+
+ return 0;
+}
+
+static void kvm_uninit_virtualization(void)
+{
+ if (enable_virt_at_load)
+ kvm_disable_virtualization();
+}
#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-static int hardware_enable_all(void)
+static int kvm_enable_virtualization(void)
+{
+ return 0;
+}
+
+static int kvm_init_virtualization(void)
{
return 0;
}
-static void hardware_disable_all(void)
+static void kvm_disable_virtualization(void)
+{
+
+}
+
+static void kvm_uninit_virtualization(void)
{
}
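
kvm_enable_virtualization() and kvm_disable_virtualization() above combine a usage count with guard(mutex)(), the scope-based lock guard from <linux/cleanup.h> that releases kvm_usage_lock automatically on every return path. A reduced sketch of the refcounted on/off idiom (the resource and its helpers are placeholders, not KVM's):

/*
 * Sketch: the first user powers the resource on, the last user powers it
 * off.  guard(mutex)(&lock) unlocks when the enclosing scope is left, so
 * the early returns need no explicit mutex_unlock().
 */
static DEFINE_MUTEX(usage_lock);
static int usage_count;

int resource_get(void)
{
	int r;

	guard(mutex)(&usage_lock);

	if (usage_count++)
		return 0;		/* already enabled for an earlier user */

	r = power_on();			/* hypothetical helper */
	if (r)
		--usage_count;		/* roll the count back on failure */
	return r;
}

void resource_put(void)
{
	guard(mutex)(&usage_lock);

	if (--usage_count)
		return;			/* other users still need it */

	power_off();			/* hypothetical helper */
}
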
@@ -6454,15 +6473,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
int r;
int cpu;
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
- kvm_online_cpu, kvm_offline_cpu);
- if (r)
- return r;
-
- register_syscore_ops(&kvm_syscore_ops);
-#endif
-
/* A kmem cache lets us meet the alignment requirements of fx_save. */
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
@@ -6473,10 +6483,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
offsetofend(struct kvm_vcpu, stats_id)
- offsetof(struct kvm_vcpu, arch),
NULL);
- if (!kvm_vcpu_cache) {
- r = -ENOMEM;
- goto err_vcpu_cache;
- }
+ if (!kvm_vcpu_cache)
+ return -ENOMEM;
for_each_possible_cpu(cpu) {
if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
@@ -6510,6 +6518,10 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
kvm_gmem_init(module);
+ r = kvm_init_virtualization();
+ if (r)
+ goto err_virt;
+
/*
* Registration _must_ be the very last thing done, as this exposes
* /dev/kvm to userspace, i.e. all infrastructure must be setup!
@@ -6523,6 +6535,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
return 0;
err_register:
+ kvm_uninit_virtualization();
+err_virt:
kvm_vfio_ops_exit();
err_vfio:
kvm_async_pf_deinit();
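
The new err_virt label slots into kvm_init()'s existing goto-unwind chain: each failure path undoes, in reverse order, exactly the steps that had already succeeded. The general shape of that error-handling pattern, as a stand-alone sketch with hypothetical setup steps:

int init_everything(void)
{
	int r;

	r = setup_a();
	if (r)
		return r;		/* nothing to undo yet */

	r = setup_b();
	if (r)
		goto err_b;

	r = setup_c();
	if (r)
		goto err_c;

	return 0;

err_c:
	teardown_b();			/* undo setup_b() */
err_b:
	teardown_a();			/* undo setup_a() */
	return r;
}
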
@@ -6533,11 +6547,6 @@ err_cpu_kick_mask:
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
-err_vcpu_cache:
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- unregister_syscore_ops(&kvm_syscore_ops);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
return r;
}
EXPORT_SYMBOL_GPL(kvm_init);
@@ -6553,16 +6562,14 @@ void kvm_exit(void)
*/
misc_deregister(&kvm_dev);
+ kvm_uninit_virtualization();
+
debugfs_remove_recursive(kvm_debugfs_dir);
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
kvm_vfio_ops_exit();
kvm_async_pf_deinit();
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
- unregister_syscore_ops(&kvm_syscore_ops);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
kvm_irqfd_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);