diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-15 10:18:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-15 10:18:16 -0700 |
commit | e37a07e0c29cd2cef4633b1e6db5579cc99ba4cd (patch) | |
tree | d968ca38ebb196c1cf55aa83554623b326592cf1 /virt | |
parent | a80099a152d0719e2d8d750e07f4ffa991553d30 (diff) | |
parent | d3457c877b14aaee8c52923eedf05a3b78af0476 (diff) |
Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Radim Krčmář:
"Second batch of KVM updates for v4.13
Common:
- add uevents for VM creation/destruction
- annotate and properly access RCU-protected objects
s390:
- rename IOCTL added in the first v4.13 merge
x86:
- emulate VMLOAD VMSAVE feature in SVM
- support paravirtual asynchronous page fault while nested
- add Hyper-V userspace interfaces for better migration
- improve master clock corner cases
- extend internal error reporting after EPT misconfig
- correct single-stepping of emulated instructions in SVM
- handle MCE during VM entry
- fix nVMX VM entry checks and nVMX VMCS shadowing"
* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
kvm: x86: hyperv: make VP_INDEX managed by userspace
KVM: async_pf: Let guest support delivery of async_pf from guest mode
KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
KVM: x86: make backwards_tsc_observed a per-VM variable
KVM: trigger uevents when creating or destroying a VM
KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
KVM: SVM: Rename lbr_ctl field in the vmcb control area
KVM: SVM: Prepare for new bit definition in lbr_ctl
KVM: SVM: handle singlestep exception when skipping emulated instructions
KVM: x86: take slots_lock in kvm_free_pit
KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
kvm: vmx: Properly handle machine check during VM-entry
KVM: x86: update master clock before computing kvmclock_offset
kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/eventfd.c | 8 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 2 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 131 |
3 files changed, 115 insertions, 26 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9120edf3c94b..f2ac53ab8243 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, if (ret < 0) goto unlock_fail; - kvm->buses[bus_idx]->ioeventfd_count++; + kvm_get_bus(kvm, bus_idx)->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); mutex_unlock(&kvm->slots_lock); @@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, { struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; + struct kvm_io_bus *bus; int ret = -ENOENT; eventfd = eventfd_ctx_fdget(args->fd); @@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (kvm->buses[bus_idx]) - kvm->buses[bus_idx]->ioeventfd_count--; + bus = kvm_get_bus(kvm, bus_idx); + if (bus) + bus->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 31e40c9e81df..b1286c4e0712 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm, } mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; + old = rcu_dereference_protected(kvm->irq_routing, 1); rcu_assign_pointer(kvm->irq_routing, new); kvm_irq_routing_update(kvm); kvm_arch_irq_routing_update(kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 19f0ecb9b93e..82987d457b8b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +#define KVM_EVENT_CREATE_VM 0 +#define KVM_EVENT_DESTROY_VM 1 +static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); +static unsigned long long kvm_createvm_count; +static unsigned long long kvm_active_vms; + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) @@ -187,12 +193,23 @@ static void ack_flush(void *_completed) { } +static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) +{ + if (unlikely(!cpus)) + cpus = cpu_online_mask; + + if (cpumask_empty(cpus)) + return false; + + smp_call_function_many(cpus, ack_flush, NULL, wait); + return true; +} + bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { int i, cpu, me; cpumask_var_t cpus; - bool called = true; - bool wait = req & KVM_REQUEST_WAIT; + bool called; struct kvm_vcpu *vcpu; zalloc_cpumask_var(&cpus, GFP_ATOMIC); @@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) if (cpus != NULL && cpu != -1 && cpu != me && kvm_request_needs_ipi(vcpu, req)) - cpumask_set_cpu(cpu, cpus); + __cpumask_set_cpu(cpu, cpus); } - if (unlikely(cpus == NULL)) - smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait); - else if (!cpumask_empty(cpus)) - smp_call_function_many(cpus, ack_flush, NULL, wait); - else - called = false; + called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); put_cpu(); free_cpumask_var(cpus); return called; @@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { - put_pid(vcpu->pid); + /* + * no need for rcu_read_lock as VCPU_RUN is the only place that + * will change the vcpu->pid pointer and on uninit all file + * descriptors are already gone. + */ + put_pid(rcu_dereference_protected(vcpu->pid, 1)); kvm_arch_vcpu_uninit(vcpu); free_page((unsigned long)vcpu->run); } @@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type) if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { - kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), - GFP_KERNEL); + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); if (!kvm->buses[i]) goto out_err; } @@ -700,9 +717,10 @@ out_err_no_srcu: hardware_disable_all(); out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) - kfree(kvm->buses[i]); + kfree(rcu_access_pointer(kvm->buses[i])); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, kvm->memslots[i]); + kvm_free_memslots(kvm, + rcu_dereference_protected(kvm->memslots[i], 1)); kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm) int i; struct mm_struct *mm = kvm->mm; + kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); @@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - if (kvm->buses[i]) - kvm_io_bus_destroy(kvm->buses[i]); + struct kvm_io_bus *bus; + + bus = rcu_dereference_protected(kvm->buses[i], 1); + if (bus) + kvm_io_bus_destroy(bus); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, kvm->memslots[i]); + kvm_free_memslots(kvm, + rcu_dereference_protected(kvm->memslots[i], 1)); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp, if (r) return r; switch (ioctl) { - case KVM_RUN: + case KVM_RUN: { + struct pid *oldpid; r = -EINVAL; if (arg) goto out; - if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { + oldpid = rcu_access_pointer(vcpu->pid); + if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ - struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); @@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; + } case KVM_GET_REGS: { struct kvm_regs *kvm_regs; @@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) fput(file); return -ENOMEM; } + kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); fd_install(r, file); return r; @@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, { struct kvm_io_bus *new_bus, *bus; - bus = kvm->buses[bus_idx]; + bus = kvm_get_bus(kvm, bus_idx); if (!bus) return -ENOMEM; @@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, int i; struct kvm_io_bus *new_bus, *bus; - bus = kvm->buses[bus_idx]; + bus = kvm_get_bus(kvm, bus_idx); if (!bus) return; @@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = { [KVM_STAT_VM] = &vm_stat_fops, }; +static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) +{ + struct kobj_uevent_env *env; + char *tmp, *pathbuf = NULL; + unsigned long long created, active; + + if (!kvm_dev.this_device || !kvm) + return; + + spin_lock(&kvm_lock); + if (type == KVM_EVENT_CREATE_VM) { + kvm_createvm_count++; + kvm_active_vms++; + } else if (type == KVM_EVENT_DESTROY_VM) { + kvm_active_vms--; + } + created = kvm_createvm_count; + active = kvm_active_vms; + spin_unlock(&kvm_lock); + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) + return; + + add_uevent_var(env, "CREATED=%llu", created); + add_uevent_var(env, "COUNT=%llu", active); + + if (type == KVM_EVENT_CREATE_VM) + add_uevent_var(env, "EVENT=create"); + else if (type == KVM_EVENT_DESTROY_VM) + add_uevent_var(env, "EVENT=destroy"); + + if (kvm->debugfs_dentry) { + char p[ITOA_MAX_LEN]; + + snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); + tmp = strchrnul(p + 1, '-'); + *tmp = '\0'; + add_uevent_var(env, "PID=%s", p); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); + if (pathbuf) { + /* sizeof counts the final '\0' */ + int len = sizeof("STATS_PATH=") - 1; + const char *pvar = "STATS_PATH="; + + tmp = dentry_path_raw(kvm->debugfs_dentry, + pathbuf + len, + PATH_MAX - len); + if (!IS_ERR(tmp)) { + memcpy(tmp - len, pvar, len); + env->envp[env->envp_idx++] = tmp - len; + } + } + } + /* no need for checks, since we are adding at most only 5 keys */ + env->envp[env->envp_idx++] = NULL; + kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); + kfree(env); + kfree(pathbuf); +} + static int kvm_init_debug(void) { int r = -EEXIST; |