summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/hyperv.c10
-rw-r--r--arch/x86/kvm/pmu.c286
-rw-r--r--arch/x86/kvm/pmu.h13
-rw-r--r--arch/x86/kvm/svm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/svm.c5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c23
-rw-r--r--arch/x86/kvm/vmx/vmx.c4
-rw-r--r--arch/x86/kvm/x86.c230
-rw-r--r--arch/x86/kvm/x86.h12
9 files changed, 401 insertions, 184 deletions
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f610b07ddcf4..b28fd020066f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1448,8 +1448,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
return syndbg_set_msr(vcpu, msr, data, host);
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
return 0;
@@ -1570,8 +1569,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
@@ -1626,7 +1624,7 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
return syndbg_get_msr(vcpu, msr, pdata, host);
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ kvm_pr_unimpl_rdmsr(vcpu, msr);
return 1;
}
@@ -1691,7 +1689,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
data = APIC_BUS_FREQUENCY;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ kvm_pr_unimpl_rdmsr(vcpu, msr);
return 1;
}
*pdata = data;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58e5a456273a..612e6c70ce2e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -29,9 +29,18 @@
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
-static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
+/* Precise Distribution of Instructions Retired (PDIR) */
+static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
+ /* Instruction-Accurate PDIR (PDIR++) */
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
+ {}
+};
+
+/* Precise Distribution (PDist) */
+static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
{}
};
@@ -156,6 +165,28 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
+static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
+{
+ /*
+ * For some model specific pebs counters with special capabilities
+ * (PDIR, PDIR++, PDIST), KVM needs to raise the event precise
+ * level to the maximum value (currently 3, backwards compatible)
+ * so that the perf subsystem would assign specific hardware counter
+ * with that capability for vPMC.
+ */
+ if ((pmc->idx == 0 && x86_match_cpu(vmx_pebs_pdist_cpu)) ||
+ (pmc->idx == 32 && x86_match_cpu(vmx_pebs_pdir_cpu)))
+ return 3;
+
+ /*
+ * The non-zero precision level of guest event makes the ordinary
+ * guest event becomes a guest PEBS event and triggers the host
+ * PEBS PMI handler to determine whether the PEBS overflow PMI
+ * comes from the host counters or the guest.
+ */
+ return 1;
+}
+
static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
bool exclude_user, bool exclude_kernel,
bool intr)
@@ -187,22 +218,12 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
}
if (pebs) {
/*
- * The non-zero precision level of guest event makes the ordinary
- * guest event becomes a guest PEBS event and triggers the host
- * PEBS PMI handler to determine whether the PEBS overflow PMI
- * comes from the host counters or the guest.
- *
* For most PEBS hardware events, the difference in the software
* precision levels of guest and host PEBS events will not affect
* the accuracy of the PEBS profiling result, because the "event IP"
* in the PEBS record is calibrated on the guest side.
- *
- * On Icelake everything is fine. Other hardware (GLC+, TNT+) that
- * could possibly care here is unsupported and needs changes.
*/
- attr.precise_ip = 1;
- if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
- attr.precise_ip = 3;
+ attr.precise_ip = pmc_get_pebs_precise_level(pmc);
}
event = perf_event_create_kernel_counter(&attr, -1, current,
@@ -255,48 +276,128 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
-static int cmp_u64(const void *pa, const void *pb)
+static int filter_cmp(const void *pa, const void *pb, u64 mask)
{
- u64 a = *(u64 *)pa;
- u64 b = *(u64 *)pb;
+ u64 a = *(u64 *)pa & mask;
+ u64 b = *(u64 *)pb & mask;
return (a > b) - (a < b);
}
+
+static int filter_sort_cmp(const void *pa, const void *pb)
+{
+ return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT |
+ KVM_PMU_MASKED_ENTRY_EXCLUDE));
+}
+
+/*
+ * For the event filter, searching is done on the 'includes' list and
+ * 'excludes' list separately rather than on the 'events' list (which
+ * has both). As a result the exclude bit can be ignored.
+ */
+static int filter_event_cmp(const void *pa, const void *pb)
+{
+ return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT));
+}
+
+static int find_filter_index(u64 *events, u64 nevents, u64 key)
+{
+ u64 *fe = bsearch(&key, events, nevents, sizeof(events[0]),
+ filter_event_cmp);
+
+ if (!fe)
+ return -1;
+
+ return fe - events;
+}
+
+static bool is_filter_entry_match(u64 filter_event, u64 umask)
+{
+ u64 mask = filter_event >> (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8);
+ u64 match = filter_event & KVM_PMU_MASKED_ENTRY_UMASK_MATCH;
+
+ BUILD_BUG_ON((KVM_PMU_ENCODE_MASKED_ENTRY(0, 0xff, 0, false) >>
+ (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8)) !=
+ ARCH_PERFMON_EVENTSEL_UMASK);
+
+ return (umask & mask) == match;
+}
+
+static bool filter_contains_match(u64 *events, u64 nevents, u64 eventsel)
+{
+ u64 event_select = eventsel & kvm_pmu_ops.EVENTSEL_EVENT;
+ u64 umask = eventsel & ARCH_PERFMON_EVENTSEL_UMASK;
+ int i, index;
+
+ index = find_filter_index(events, nevents, event_select);
+ if (index < 0)
+ return false;
+
+ /*
+ * Entries are sorted by the event select. Walk the list in both
+ * directions to process all entries with the targeted event select.
+ */
+ for (i = index; i < nevents; i++) {
+ if (filter_event_cmp(&events[i], &event_select))
+ break;
+
+ if (is_filter_entry_match(events[i], umask))
+ return true;
+ }
+
+ for (i = index - 1; i >= 0; i--) {
+ if (filter_event_cmp(&events[i], &event_select))
+ break;
+
+ if (is_filter_entry_match(events[i], umask))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
+ u64 eventsel)
+{
+ if (filter_contains_match(f->includes, f->nr_includes, eventsel) &&
+ !filter_contains_match(f->excludes, f->nr_excludes, eventsel))
+ return f->action == KVM_PMU_EVENT_ALLOW;
+
+ return f->action == KVM_PMU_EVENT_DENY;
+}
+
+static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
+ int idx)
+{
+ int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
+ return false;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ !test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
+ return false;
+
+ return true;
+}
+
static bool check_pmu_event_filter(struct kvm_pmc *pmc)
{
- struct kvm_pmu_event_filter *filter;
+ struct kvm_x86_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
- bool allow_event = true;
- __u64 key;
- int idx;
if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
return false;
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
if (!filter)
- goto out;
+ return true;
- if (pmc_is_gp(pmc)) {
- key = pmc->eventsel & AMD64_RAW_EVENT_MASK_NB;
- if (bsearch(&key, filter->events, filter->nevents,
- sizeof(__u64), cmp_u64))
- allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
- else
- allow_event = filter->action == KVM_PMU_EVENT_DENY;
- } else {
- idx = pmc->idx - INTEL_PMC_IDX_FIXED;
- if (filter->action == KVM_PMU_EVENT_DENY &&
- test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
- allow_event = false;
- if (filter->action == KVM_PMU_EVENT_ALLOW &&
- !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
- allow_event = false;
- }
+ if (pmc_is_gp(pmc))
+ return is_gp_event_allowed(filter, pmc->eventsel);
-out:
- return allow_event;
+ return is_fixed_event_allowed(filter, pmc->idx);
}
static void reprogram_counter(struct kvm_pmc *pmc)
@@ -593,43 +694,128 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
+{
+ u64 mask = kvm_pmu_ops.EVENTSEL_EVENT |
+ KVM_PMU_MASKED_ENTRY_UMASK_MASK |
+ KVM_PMU_MASKED_ENTRY_UMASK_MATCH |
+ KVM_PMU_MASKED_ENTRY_EXCLUDE;
+ int i;
+
+ for (i = 0; i < filter->nevents; i++) {
+ if (filter->events[i] & ~mask)
+ return false;
+ }
+
+ return true;
+}
+
+static void convert_to_masked_filter(struct kvm_x86_pmu_event_filter *filter)
+{
+ int i, j;
+
+ for (i = 0, j = 0; i < filter->nevents; i++) {
+ /*
+ * Skip events that are impossible to match against a guest
+ * event. When filtering, only the event select + unit mask
+ * of the guest event is used. To maintain backwards
+ * compatibility, impossible filters can't be rejected :-(
+ */
+ if (filter->events[i] & ~(kvm_pmu_ops.EVENTSEL_EVENT |
+ ARCH_PERFMON_EVENTSEL_UMASK))
+ continue;
+ /*
+ * Convert userspace events to a common in-kernel event so
+ * only one code path is needed to support both events. For
+ * the in-kernel events use masked events because they are
+ * flexible enough to handle both cases. To convert to masked
+ * events all that's needed is to add an "all ones" umask_mask,
+ * (unmasked filter events don't support EXCLUDE).
+ */
+ filter->events[j++] = filter->events[i] |
+ (0xFFULL << KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT);
+ }
+
+ filter->nevents = j;
+}
+
+static int prepare_filter_lists(struct kvm_x86_pmu_event_filter *filter)
+{
+ int i;
+
+ if (!(filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS))
+ convert_to_masked_filter(filter);
+ else if (!is_masked_filter_valid(filter))
+ return -EINVAL;
+
+ /*
+ * Sort entries by event select and includes vs. excludes so that all
+ * entries for a given event select can be processed efficiently during
+ * filtering. The EXCLUDE flag uses a more significant bit than the
+ * event select, and so the sorted list is also effectively split into
+ * includes and excludes sub-lists.
+ */
+ sort(&filter->events, filter->nevents, sizeof(filter->events[0]),
+ filter_sort_cmp, NULL);
+
+ i = filter->nevents;
+ /* Find the first EXCLUDE event (only supported for masked events). */
+ if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) {
+ for (i = 0; i < filter->nevents; i++) {
+ if (filter->events[i] & KVM_PMU_MASKED_ENTRY_EXCLUDE)
+ break;
+ }
+ }
+
+ filter->nr_includes = i;
+ filter->nr_excludes = filter->nevents - filter->nr_includes;
+ filter->includes = filter->events;
+ filter->excludes = filter->events + filter->nr_includes;
+
+ return 0;
+}
+
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
- struct kvm_pmu_event_filter tmp, *filter;
+ struct kvm_pmu_event_filter __user *user_filter = argp;
+ struct kvm_x86_pmu_event_filter *filter;
+ struct kvm_pmu_event_filter tmp;
struct kvm_vcpu *vcpu;
unsigned long i;
size_t size;
int r;
- if (copy_from_user(&tmp, argp, sizeof(tmp)))
+ if (copy_from_user(&tmp, user_filter, sizeof(tmp)))
return -EFAULT;
if (tmp.action != KVM_PMU_EVENT_ALLOW &&
tmp.action != KVM_PMU_EVENT_DENY)
return -EINVAL;
- if (tmp.flags != 0)
+ if (tmp.flags & ~KVM_PMU_EVENT_FLAGS_VALID_MASK)
return -EINVAL;
if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
return -E2BIG;
size = struct_size(filter, events, tmp.nevents);
- filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
+ filter = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (!filter)
return -ENOMEM;
+ filter->action = tmp.action;
+ filter->nevents = tmp.nevents;
+ filter->fixed_counter_bitmap = tmp.fixed_counter_bitmap;
+ filter->flags = tmp.flags;
+
r = -EFAULT;
- if (copy_from_user(filter, argp, size))
+ if (copy_from_user(filter->events, user_filter->events,
+ sizeof(filter->events[0]) * filter->nevents))
goto cleanup;
- /* Ensure nevents can't be changed between the user copies. */
- *filter = tmp;
-
- /*
- * Sort the in-kernel list so that we can search it with bsearch.
- */
- sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);
+ r = prepare_filter_lists(filter);
+ if (r)
+ goto cleanup;
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index cdb91009701d..79988dafb15b 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -18,12 +18,6 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
-struct kvm_event_hw_type_mapping {
- u8 eventsel;
- u8 unit_mask;
- unsigned event_type;
-};
-
struct kvm_pmu_ops {
bool (*hw_event_available)(struct kvm_pmc *pmc);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
@@ -40,6 +34,9 @@ struct kvm_pmu_ops {
void (*reset)(struct kvm_vcpu *vcpu);
void (*deliver_pmi)(struct kvm_vcpu *vcpu);
void (*cleanup)(struct kvm_vcpu *vcpu);
+
+ const u64 EVENTSEL_EVENT;
+ const int MAX_NR_GP_COUNTERS;
};
void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);
@@ -161,7 +158,7 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
extern struct x86_pmu_capability kvm_pmu_cap;
-static inline void kvm_init_pmu_capability(void)
+static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
{
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
@@ -180,6 +177,8 @@ static inline void kvm_init_pmu_capability(void)
}
kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
+ kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
+ pmu_ops->MAX_NR_GP_COUNTERS);
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
KVM_PMC_MAX_FIXED);
}
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 1ff068f23841..cc77a0681800 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -231,4 +231,6 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
.refresh = amd_pmu_refresh,
.init = amd_pmu_init,
.reset = amd_pmu_reset,
+ .EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
+ .MAX_NR_GP_COUNTERS = KVM_AMD_PMC_MAX_GENERIC,
};
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d13cf53e7390..dd21e8b1a259 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3015,8 +3015,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_DEBUGCTLMSR:
if (!lbrv) {
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
- __func__, data);
+ kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
}
if (data & DEBUGCTL_RESERVED_BITS)
@@ -3045,7 +3044,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
case MSR_VM_IGNNE:
- vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index efce9ad70e4e..e8a3be0b9df9 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -22,16 +22,19 @@
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
-static struct kvm_event_hw_type_mapping intel_arch_events[] = {
- [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
- [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
- [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
- [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
- [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
- [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
- [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+static struct {
+ u8 eventsel;
+ u8 unit_mask;
+} const intel_arch_events[] = {
+ [0] = { 0x3c, 0x00 },
+ [1] = { 0xc0, 0x00 },
+ [2] = { 0x3c, 0x01 },
+ [3] = { 0x2e, 0x4f },
+ [4] = { 0x2e, 0x41 },
+ [5] = { 0xc4, 0x00 },
+ [6] = { 0xc5, 0x00 },
/* The above index must match CPUID 0x0A.EBX bit vector */
- [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
+ [7] = { 0x00, 0x03 },
};
/* mapping between fixed pmc index and intel_arch_events array */
@@ -811,4 +814,6 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
.reset = intel_pmu_reset,
.deliver_pmi = intel_pmu_deliver_pmi,
.cleanup = intel_pmu_cleanup,
+ .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
+ .MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
};
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bbf60bda877e..2690d018da11 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2206,9 +2206,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 810cbe3b3ba6..199a9ff0cd4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1419,7 +1419,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
* may depend on host virtualization features rather than host cpu features.
*/
-static const u32 msrs_to_save_all[] = {
+static const u32 msrs_to_save_base[] = {
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -1436,6 +1436,10 @@ static const u32 msrs_to_save_all[] = {
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
+ MSR_IA32_XFD, MSR_IA32_XFD_ERR,
+};
+
+static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
@@ -1460,11 +1464,10 @@ static const u32 msrs_to_save_all[] = {
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
-
- MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
-static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
+ ARRAY_SIZE(msrs_to_save_pmu)];
static unsigned num_msrs_to_save;
static const u32 emulated_msrs_all[] = {
@@ -3559,9 +3562,20 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
+static bool kvm_is_msr_to_save(u32 msr_index)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_msrs_to_save; i++) {
+ if (msrs_to_save[i] == msr_index)
+ return true;
+ }
+
+ return false;
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- bool pr = false;
u32 msr = msr_info->index;
u64 data = msr_info->data;
@@ -3607,15 +3621,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data == BIT_ULL(18)) {
vcpu->arch.msr_hwcr = data;
} else if (data != 0) {
- vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
- data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
break;
case MSR_FAM10H_MMIO_CONF_BASE:
if (data != 0) {
- vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
- "0x%llx\n", data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
break;
@@ -3795,16 +3807,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
- pr = true;
- fallthrough;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
- if (pr || data != 0)
- vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
- "0x%x data 0x%llx\n", msr, data);
+ if (data)
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_K7_CLK_CTL:
/*
@@ -3832,9 +3841,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -3882,20 +3889,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.guest_fpu.xfd_err = data;
break;
#endif
- case MSR_IA32_PEBS_ENABLE:
- case MSR_IA32_DS_AREA:
- case MSR_PEBS_DATA_CFG:
- case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
+
/*
* Userspace is allowed to write '0' to MSRs that KVM reports
* as to-be-saved, even if an MSRs isn't fully supported.
*/
- return !msr_info->host_initiated || data;
- default:
- if (kvm_pmu_is_valid_msr(vcpu, msr))
- return kvm_pmu_set_msr(vcpu, msr_info);
+ if (msr_info->host_initiated && !data &&
+ kvm_is_msr_to_save(msr))
+ break;
+
return KVM_MSR_RET_INVALID;
}
return 0;
@@ -3985,20 +3990,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */
msr_info->data = 0;
break;
- case MSR_IA32_PEBS_ENABLE:
- case MSR_IA32_DS_AREA:
- case MSR_PEBS_DATA_CFG:
- case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
- if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
- return kvm_pmu_get_msr(vcpu, msr_info);
- /*
- * Userspace is allowed to read MSRs that KVM reports as
- * to-be-saved, even if an MSR isn't fully supported.
- */
- if (!msr_info->host_initiated)
- return 1;
- msr_info->data = 0;
- break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -4254,6 +4245,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
+
+ /*
+ * Userspace is allowed to read MSRs that KVM reports as
+ * to-be-saved, even if an MSR isn't fully supported.
+ */
+ if (msr_info->host_initiated &&
+ kvm_is_msr_to_save(msr_info->index)) {
+ msr_info->data = 0;
+ break;
+ }
+
return KVM_MSR_RET_INVALID;
}
return 0;
@@ -4401,6 +4403,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_PMU_EVENT_FILTER:
+ case KVM_CAP_PMU_EVENT_MASKED_EVENTS:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
@@ -6993,83 +6996,98 @@ out:
return r;
}
-static void kvm_init_msr_list(void)
+static void kvm_probe_msr_to_save(u32 msr_index)
{
u32 dummy[2];
+
+ if (rdmsr_safe(msr_index, &dummy[0], &dummy[1]))
+ return;
+
+ /*
+ * Even MSRs that are valid in the host may not be exposed to guests in
+ * some cases.
+ */
+ switch (msr_index) {
+ case MSR_IA32_BNDCFGS:
+ if (!kvm_mpx_supported())
+ return;
+ break;
+ case MSR_TSC_AUX:
+ if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+ !kvm_cpu_cap_has(X86_FEATURE_RDPID))
+ return;
+ break;
+ case MSR_IA32_UMWAIT_CONTROL:
+ if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+ return;
+ break;
+ case MSR_IA32_RTIT_CTL:
+ case MSR_IA32_RTIT_STATUS:
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
+ return;
+ break;
+ case MSR_IA32_RTIT_CR3_MATCH:
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
+ !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
+ return;
+ break;
+ case MSR_IA32_RTIT_OUTPUT_BASE:
+ case MSR_IA32_RTIT_OUTPUT_MASK:
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
+ !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
+ return;
+ break;
+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
+ (msr_index - MSR_IA32_RTIT_ADDR0_A >=
+ intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2))
+ return;
+ break;
+ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX:
+ if (msr_index - MSR_ARCH_PERFMON_PERFCTR0 >=
+ kvm_pmu_cap.num_counters_gp)
+ return;
+ break;
+ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX:
+ if (msr_index - MSR_ARCH_PERFMON_EVENTSEL0 >=
+ kvm_pmu_cap.num_counters_gp)
+ return;
+ break;
+ case MSR_ARCH_PERFMON_FIXED_CTR0 ... MSR_ARCH_PERFMON_FIXED_CTR_MAX:
+ if (msr_index - MSR_ARCH_PERFMON_FIXED_CTR0 >=
+ kvm_pmu_cap.num_counters_fixed)
+ return;
+ break;
+ case MSR_IA32_XFD:
+ case MSR_IA32_XFD_ERR:
+ if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
+ return;
+ break;
+ default:
+ break;
+ }
+
+ msrs_to_save[num_msrs_to_save++] = msr_index;
+}
+
+static void kvm_init_msr_list(void)
+{
unsigned i;
BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
- "Please update the fixed PMCs in msrs_to_saved_all[]");
+ "Please update the fixed PMCs in msrs_to_save_pmu[]");
num_msrs_to_save = 0;
num_emulated_msrs = 0;
num_msr_based_features = 0;
- for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
- if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
- continue;
-
- /*
- * Even MSRs that are valid in the host may not be exposed
- * to the guests in some cases.
- */
- switch (msrs_to_save_all[i]) {
- case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
- continue;
- break;
- case MSR_TSC_AUX:
- if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
- !kvm_cpu_cap_has(X86_FEATURE_RDPID))
- continue;
- break;
- case MSR_IA32_UMWAIT_CONTROL:
- if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
- continue;
- break;
- case MSR_IA32_RTIT_CTL:
- case MSR_IA32_RTIT_STATUS:
- if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
- continue;
- break;
- case MSR_IA32_RTIT_CR3_MATCH:
- if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
- !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
- continue;
- break;
- case MSR_IA32_RTIT_OUTPUT_BASE:
- case MSR_IA32_RTIT_OUTPUT_MASK:
- if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
- (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
- !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
- continue;
- break;
- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
- if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
- msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
- intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
- continue;
- break;
- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX:
- if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
- min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
- continue;
- break;
- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX:
- if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
- min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
- continue;
- break;
- case MSR_IA32_XFD:
- case MSR_IA32_XFD_ERR:
- if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
- continue;
- break;
- default:
- break;
- }
+ for (i = 0; i < ARRAY_SIZE(msrs_to_save_base); i++)
+ kvm_probe_msr_to_save(msrs_to_save_base[i]);
- msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
+ if (enable_pmu) {
+ for (i = 0; i < ARRAY_SIZE(msrs_to_save_pmu); i++)
+ kvm_probe_msr_to_save(msrs_to_save_pmu[i]);
}
for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
@@ -9377,7 +9395,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
- kvm_init_pmu_capability();
+ kvm_init_pmu_capability(ops->pmu_ops);
r = ops->hardware_setup();
if (r != 0)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..f3554bf05201 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -331,6 +331,18 @@ extern bool report_ignored_msrs;
extern bool eager_page_split;
+static inline void kvm_pr_unimpl_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "Unhandled WRMSR(0x%x) = 0x%llx\n", msr, data);
+}
+
+static inline void kvm_pr_unimpl_rdmsr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "Unhandled RDMSR(0x%x)\n", msr);
+}
+
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,