author     Vince Weaver <vweaver1@eecs.utk.edu>    2012-03-01 17:28:14 -0500
committer  Ingo Molnar <mingo@kernel.org>          2012-06-06 17:23:35 +0200
commit     c48b60538c3ba05a7a2713c4791b25405525431b (patch)
tree       8482e11e2060831f1ffc5e81006d88e8cd8f5ea2
parent     1ff4d58a192aea7f245981e2579765f961f6eb9c (diff)
perf/x86: Use rdpmc() rather than rdmsr() when possible in the kernel
The rdpmc instruction is faster than the equivalent rdmsr call, so use it
when possible in the kernel.

The perfctr kernel patches did this, after extensive testing showed rdpmc
to always be faster (one can look in etc/costs in the perfctr-2.6 package
to see a historical list of the overhead).

I have done some tests on a 3.2 kernel; the kernel module I used was
included in the first posting of this patch:

                   rdmsr           rdpmc
   Core2 T9900:    203.9 cycles    30.9 cycles
   AMD fam0fh:      56.2 cycles     9.8 cycles
   Atom 6/28/2:    129.7 cycles    50.6 cycles

The speedup of using rdpmc is large.

[ It's probably possible (and desirable) to do this without requiring a
  new field in the hw_perf_event structure, but the fixed events make
  this tricky. ]

Signed-off-by: Vince Weaver <vweaver1@eecs.utk.edu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1203011724030.26934@cl320.eecs.utk.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
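[ Editorial note, not part of the patch: rdpmc takes the counter index in
  ECX and returns the counter value in EDX:EAX; on Intel processors, bit 30
  of the index selects the fixed-function counters, which is why
  x86_assign_hw_event() below stores (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30
  for fixed events.  A minimal, illustrative sketch of such a read follows;
  the helper name is made up and this is not the kernel's implementation: ]

	/*
	 * Illustrative sketch only: read a performance counter with rdpmc.
	 * ECX holds the counter index; e.g. 0 is general-purpose counter 0,
	 * and (0 | 1<<30) is fixed-function counter 0 on Intel.  From user
	 * space this additionally requires CR4.PCE to be set.
	 */
	static inline unsigned long long read_pmc(unsigned int counter)
	{
		unsigned int low, high;

		asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
		return low | ((unsigned long long)high << 32);
	}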
-rw-r--r--   arch/x86/kernel/cpu/perf_event.c   4
-rw-r--r--   include/linux/perf_event.h         1
2 files changed, 4 insertions, 1 deletion
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 43c2017347e7..000a4746c7ce 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -75,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 */
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(hwc->event_base, new_raw_count);
+	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			    new_raw_count) != prev_raw_count)
@@ -819,9 +819,11 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30;
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base = x86_pmu_event_addr(hwc->idx);
+		hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx);
 	}
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45db49f64bb4..1ce887abcc5c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -677,6 +677,7 @@ struct hw_perf_event {
 			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
+			int		event_base_rdpmc;
 			int		idx;
 			int		last_cpu;