summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2017-09-05 16:26:44 +0200
committerIngo Molnar <mingo@kernel.org>2017-10-27 10:31:59 +0200
commit0c1cbc18df9e38182a0604b15535699c84d7342a (patch)
tree85c7d42284fd594af32ca8d176aa8fc44e618a60 /kernel
parent7f0ec32526d2446fdd79b0c6c6d08e64ef9fb1f3 (diff)
perf/core: Fix perf_event_read()
perf_event_read() has a number of issues regarding the timekeeping bits. - The IPI didn't update group times when it found INACTIVE - The direct call would not re-check ->state after taking ctx->lock which can result in ->count and timestamps getting out of sync. And we can make use of the ordering introduced for perf_event_stop() to make it more accurate for ACTIVE. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c55
1 files changed, 39 insertions, 16 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 205a4321f678..6f74f9c35490 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2081,8 +2081,9 @@ event_sched_in(struct perf_event *event,
WRITE_ONCE(event->oncpu, smp_processor_id());
/*
- * Order event::oncpu write to happen before the ACTIVE state
- * is visible.
+ * Order event::oncpu write to happen before the ACTIVE state is
+ * visible. This allows perf_event_{stop,read}() to observe the correct
+ * ->oncpu if it sees ACTIVE.
*/
smp_wmb();
WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
@@ -3638,12 +3639,16 @@ static void __perf_event_read(void *info)
return;
raw_spin_lock(&ctx->lock);
- if (ctx->is_active) {
+ if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
- update_event_times(event);
+ if (!data->group)
+ update_event_times(event);
+ else
+ update_group_times(event);
+
if (event->state != PERF_EVENT_STATE_ACTIVE)
goto unlock;
@@ -3658,7 +3663,6 @@ static void __perf_event_read(void *info)
pmu->read(event);
list_for_each_entry(sub, &event->sibling_list, group_entry) {
- update_event_times(sub);
if (sub->state == PERF_EVENT_STATE_ACTIVE) {
/*
* Use sibling's PMU rather than @event's since
@@ -3748,23 +3752,35 @@ out:
static int perf_event_read(struct perf_event *event, bool group)
{
+ enum perf_event_state state = READ_ONCE(event->state);
int event_cpu, ret = 0;
/*
* If event is enabled and currently active on a CPU, update the
* value in the event structure:
*/
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
- struct perf_read_data data = {
- .event = event,
- .group = group,
- .ret = 0,
- };
+again:
+ if (state == PERF_EVENT_STATE_ACTIVE) {
+ struct perf_read_data data;
+
+ /*
+ * Orders the ->state and ->oncpu loads such that if we see
+ * ACTIVE we must also see the right ->oncpu.
+ *
+ * Matches the smp_wmb() from event_sched_in().
+ */
+ smp_rmb();
event_cpu = READ_ONCE(event->oncpu);
if ((unsigned)event_cpu >= nr_cpu_ids)
return 0;
+ data = (struct perf_read_data){
+ .event = event,
+ .group = group,
+ .ret = 0,
+ };
+
preempt_disable();
event_cpu = __perf_event_read_cpu(event, event_cpu);
@@ -3781,20 +3797,27 @@ static int perf_event_read(struct perf_event *event, bool group)
(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
preempt_enable();
ret = data.ret;
- } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+
+ } else if (state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
raw_spin_lock_irqsave(&ctx->lock, flags);
+ state = event->state;
+ if (state != PERF_EVENT_STATE_INACTIVE) {
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
+ goto again;
+ }
+
/*
- * may read while context is not active
- * (e.g., thread is blocked), in that case
- * we cannot update context time
+ * May read while context is not active (e.g., thread is
+ * blocked), in that case we cannot update context time
*/
- if (ctx->is_active) {
+ if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
+
if (group)
update_group_times(event);
else