summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/events/intel/ds.c28
-rw-r--r--kernel/events/internal.h4
-rw-r--r--kernel/events/ring_buffer.c64
3 files changed, 66 insertions, 30 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7a9f5dac5abe..7acc526b4ad2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -684,7 +684,7 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -693,7 +693,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
@@ -701,7 +701,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
struct event_constraint intel_slm_pebs_event_constraints[] = {
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
@@ -726,7 +726,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -743,7 +743,7 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -752,7 +752,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -767,9 +767,9 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -783,9 +783,9 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -806,9 +806,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -829,9 +829,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = {
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 79c47076700a..3aef4191798c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -24,7 +24,7 @@ struct ring_buffer {
atomic_t poll; /* POLL_ for wakeups */
local_t head; /* write position */
- local_t nest; /* nested writers */
+ unsigned int nest; /* nested writers */
local_t events; /* event limit */
local_t wakeup; /* wakeup stamp */
local_t lost; /* nr records lost */
@@ -41,7 +41,7 @@ struct ring_buffer {
/* AUX area */
long aux_head;
- local_t aux_nest;
+ unsigned int aux_nest;
long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
unsigned long aux_pgoff;
int aux_nr_pages;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 674b35383491..ffb59a4ef4ff 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -38,7 +38,12 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
struct ring_buffer *rb = handle->rb;
preempt_disable();
- local_inc(&rb->nest);
+
+ /*
+ * Avoid an explicit LOAD/STORE such that architectures with memops
+ * can use them.
+ */
+ (*(volatile unsigned int *)&rb->nest)++;
handle->wakeup = local_read(&rb->wakeup);
}
@@ -46,17 +51,35 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
{
struct ring_buffer *rb = handle->rb;
unsigned long head;
+ unsigned int nest;
+
+ /*
+ * If this isn't the outermost nesting, we don't have to update
+ * @rb->user_page->data_head.
+ */
+ nest = READ_ONCE(rb->nest);
+ if (nest > 1) {
+ WRITE_ONCE(rb->nest, nest - 1);
+ goto out;
+ }
again:
+ /*
+ * In order to avoid publishing a head value that goes backwards,
+ * we must ensure the load of @rb->head happens after we've
+ * incremented @rb->nest.
+ *
+ * Otherwise we can observe a @rb->head value before one published
+ * by an IRQ/NMI happening between the load and the increment.
+ */
+ barrier();
head = local_read(&rb->head);
/*
- * IRQ/NMI can happen here, which means we can miss a head update.
+ * IRQ/NMI can happen here and advance @rb->head, causing our
+ * load above to be stale.
*/
- if (!local_dec_and_test(&rb->nest))
- goto out;
-
/*
* Since the mmap() consumer (userspace) can run on a different CPU:
*
@@ -84,14 +107,23 @@ again:
* See perf_output_begin().
*/
smp_wmb(); /* B, matches C */
- rb->user_page->data_head = head;
+ WRITE_ONCE(rb->user_page->data_head, head);
/*
- * Now check if we missed an update -- rely on previous implied
- * compiler barriers to force a re-read.
+ * We must publish the head before decrementing the nest count,
+ * otherwise an IRQ/NMI can publish a more recent head value and our
+ * write will (temporarily) publish a stale value.
*/
+ barrier();
+ WRITE_ONCE(rb->nest, 0);
+
+ /*
+ * Ensure we decrement @rb->nest before we validate the @rb->head.
+ * Otherwise we cannot be sure we caught the 'last' nested update.
+ */
+ barrier();
if (unlikely(head != local_read(&rb->head))) {
- local_inc(&rb->nest);
+ WRITE_ONCE(rb->nest, 1);
goto again;
}
@@ -330,6 +362,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *output_event = event;
unsigned long aux_head, aux_tail;
struct ring_buffer *rb;
+ unsigned int nest;
if (output_event->parent)
output_event = output_event->parent;
@@ -360,13 +393,16 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!refcount_inc_not_zero(&rb->aux_refcount))
goto err;
+ nest = READ_ONCE(rb->aux_nest);
/*
* Nesting is not supported for AUX area, make sure nested
* writers are caught early
*/
- if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+ if (WARN_ON_ONCE(nest))
goto err_put;
+ WRITE_ONCE(rb->aux_nest, nest + 1);
+
aux_head = rb->aux_head;
handle->rb = rb;
@@ -394,7 +430,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!handle->size) { /* A, matches D */
event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
goto err_put;
}
}
@@ -471,7 +507,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
perf_event_aux_event(handle->event, aux_head, size,
handle->aux_flags);
- rb->user_page->aux_head = rb->aux_head;
+ WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
if (rb_need_aux_wakeup(rb))
wakeup = true;
@@ -483,7 +519,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
handle->event = NULL;
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
/* can't be last */
rb_free_aux(rb);
ring_buffer_put(rb);
@@ -503,7 +539,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
- rb->user_page->aux_head = rb->aux_head;
+ WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
if (rb_need_aux_wakeup(rb)) {
perf_output_wakeup(handle);
handle->wakeup = rb->aux_wakeup + rb->aux_watermark;