From 0fe1ac48bef018bed896307cd12f6ca9b5e704ab Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Apr 2010 20:46:04 +0000 Subject: powerpc/perf_event: Fix oops due to perf_event_do_pending call Anton Blanchard found that large POWER systems would occasionally crash in the exception exit path when profiling with perf_events. The symptom was that an interrupt would occur late in the exit path when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts should be hard-disabled at this point but they were enabled. Because the interrupt was not recoverable the system panicked. The reason is that the exception exit path was calling perf_event_do_pending after hard-disabling interrupts, and perf_event_do_pending will re-enable interrupts. The simplest and cleanest fix for this is to use the same mechanism that 32-bit powerpc does, namely to cause a self-IPI by setting the decrementer to 1. This means we can remove the tests in the exception exit path and raw_local_irq_restore. This also makes sure that the call to perf_event_do_pending from timer_interrupt() happens within irq_enter/irq_exit. (Note that calling perf_event_do_pending from timer_interrupt does not mean that there is a possible 1/HZ latency; setting the decrementer to 1 ensures that the timer interrupt will happen immediately, i.e. within one timebase tick, which is a few nanoseconds or 10s of nanoseconds.) Signed-off-by: Paul Mackerras Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 60 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel/time.c') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a3e49a..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_event_pending); +#ifdef CONFIG_PERF_EVENTS -void set_perf_event_pending(void) +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_perf_event_pending(void) { - get_cpu_var(perf_event_pending) = 1; - set_dec(1); - put_cpu_var(perf_event_pending); + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_event_pending))); + return x; } +static inline void set_perf_event_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +static inline void clear_perf_event_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, perf_event_pending); + +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* 32 vs 64 bit */ + +void set_perf_event_pending(void) +{ + preempt_disable(); + set_perf_event_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_PERF_EVENTS */ #define test_perf_event_pending() 0 #define clear_perf_event_pending() -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); + } + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; -- cgit v1.2.3-58-ga151