From 115684961a335a1c97074158e8f789118ac8b00d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Mar 2015 09:41:35 +0100 Subject: GHES: Carve out error queueing in a separate function Make the handler more readable. No functionality change. Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 51 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e82d0976a5d0..fe1e41bf5609 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -797,6 +797,32 @@ static void ghes_print_queued_estatus(void) } } +/* Save estatus for further processing in IRQ context */ +static void __process_error(struct ghes *ghes) +{ +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + u32 len, node_len; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; + + if (ghes_estatus_cached(ghes->estatus)) + return; + + len = cper_estatus_len(ghes->estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); + if (!estatus_node) + return; + + estatus_node->ghes = ghes; + estatus_node->generic = ghes->generic; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + memcpy(estatus, ghes->estatus, len); + llist_add(&estatus_node->llnode, &ghes_estatus_llist); +#endif +} + static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes, *ghes_global = NULL; @@ -832,32 +858,13 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - u32 len, node_len; - struct ghes_estatus_node *estatus_node; - struct acpi_hest_generic_status *estatus; -#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - if (ghes_estatus_cached(ghes->estatus)) - goto next; - /* Save estatus for further processing in IRQ context */ - len = cper_estatus_len(ghes->estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); - estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, - node_len); - if (estatus_node) { - estatus_node->ghes = ghes; - estatus_node->generic = ghes->generic; - estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, ghes->estatus, len); - llist_add(&estatus_node->llnode, &ghes_estatus_llist); - } -next: -#endif + + __process_error(ghes); ghes_clear_estatus(ghes); } + #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG irq_work_queue(&ghes_proc_irq_work); #endif -- cgit v1.2.3-58-ga151 From e10be03f603d521d5c8ac0bb0f48e5723ce19d58 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Mar 2015 09:52:39 +0100 Subject: GHES: Carve out the panic functionality ... into another function for more clarity. No functionality change. Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fe1e41bf5609..712ed95b1dca 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -823,6 +823,18 @@ static void __process_error(struct ghes *ghes) #endif } +static void __ghes_panic(struct ghes *ghes) +{ + oops_begin(); + ghes_print_queued_estatus(); + __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + + /* reboot to log the error! */ + if (panic_timeout == 0) + panic_timeout = ghes_panic_timeout; + panic("Fatal hardware error!"); +} + static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes, *ghes_global = NULL; @@ -846,16 +858,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (ret == NMI_DONE) goto out; - if (sev_global >= GHES_SEV_PANIC) { - oops_begin(); - ghes_print_queued_estatus(); - __ghes_print_estatus(KERN_EMERG, ghes_global->generic, - ghes_global->estatus); - /* reboot to log the error! */ - if (panic_timeout == 0) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); - } + if (sev_global >= GHES_SEV_PANIC) + __ghes_panic(ghes_global); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { if (!(ghes->flags & GHES_TO_CLEAR)) -- cgit v1.2.3-58-ga151 From 6169ddf846c528509e66a0fe7804393aa330a970 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Mar 2015 09:55:21 +0100 Subject: GHES: Panic right after detection The moment we log an error of panic severity, there's no need to noodle through the ghes_nmi list anymore. So panic instead right then and there. Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 712ed95b1dca..0de3adcca03e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -837,9 +837,8 @@ static void __ghes_panic(struct ghes *ghes) static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { - struct ghes *ghes, *ghes_global = NULL; - int sev, sev_global = -1; - int ret = NMI_DONE; + struct ghes *ghes; + int sev, ret = NMI_DONE; raw_spin_lock(&ghes_nmi_lock); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { @@ -847,20 +846,17 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) ghes_clear_estatus(ghes); continue; } + sev = ghes_severity(ghes->estatus->error_severity); - if (sev > sev_global) { - sev_global = sev; - ghes_global = ghes; - } + if (sev >= GHES_SEV_PANIC) + __ghes_panic(ghes); + ret = NMI_HANDLED; } if (ret == NMI_DONE) goto out; - if (sev_global >= GHES_SEV_PANIC) - __ghes_panic(ghes_global); - list_for_each_entry_rcu(ghes, &ghes_nmi, list) { if (!(ghes->flags & GHES_TO_CLEAR)) continue; -- cgit v1.2.3-58-ga151 From 2383844d4850888cfdf6d202563d2ddb4125a4e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Mar 2015 10:12:35 +0100 Subject: GHES: Elliminate double-loop in the NMI handler There's no real need to iterate twice over the HW error sources in the NMI handler. With the previous cleanups, elliminating the second loop is almost trivial. Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0de3adcca03e..94a44bad5576 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -851,25 +851,18 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (sev >= GHES_SEV_PANIC) __ghes_panic(ghes); - ret = NMI_HANDLED; - } - - if (ret == NMI_DONE) - goto out; - - list_for_each_entry_rcu(ghes, &ghes_nmi, list) { if (!(ghes->flags & GHES_TO_CLEAR)) continue; __process_error(ghes); ghes_clear_estatus(ghes); + + ret = NMI_HANDLED; } #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG irq_work_queue(&ghes_proc_irq_work); #endif - -out: raw_spin_unlock(&ghes_nmi_lock); return ret; } -- cgit v1.2.3-58-ga151 From 6fe9e7c26a97105645fd24f264f1b94e21aade3e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Mar 2015 10:05:00 +0100 Subject: GHES: Make NMI handler have a single reader Since GHES sources are global, we theoretically need only a single CPU reading them per NMI instead of a thundering herd of CPUs waiting on a spinlock in NMI context for no reason at all. Do that. Signed-off-by: Jiri Kosina Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 94a44bad5576..2bfd53cbfe80 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -729,10 +729,10 @@ static struct llist_head ghes_estatus_llist; static struct irq_work ghes_proc_irq_work; /* - * NMI may be triggered on any CPU, so ghes_nmi_lock is used for - * mutual exclusion. + * NMI may be triggered on any CPU, so ghes_in_nmi is used for + * having only one concurrent reader. */ -static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); +static atomic_t ghes_in_nmi = ATOMIC_INIT(0); static LIST_HEAD(ghes_nmi); @@ -840,7 +840,9 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) struct ghes *ghes; int sev, ret = NMI_DONE; - raw_spin_lock(&ghes_nmi_lock); + if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) + return ret; + list_for_each_entry_rcu(ghes, &ghes_nmi, list) { if (ghes_read_estatus(ghes, 1)) { ghes_clear_estatus(ghes); @@ -863,7 +865,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG irq_work_queue(&ghes_proc_irq_work); #endif - raw_spin_unlock(&ghes_nmi_lock); + atomic_dec(&ghes_in_nmi); return ret; } -- cgit v1.2.3-58-ga151