summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet
diff options
context:
space:
mode:
authorMichael Chan <michael.chan@broadcom.com>2021-09-05 14:10:59 -0400
committerDavid S. Miller <davem@davemloft.net>2021-09-05 20:43:04 +0100
commit1b2b91831983aeac3adcbb469aa8b0dc71453f89 (patch)
tree0da193066e0637452ce161f8cc738dc106b8f02b /drivers/net/ethernet
parent7ae9dc356f247ad9f9634b3da61a45eb72968b2e (diff)
bnxt_en: Fix possible unintended driver initiated error recovery
If error recovery is already enabled, bnxt_timer() will periodically check the heartbeat register and the reset counter. If we get an error recovery async. notification from the firmware (e.g. change in primary/secondary role), we will immediately read and update the heartbeat register and the reset counter. If the timer for the next health check expires soon after this, we may read the heartbeat register again in quick succession and find that it hasn't changed. This will trigger error recovery unintentionally. The likelihood is small because we also reset fw_health->tmr_counter which will reset the interval for the next health check. But the update is not protected and bnxt_timer() can miss the update and perform the health check without waiting for the full interval. Fix it by only reading the heartbeat register and reset counter in bnxt_async_event_process() if error recovery is trasitioning to the enabled state. Also add proper memory barriers so that when enabling for the first time, bnxt_timer() will see the tmr_counter interval and perform the health check after the full interval has elapsed. Fixes: 7e914027f757 ("bnxt_en: Enable health monitoring.") Reviewed-by: Edwin Peer <edwin.peer@broadcom.com> Signed-off-by: Michael Chan <michael.chan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c25
1 files changed, 18 insertions, 7 deletions
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 40a390652d8d..9b86516e59a1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2202,25 +2202,34 @@ static int bnxt_async_event_process(struct bnxt *bp,
if (!fw_health)
goto async_event_process_exit;
- fw_health->enabled = EVENT_DATA1_RECOVERY_ENABLED(data1);
- fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
- if (!fw_health->enabled) {
+ if (!EVENT_DATA1_RECOVERY_ENABLED(data1)) {
+ fw_health->enabled = false;
netif_info(bp, drv, bp->dev,
"Error recovery info: error recovery[0]\n");
break;
}
+ fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
fw_health->tmr_multiplier =
DIV_ROUND_UP(fw_health->polling_dsecs * HZ,
bp->current_interval * 10);
fw_health->tmr_counter = fw_health->tmr_multiplier;
- fw_health->last_fw_heartbeat =
- bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
- fw_health->last_fw_reset_cnt =
- bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+ if (!fw_health->enabled) {
+ fw_health->last_fw_heartbeat =
+ bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
+ fw_health->last_fw_reset_cnt =
+ bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+ }
netif_info(bp, drv, bp->dev,
"Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n",
fw_health->master, fw_health->last_fw_reset_cnt,
bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG));
+ if (!fw_health->enabled) {
+ /* Make sure tmr_counter is set and visible to
+ * bnxt_health_check() before setting enabled to true.
+ */
+ smp_wmb();
+ fw_health->enabled = true;
+ }
goto async_event_process_exit;
}
case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION:
@@ -11258,6 +11267,8 @@ static void bnxt_fw_health_check(struct bnxt *bp)
if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return;
+ /* Make sure it is enabled before checking the tmr_counter. */
+ smp_rmb();
if (fw_health->tmr_counter) {
fw_health->tmr_counter--;
return;