diff options
author | David S. Miller <davem@davemloft.net> | 2017-06-11 16:36:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-06-11 16:36:48 -0400 |
commit | 77a6bb5ac00dd48934dae0df4a24461cc7893d64 (patch) | |
tree | e1a45fb7ebec315e96979a45b78602e8230041b9 | |
parent | b87fa0fafef4b16495740432f4eb8262efa500d0 (diff) | |
parent | e7ff7efae5708513a795e329909ccbe2ac367b1a (diff) |
Merge branch 'ena-fixes'
Netanel Belgazal says:
====================
Bugs fixes in ena ethernet driver
This patchset contains fixes for the bugs that were discovered so far.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/amazon/ena/ena_com.c | 35 | ||||
-rw-r--r-- | drivers/net/ethernet/amazon/ena/ena_ethtool.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/amazon/ena/ena_netdev.c | 179 | ||||
-rw-r--r-- | drivers/net/ethernet/amazon/ena/ena_netdev.h | 18 |
4 files changed, 169 insertions, 65 deletions
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 08d11cede9c9..f5b237e0bd60 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -61,6 +61,8 @@ #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF +#define ENA_REGS_ADMIN_INTR_MASK 1 + /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ @@ -232,11 +234,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu tail_masked = admin_queue->sq.tail & queue_size_mask; /* In case of queue FULL */ - cnt = admin_queue->sq.tail - admin_queue->sq.head; + cnt = atomic_read(&admin_queue->outstanding_cmds); if (cnt >= admin_queue->q_depth) { - pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", - admin_queue->sq.tail, admin_queue->sq.head, - admin_queue->q_depth); + pr_debug("admin queue is full.\n"); admin_queue->stats.out_of_space++; return ERR_PTR(-ENOSPC); } @@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status) static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, struct ena_com_admin_queue *admin_queue) { - unsigned long flags; - u32 start_time; + unsigned long flags, timeout; int ret; - start_time = ((u32)jiffies_to_usecs(jiffies)); + timeout = jiffies + ADMIN_CMD_TIMEOUT_US; + + while (1) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + if (comp_ctx->status != ENA_CMD_SUBMITTED) + break; - while (comp_ctx->status == ENA_CMD_SUBMITTED) { - if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > - ADMIN_CMD_TIMEOUT_US) { + if (time_is_before_jiffies(timeout)) { pr_err("Wait for completion (polling) timeout\n"); /* ENA didn't have any completion */ spin_lock_irqsave(&admin_queue->q_lock, flags); @@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - spin_lock_irqsave(&admin_queue->q_lock, flags); - ena_com_handle_admin_completion(admin_queue); - spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(100); } @@ -1455,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) { + u32 mask_value = 0; + + if (polling) + mask_value = ENA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF); ena_dev->admin_queue.polling = polling; } diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 67b2338f8fb3..3ee55e2fd694 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(tx_poll), ENA_STAT_TX_ENTRY(doorbells), ENA_STAT_TX_ENTRY(prepare_ctx_err), - ENA_STAT_TX_ENTRY(missing_tx_comp), ENA_STAT_TX_ENTRY(bad_req_id), }; @@ -94,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(empty_rx_ring), }; static const struct ena_stats ena_stats_ena_com_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7c1214d78855..4f16ed38bcf3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; } } @@ -1078,6 +1079,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, rx_ring->per_napi_bytes = 0; } +static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + struct ena_eth_io_intr_reg intr_reg; + + /* Update intr register: rx intr delay, + * tx intr delay and interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. + * Tx and Rx CQ have pointer to it. + * So we use one of them to reach the intr reg + */ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); +} + static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { @@ -1108,7 +1129,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - struct ena_eth_io_intr_reg intr_reg; u32 tx_work_done; u32 rx_work_done; @@ -1149,22 +1169,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_intr_moderation(rx_ring, tx_ring); - /* Update intr register: rx intr delay, - * tx intr delay and interrupt unmask - */ - ena_com_update_intr_reg(&intr_reg, - rx_ring->smoothed_interval, - tx_ring->smoothed_interval, - true); - - /* It is a shared MSI-X. - * Tx and Rx CQ have pointer to it. - * So we use one of them to reach the intr reg - */ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); ret = rx_work_done; @@ -1485,6 +1492,11 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], + &adapter->rx_ring[i]); + /* schedule napi in case we had pending packets * from the last time we disable napi */ @@ -1532,6 +1544,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) "Failed to get TX queue handlers. TX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); @@ -1596,6 +1609,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); @@ -1981,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->tx_descs = nb_hw_desc; tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); @@ -2550,13 +2565,44 @@ err: "Reset attempt failed. Can not reset the device\n"); } -static void check_for_missing_tx_completions(struct ena_adapter *adapter) +static int check_missing_comp_in_queue(struct ena_adapter *adapter, + struct ena_ring *tx_ring) { struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; + u32 missed_tx = 0; + int i; + + for (i = 0; i < tx_ring->ring_size; i++) { + tx_buf = &tx_ring->tx_buffer_info[i]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && + time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + if (!tx_buf->print_once) + netif_notice(adapter, tx_err, adapter->netdev, + "Found a Tx that wasn't completed on time, qid %d, index %d.\n", + tx_ring->qid, i); + + tx_buf->print_once = 1; + missed_tx++; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + return -EIO; + } + } + } + + return 0; +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ struct ena_ring *tx_ring; - int i, j, budget; - u32 missed_tx; + int i, budget, rc; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2572,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { tx_ring = &adapter->tx_ring[i]; - for (j = 0; j < tx_ring->ring_size; j++) { - tx_buf = &tx_ring->tx_buffer_info[j]; - last_jiffies = tx_buf->last_jiffies; - if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { - netif_notice(adapter, tx_err, adapter->netdev, - "Found a Tx that wasn't completed on time, qid %d, index %d.\n", - tx_ring->qid, j); - - u64_stats_update_begin(&tx_ring->syncp); - missed_tx = tx_ring->tx_stats.missing_tx_comp++; - u64_stats_update_end(&tx_ring->syncp); - - /* Clear last jiffies so the lost buffer won't - * be counted twice. - */ - tx_buf->last_jiffies = 0; - - if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { - netif_err(adapter, tx_err, adapter->netdev, - "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n", - missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - } - } - } + rc = check_missing_comp_in_queue(adapter, tx_ring); + if (unlikely(rc)) + return; budget--; if (!budget) @@ -2606,6 +2630,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->last_monitored_tx_qid = i % adapter->num_queues; } +/* trigger napi schedule after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * napi handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The napi handler won't allocate new Rx descriptors so the device will be + * able to send new packets. + * + * This scenario can happen when the kernel's vm.min_free_kbytes is too small. + * It is recommended to have at least 512MB, with a minimum of 128MB for + * constrained environment). + * + * When such a situation is detected - Reschedule napi + */ +static void check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = + ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.empty_rx_ring++; + u64_stats_update_end(&rx_ring->syncp); + + netif_err(adapter, drv, adapter->netdev, + "trigger refill for ring %d\n", i); + + napi_schedule(rx_ring->napi); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { @@ -2660,6 +2736,8 @@ static void ena_timer_service(unsigned long data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (debug_area) ena_dump_stats_to_buf(adapter, debug_area); @@ -2840,6 +2918,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { int release_bars; + if (ena_dev->mem_bar) + devm_iounmap(&pdev->dev, ena_dev->mem_bar); + + devm_iounmap(&pdev->dev, ena_dev->reg_bar); + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } @@ -2927,8 +3010,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_ena_dev; } - ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), - pci_resource_len(pdev, ENA_REG_BAR)); + ena_dev->reg_bar = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); if (!ena_dev->reg_bar) { dev_err(&pdev->dev, "failed to remap regs bar\n"); rc = -EFAULT; @@ -2948,8 +3032,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), - pci_resource_len(pdev, ENA_MEM_BAR)); + ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); if (!ena_dev->mem_bar) { rc = -EFAULT; goto err_device_destroy; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0e22bce6239d..a4d3d5e21068 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -45,7 +45,7 @@ #define DRV_MODULE_VER_MAJOR 1 #define DRV_MODULE_VER_MINOR 1 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_SUBMINOR 7 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION @@ -146,7 +146,18 @@ struct ena_tx_buffer { u32 tx_descs; /* num of buffers used by this skb */ u32 num_of_bufs; - /* Save the last jiffies to detect missing tx packets */ + + /* Used for detect missing tx packets to limit the number of prints */ + u32 print_once; + /* Save the last jiffies to detect missing tx packets + * + * sets to non zero value on ena_start_xmit and set to zero on + * napi and timer_Service_routine. + * + * while this value is not protected by lock, + * a given packet is not expected to be handled by ena_start_xmit + * and by napi/timer_service at the same time. + */ unsigned long last_jiffies; struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; } ____cacheline_aligned; @@ -170,7 +181,6 @@ struct ena_stats_tx { u64 napi_comp; u64 tx_poll; u64 doorbells; - u64 missing_tx_comp; u64 bad_req_id; }; @@ -184,6 +194,7 @@ struct ena_stats_rx { u64 dma_mapping_err; u64 bad_desc_num; u64 rx_copybreak_pkt; + u64 empty_rx_ring; }; struct ena_ring { @@ -231,6 +242,7 @@ struct ena_ring { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; + int empty_rx_queue; } ____cacheline_aligned; struct ena_stats_dev { |