diff options
author | Matan Barak <matanb@mellanox.com> | 2016-04-17 17:08:40 +0300 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-05-18 10:45:49 -0400 |
commit | 94c6825e0ff75829207af6246782811b7c7af2c0 (patch) | |
tree | 695b147b15bbecbb234cbb06c6bd1fdab63c0abe /drivers | |
parent | e3b6d8cf8de6d07af9a27c86861edfa5b3290cb6 (diff) |
net/mlx5_core: Use tasklet for user-space CQ completion events
Previously, we've fired all our completion callbacks straight from
our ISR.
Some of those callbacks were lightweight (for example, mlx5 Ethernet
napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in ISR is generally considered wrong,
it could even lead to a hard lockup of the system. Since when a lot
of completion events are generated by the hardware, the loop over
those events could be so long, that we'll get into a hard lockup by
the system watchdog.
In order to avoid that, add a new way of invoking completion events
callbacks. In the interrupt itself, we add the CQs which receive
completion event to a per-EQ list and schedule a tasklet. In the
tasklet context we loop over all the CQs in the list and invoke the
user callback.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/cq.c | 59 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 |
4 files changed, 89 insertions, 1 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index b51e42d6fbec..873a631ad155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -39,6 +39,53 @@ #include <linux/mlx5/cq.h> #include "mlx5_core.h" +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx5_cq_tasklet_cb(unsigned long data) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; + struct mlx5_core_cq *mcq; + struct mlx5_core_cq *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, + tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq); + if (atomic_dec_and_test(&mcq->refcount)) + complete(&mcq->free); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +{ + unsigned long flags; + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + atomic_inc(&cq->refcount); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) { struct mlx5_core_cq *cq; @@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_create_cq_mbox_out out; struct mlx5_destroy_cq_mbox_in din; struct mlx5_destroy_cq_mbox_out dout; + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn); + struct mlx5_eq *eq; + + eq = mlx5_eqn2eq(dev, eqn); + if (IS_ERR(eq)) + return PTR_ERR(eq); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); memset(&out, 0, sizeof(out)); @@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->arm_sn = 0; atomic_set(&cq->refcount, 1); init_completion(&cq->free); + if (!cq->comp) + cq->comp = mlx5_add_cq_to_tasklet; + /* assuming CQ will be deleted before the EQ */ + cq->tasklet_ctx.priv = &eq->tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 18fccec72c5d..0e30602ef76d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) struct mlx5_eqe *eqe; int eqes_found = 0; int set_ci = 0; - u32 cqn; + u32 cqn = -1; u32 rsn; u8 port; @@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq_update_ci(eq, 1); + if (cqn != -1) + tasklet_schedule(&eq->tasklet_ctx.task); + return eqes_found; } @@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); + tasklet_disable(&eq->tasklet_ctx.task); mlx5_buf_free(dev, &eq->buf); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6892746fd10d..aa98d0234bd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -660,6 +660,23 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, } EXPORT_SYMBOL(mlx5_vector2eqn); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + + spin_lock(&table->lock); + list_for_each_entry(eq, &table->comp_eqs_list, list) + if (eq->eqn == eqn) { + spin_unlock(&table->lock); + return eq; + } + + spin_unlock(&table->lock); + + return ERR_PTR(-ENOENT); +} + static void free_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0b0b226c789e..f0d87046af8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -100,6 +100,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +void mlx5_cq_tasklet_cb(unsigned long data); void mlx5e_init(void); void mlx5e_cleanup(void); |