Diffstat (limited to 'drivers/infiniband/hw/mlx5')
 drivers/infiniband/hw/mlx5/cmd.c     |  45
 drivers/infiniband/hw/mlx5/cmd.h     |   3
 drivers/infiniband/hw/mlx5/cong.c    |  28
 drivers/infiniband/hw/mlx5/main.c    |  24
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  51
 drivers/infiniband/hw/mlx5/mr.c      | 490
 drivers/infiniband/hw/mlx5/odp.c     |  67
 drivers/infiniband/hw/mlx5/qp.c      | 164
 drivers/infiniband/hw/mlx5/qp.h      |   4
 drivers/infiniband/hw/mlx5/qpc.c     |   7
 drivers/infiniband/hw/mlx5/srq.c     |   2
 drivers/infiniband/hw/mlx5/umr.c     |   4
 drivers/infiniband/hw/mlx5/wr.c      |   2
 13 files changed, 649 insertions(+), 242 deletions(-)
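The mr.c hunks below replace the fixed ent[MAX_MKEY_CACHE_ENTRIES] array with an rb-tree (cache->rb_root / rb_lock) keyed by struct mlx5r_cache_rb_key, where ndescs is compared last so that a lookup can take an exact match on ats/access_mode/access_flags and fall back to the next-larger entry in size. The following standalone userspace sketch (not kernel code; names and the array-based search are simplified stand-ins) mimics the lookup semantics of mkey_cache_ent_from_rb_key():

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for struct mlx5r_cache_rb_key. */
struct cache_key {
	unsigned int ats;
	unsigned int access_mode;
	unsigned int access_flags;
	unsigned int ndescs;	/* compared last: only "closest fit" in size */
};

static int key_cmp(struct cache_key a, struct cache_key b)
{
	if (a.ats != b.ats)
		return a.ats < b.ats ? -1 : 1;
	if (a.access_mode != b.access_mode)
		return a.access_mode < b.access_mode ? -1 : 1;
	if (a.access_flags != b.access_flags)
		return a.access_flags < b.access_flags ? -1 : 1;
	if (a.ndescs != b.ndescs)
		return a.ndescs < b.ndescs ? -1 : 1;
	return 0;
}

/*
 * Exact match if one exists; otherwise the smallest entry whose ndescs is
 * >= the request, provided the other key fields match; otherwise NULL.
 */
static const struct cache_key *find_closest(const struct cache_key *ents,
					    size_t n, struct cache_key want)
{
	const struct cache_key *smallest = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		int cmp = key_cmp(ents[i], want);

		if (cmp == 0)
			return &ents[i];
		if (cmp > 0 && (!smallest || key_cmp(ents[i], *smallest) < 0))
			smallest = &ents[i];
	}

	if (smallest && smallest->ats == want.ats &&
	    smallest->access_mode == want.access_mode &&
	    smallest->access_flags == want.access_flags)
		return smallest;
	return NULL;
}

int main(void)
{
	const struct cache_key ents[] = {
		{ .access_mode = 1, .ndescs = 8 },
		{ .access_mode = 1, .ndescs = 16 },
		{ .access_mode = 1, .ndescs = 64 },
	};
	struct cache_key want = { .access_mode = 1, .ndescs = 20 };
	const struct cache_key *ent =
		find_closest(ents, sizeof(ents) / sizeof(ents[0]), want);

	/* Picks the 64-descriptor entry: next size up from the request. */
	printf("picked entry with ndescs=%u\n", ent ? ent->ndescs : 0);
	return 0;
}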
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ff3742b0460a..1d0c8d5e745b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -5,34 +5,41 @@ #include "cmd.h" -int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey) +int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev) { u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + bool is_terminate, is_dump, is_null; int err; - MLX5_SET(query_special_contexts_in, in, opcode, - MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); - if (!err) - *mkey = MLX5_GET(query_special_contexts_out, out, - dump_fill_mkey); - return err; -} + is_terminate = MLX5_CAP_GEN(dev->mdev, terminate_scatter_list_mkey); + is_dump = MLX5_CAP_GEN(dev->mdev, dump_fill_mkey); + is_null = MLX5_CAP_GEN(dev->mdev, null_mkey); -int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) -{ - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; - int err; + dev->mkeys.terminate_scatter_list_mkey = MLX5_TERMINATE_SCATTER_LIST_LKEY; + if (!is_terminate && !is_dump && !is_null) + return 0; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); - if (!err) - *null_mkey = MLX5_GET(query_special_contexts_out, out, - null_mkey); - return err; + err = mlx5_cmd_exec_inout(dev->mdev, query_special_contexts, in, out); + if (err) + return err; + + if (is_dump) + dev->mkeys.dump_fill_mkey = MLX5_GET(query_special_contexts_out, + out, dump_fill_mkey); + + if (is_null) + dev->mkeys.null_mkey = cpu_to_be32( + MLX5_GET(query_special_contexts_out, out, null_mkey)); + + if (is_terminate) + dev->mkeys.terminate_scatter_list_mkey = + cpu_to_be32(MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey)); + + return 0; } int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index ee46638db5de..93a971a40d11 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -37,8 +37,7 @@ #include <linux/kernel.h> #include <linux/mlx5/driver.h> -int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); -int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); +int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out); int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 290ea8ac3838..f87531318feb 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -38,6 +38,7 @@ enum mlx5_ib_cong_node_type { MLX5_IB_RROCE_ECN_RP = 1, MLX5_IB_RROCE_ECN_NP = 2, + MLX5_IB_RROCE_GENERAL = 3, }; static const char * const mlx5_ib_dbg_cc_name[] = { @@ -61,6 +62,8 @@ static const char * const mlx5_ib_dbg_cc_name[] = { "np_cnp_dscp", "np_cnp_prio_mode", "np_cnp_prio", + "rtt_resp_dscp_valid", + "rtt_resp_dscp", }; #define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) @@ -84,14 +87,18 @@ static const char * const mlx5_ib_dbg_cc_name[] = { #define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) +#define MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR BIT(0) + static enum 
mlx5_ib_cong_node_type mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset) { - if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE && - param_offset <= MLX5_IB_DBG_CC_RP_GD) + if (param_offset <= MLX5_IB_DBG_CC_RP_GD) return MLX5_IB_RROCE_ECN_RP; - else + + if (param_offset <= MLX5_IB_DBG_CC_NP_CNP_PRIO) return MLX5_IB_RROCE_ECN_NP; + + return MLX5_IB_RROCE_GENERAL; } static u32 mlx5_get_cc_param_val(void *field, int offset) @@ -157,6 +164,12 @@ static u32 mlx5_get_cc_param_val(void *field, int offset) case MLX5_IB_DBG_CC_NP_CNP_PRIO: return MLX5_GET(cong_control_r_roce_ecn_np, field, cnp_802p_prio); + case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID: + return MLX5_GET(cong_control_r_roce_general, field, + rtt_resp_dscp_valid); + case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP: + return MLX5_GET(cong_control_r_roce_general, field, + rtt_resp_dscp); default: return 0; } @@ -264,6 +277,15 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0); MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); break; + case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID: + *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, var); + break; + case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP: + *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, 1); + MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp, var); + break; } } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 471c3455dfeb..5b988db66b8f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1756,13 +1756,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi = &context->bfregi; - int err; if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - err = mlx5_cmd_dump_fill_mkey(dev->mdev, - &resp->dump_fill_mkey); - if (err) - return err; + resp->dump_fill_mkey = dev->mkeys.dump_fill_mkey; resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; } @@ -3666,6 +3662,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->port[i].roce.last_port_state = IB_PORT_DOWN; } + err = mlx5r_cmd_query_special_mkeys(dev); + if (err) + return err; + err = mlx5_ib_init_multiport_master(dev); if (err) return err; @@ -4030,12 +4030,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - int err; - - err = mlx5_mkey_cache_cleanup(dev); - if (err) - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - + mlx5_mkey_cache_cleanup(dev); mlx5r_umr_resource_cleanup(dev); } @@ -4433,6 +4428,10 @@ static int __init mlx5_ib_init(void) return -ENOMEM; } + ret = mlx5_ib_qp_event_init(); + if (ret) + goto qp_event_err; + mlx5_ib_odp_init(); ret = mlx5r_rep_init(); if (ret) @@ -4450,6 +4449,8 @@ drv_err: mp_err: mlx5r_rep_cleanup(); rep_err: + mlx5_ib_qp_event_cleanup(); +qp_event_err: destroy_workqueue(mlx5_ib_event_wq); free_page((unsigned long)xlt_emergency_page); return ret; @@ -4461,6 +4462,7 @@ static void __exit mlx5_ib_cleanup(void) auxiliary_driver_unregister(&mlx5r_mp_driver); mlx5r_rep_cleanup(); + mlx5_ib_qp_event_cleanup(); destroy_workqueue(mlx5_ib_event_wq); free_page((unsigned long)xlt_emergency_page); } diff --git 
a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7394e7f36ba7..efa4dc6e7dee 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,12 +617,21 @@ enum mlx5_mkey_type { MLX5_MKEY_INDIRECT_DEVX, }; +struct mlx5r_cache_rb_key { + u8 ats:1; + unsigned int access_mode; + unsigned int access_flags; + unsigned int ndescs; +}; + struct mlx5_ib_mkey { u32 key; enum mlx5_mkey_type type; unsigned int ndescs; struct wait_queue_head wait; refcount_t usecount; + /* User Mkey must hold either a rb_key or a cache_ent. */ + struct mlx5r_cache_rb_key rb_key; struct mlx5_cache_ent *cache_ent; }; @@ -737,11 +746,11 @@ struct mlx5_cache_ent { unsigned long reserved; char name[4]; - u32 order; - u32 access_mode; - u32 page; - unsigned int ndescs; + struct rb_node node; + struct mlx5r_cache_rb_key rb_key; + + u8 is_tmp:1; u8 disabled:1; u8 fill_to_high_water:1; @@ -771,9 +780,11 @@ struct mlx5r_async_create_mkey { struct mlx5_mkey_cache { struct workqueue_struct *wq; - struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES]; - struct dentry *root; + struct rb_root rb_root; + struct mutex rb_lock; + struct dentry *fs_root; unsigned long last_add; + struct delayed_work remove_ent_dwork; }; struct mlx5_ib_port_resources { @@ -877,6 +888,8 @@ enum mlx5_ib_dbg_cc_types { MLX5_IB_DBG_CC_NP_CNP_DSCP, MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, MLX5_IB_DBG_CC_NP_CNP_PRIO, + MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID, + MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP, MLX5_IB_DBG_CC_MAX, }; @@ -1054,6 +1067,13 @@ struct mlx5_port_caps { u8 ext_port_cap; }; + +struct mlx5_special_mkeys { + u32 dump_fill_mkey; + __be32 null_mkey; + __be32 terminate_scatter_list_mkey; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1084,7 +1104,6 @@ struct mlx5_ib_dev { struct xarray odp_mkeys; - u32 null_mkey; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; @@ -1113,6 +1132,7 @@ struct mlx5_ib_dev { struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u16 pkey_table_len; u8 lag_ports; + struct mlx5_special_mkeys mkeys; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1319,11 +1339,15 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); -int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); +void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); +struct mlx5_cache_ent * +mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, + struct mlx5r_cache_rb_key rb_key, + bool persistent_entry); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags); + int access_flags, int access_mode, + int ndescs); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -1347,7 +1371,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent); +int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev); void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1366,7 +1390,10 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, static 
inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} -static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {} +static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) +{ + return 0; +} static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 053fe946e45a..67356f515261 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); } - -static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, - void *to_store) +static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings, + void *to_store) { XA_STATE(xas, &ent->mkeys, 0); void *curr; - xa_lock_irq(&ent->mkeys); if (limit_pendings && - (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { - xa_unlock_irq(&ent->mkeys); + (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) return -EAGAIN; - } + while (1) { /* * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version @@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, break; xa_lock_irq(&ent->mkeys); } + xa_lock_irq(&ent->mkeys); if (xas_error(&xas)) return xas_error(&xas); if (WARN_ON(curr)) @@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, return 0; } +static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, + void *to_store) +{ + int ret; + + xa_lock_irq(&ent->mkeys); + ret = push_mkey_locked(ent, limit_pendings, to_store); + xa_unlock_irq(&ent->mkeys); + return ret; +} + static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent) { void *old; @@ -292,12 +301,14 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (ent->rb_key.access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, translations_octword_size, - get_mkc_octo_size(ent->access_mode, ent->ndescs)); - MLX5_SET(mkc, mkc, log_page_size, ent->page); + get_mkc_octo_size(ent->rb_key.access_mode, + ent->rb_key.ndescs)); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); } /* Asynchronously schedule new MRs to be populated in the cache. 
*/ @@ -515,18 +526,22 @@ static const struct file_operations limit_fops = { static bool someone_adding(struct mlx5_mkey_cache *cache) { - unsigned int i; - - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - struct mlx5_cache_ent *ent = &cache->ent[i]; - bool ret; + struct mlx5_cache_ent *ent; + struct rb_node *node; + bool ret; + mutex_lock(&cache->rb_lock); + for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ret = ent->stored < ent->limit; xa_unlock_irq(&ent->mkeys); - if (ret) + if (ret) { + mutex_unlock(&cache->rb_lock); return true; + } } + mutex_unlock(&cache->rb_lock); return false; } @@ -539,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { lockdep_assert_held(&ent->mkeys.xa_lock); - if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) + if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) return; if (ent->stored < ent->limit) { ent->fill_to_high_water = true; @@ -590,8 +605,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (err != -EAGAIN) { mlx5_ib_warn( dev, - "command failed order %d, err %d\n", - ent->order, err); + "add keys command failed, err %d\n", + err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } @@ -637,16 +652,99 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) +static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, + struct mlx5r_cache_rb_key key2) +{ + int res; + + res = key1.ats - key2.ats; + if (res) + return res; + + res = key1.access_mode - key2.access_mode; + if (res) + return res; + + res = key1.access_flags - key2.access_flags; + if (res) + return res; + + /* + * keep ndescs the last in the compare table since the find function + * searches for an exact match on all properties and only closest + * match in size. + */ + return key1.ndescs - key2.ndescs; +} + +static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, + struct mlx5_cache_ent *ent) +{ + struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; + struct mlx5_cache_ent *cur; + int cmp; + + /* Figure out where to put new node */ + while (*new) { + cur = rb_entry(*new, struct mlx5_cache_ent, node); + parent = *new; + cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); + if (cmp > 0) + new = &((*new)->rb_left); + if (cmp < 0) + new = &((*new)->rb_right); + if (cmp == 0) { + mutex_unlock(&cache->rb_lock); + return -EEXIST; + } + } + + /* Add new node and rebalance tree. */ + rb_link_node(&ent->node, parent, new); + rb_insert_color(&ent->node, &cache->rb_root); + + return 0; +} + +static struct mlx5_cache_ent * +mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, + struct mlx5r_cache_rb_key rb_key) +{ + struct rb_node *node = dev->cache.rb_root.rb_node; + struct mlx5_cache_ent *cur, *smallest = NULL; + int cmp; + + /* + * Find the smallest ent with order >= requested_order. + */ + while (node) { + cur = rb_entry(node, struct mlx5_cache_ent, node); + cmp = cache_ent_key_cmp(cur->rb_key, rb_key); + if (cmp > 0) { + smallest = cur; + node = node->rb_left; + } + if (cmp < 0) + node = node->rb_right; + if (cmp == 0) + return cur; + } + + return (smallest && + smallest->rb_key.access_mode == rb_key.access_mode && + smallest->rb_key.access_flags == rb_key.access_flags && + smallest->rb_key.ats == rb_key.ats) ? 
+ smallest : + NULL; +} + +static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + struct mlx5_cache_ent *ent, + int access_flags) { struct mlx5_ib_mr *mr; int err; - if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); - mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -677,10 +775,48 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, return mr; } -static void clean_keys(struct mlx5_ib_dev *dev, int c) +static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, + int access_flags) +{ + int ret = 0; + + if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + ret |= IB_ACCESS_REMOTE_ATOMIC; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + return ret; +} + +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + int access_flags, int access_mode, + int ndescs) +{ + struct mlx5r_cache_rb_key rb_key = { + .ndescs = ndescs, + .access_mode = access_mode, + .access_flags = get_unchangeable_access_flags(dev, access_flags) + }; + struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); + + if (!ent) + return ERR_PTR(-EOPNOTSUPP); + + return _mlx5_mr_cache_alloc(dev, ent, access_flags); +} + +static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) { - struct mlx5_mkey_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; u32 mkey; cancel_delayed_work(&ent->dwork); @@ -699,31 +835,39 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) if (!mlx5_debugfs_root || dev->is_rep) return; - debugfs_remove_recursive(dev->cache.root); - dev->cache.root = NULL; + debugfs_remove_recursive(dev->cache.fs_root); + dev->cache.fs_root = NULL; } -static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, + struct mlx5_cache_ent *ent) { - struct mlx5_mkey_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; + int order = order_base_2(ent->rb_key.ndescs); struct dentry *dir; - int i; if (!mlx5_debugfs_root || dev->is_rep) return; - cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev)); + if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) + order = MLX5_IMR_KSM_CACHE_ENTRY + 2; - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - sprintf(ent->name, "%d", ent->order); - dir = debugfs_create_dir(ent->name, cache->root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_ulong("cur", 0400, dir, &ent->stored); - debugfs_create_u32("miss", 0600, dir, &ent->miss); - } + sprintf(ent->name, "%d", order); + dir = debugfs_create_dir(ent->name, dev->cache.fs_root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_ulong("cur", 0400, dir, &ent->stored); + debugfs_create_u32("miss", 0600, dir, &ent->miss); +} + +static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct dentry *dbg_root = 
mlx5_debugfs_get_dev_root(dev->mdev); + struct mlx5_mkey_cache *cache = &dev->cache; + + if (!mlx5_debugfs_root || dev->is_rep) + return; + + cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); } static void delay_time_func(struct timer_list *t) @@ -733,13 +877,100 @@ static void delay_time_func(struct timer_list *t) WRITE_ONCE(dev->fill_delay, 0); } +struct mlx5_cache_ent * +mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, + struct mlx5r_cache_rb_key rb_key, + bool persistent_entry) +{ + struct mlx5_cache_ent *ent; + int order; + int ret; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return ERR_PTR(-ENOMEM); + + xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); + ent->rb_key = rb_key; + ent->dev = dev; + ent->is_tmp = !persistent_entry; + + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + + ret = mlx5_cache_ent_insert(&dev->cache, ent); + if (ret) { + kfree(ent); + return ERR_PTR(ret); + } + + if (persistent_entry) { + if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) + order = MLX5_IMR_KSM_CACHE_ENTRY; + else + order = order_base_2(rb_key.ndescs) - 2; + + if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && + !dev->is_rep && mlx5_core_is_pf(dev->mdev) && + mlx5r_umr_can_load_pas(dev, 0)) + ent->limit = dev->mdev->profile.mr_cache[order].limit; + else + ent->limit = 0; + + mlx5_mkey_cache_debugfs_add_ent(dev, ent); + } else { + mod_delayed_work(ent->dev->cache.wq, + &ent->dev->cache.remove_ent_dwork, + msecs_to_jiffies(30 * 1000)); + } + + return ent; +} + +static void remove_ent_work_func(struct work_struct *work) +{ + struct mlx5_mkey_cache *cache; + struct mlx5_cache_ent *ent; + struct rb_node *cur; + + cache = container_of(work, struct mlx5_mkey_cache, + remove_ent_dwork.work); + mutex_lock(&cache->rb_lock); + cur = rb_last(&cache->rb_root); + while (cur) { + ent = rb_entry(cur, struct mlx5_cache_ent, node); + cur = rb_prev(cur); + mutex_unlock(&cache->rb_lock); + + xa_lock_irq(&ent->mkeys); + if (!ent->is_tmp) { + xa_unlock_irq(&ent->mkeys); + mutex_lock(&cache->rb_lock); + continue; + } + xa_unlock_irq(&ent->mkeys); + + clean_keys(ent->dev, ent); + mutex_lock(&cache->rb_lock); + } + mutex_unlock(&cache->rb_lock); +} + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; + struct rb_root *root = &dev->cache.rb_root; + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_MTT, + }; struct mlx5_cache_ent *ent; + struct rb_node *node; + int ret; int i; mutex_init(&dev->slow_path_mutex); + mutex_init(&dev->cache.rb_lock); + dev->cache.rb_root = RB_ROOT; + INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); @@ -748,52 +979,51 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); - ent->order = i + 2; - ent->dev = dev; - ent->limit = 0; - - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - - if (i > MKEY_CACHE_LAST_STD_ENTRY) { - mlx5_odp_init_mkey_cache_entry(ent); - continue; + mlx5_mkey_cache_debugfs_init(dev); + mutex_lock(&cache->rb_lock); + for (i = 0; i <= mkey_cache_max_order(dev); i++) { + rb_key.ndescs = 1 << (i + 2); + ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); + if 
(IS_ERR(ent)) { + ret = PTR_ERR(ent); + goto err; } + } - if (ent->order > mkey_cache_max_order(dev)) - continue; + ret = mlx5_odp_init_mkey_cache(dev); + if (ret) + goto err; - ent->page = PAGE_SHIFT; - ent->ndescs = 1 << ent->order; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5r_umr_can_load_pas(dev, 0)) - ent->limit = dev->mdev->profile.mr_cache[i].limit; - else - ent->limit = 0; + mutex_unlock(&cache->rb_lock); + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); queue_adjust_cache_locked(ent); xa_unlock_irq(&ent->mkeys); } - mlx5_mkey_cache_debugfs_init(dev); - return 0; + +err: + mutex_unlock(&cache->rb_lock); + mlx5_mkey_cache_debugfs_cleanup(dev); + mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); + return ret; } -int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) +void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) { - unsigned int i; + struct rb_root *root = &dev->cache.rb_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; if (!dev->cache.wq) - return 0; - - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - struct mlx5_cache_ent *ent = &dev->cache.ent[i]; + return; + cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); + mutex_lock(&dev->cache.rb_lock); + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ent->disabled = true; xa_unlock_irq(&ent->mkeys); @@ -803,13 +1033,18 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) - clean_keys(dev, i); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + kfree(ent); + } + mutex_unlock(&dev->cache.rb_lock); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); - - return 0; } struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) @@ -873,23 +1108,10 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) static int mkey_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MKEY_CACHE_LAST_STD_ENTRY + 2; + return MKEY_CACHE_LAST_STD_ENTRY; return MLX5_MAX_UMR_SHIFT; } -static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, - unsigned int order) -{ - struct mlx5_mkey_cache *cache = &dev->cache; - - if (order < cache->ent[0].order) - return &cache->ent[0]; - order = order - cache->ent[0].order; - if (order > MKEY_CACHE_LAST_STD_ENTRY) - return NULL; - return &cache->ent[order]; -} - static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u64 length, int access_flags, u64 iova) { @@ -916,6 +1138,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) { + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_MTT, + }; struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; @@ -928,22 +1153,26 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); - ent = mkey_cache_ent_from_order( - dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + + rb_key.ndescs = 
ib_umem_num_dma_blocks(umem, page_size); + rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); + rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); + ent = mkey_cache_ent_from_rb_key(dev, rb_key); /* - * Matches access in alloc_cache_mr(). If the MR can't come from the - * cache then synchronously create an uncached one. + * If the MR can't come from the cache then synchronously create an uncached + * one. */ - if (!ent || ent->limit == 0 || - !mlx5r_umr_can_reconfig(dev, 0, access_flags) || - mlx5_umem_needs_ats(dev, umem, access_flags)) { + if (!ent) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); + if (IS_ERR(mr)) + return mr; + mr->mmkey.rb_key = rb_key; return mr; } - mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + mr = _mlx5_mr_cache_alloc(dev, ent, access_flags); if (IS_ERR(mr)) return mr; @@ -1030,6 +1259,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, goto err_2; } mr->mmkey.type = MLX5_MKEY_MR; + mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift); mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags, iova); kvfree(in); @@ -1372,7 +1602,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); if (WARN_ON(!*page_size)) return false; - return (1ULL << mr->mmkey.cache_ent->order) >= + return (mr->mmkey.cache_ent->rb_key.ndescs) >= ib_umem_num_dma_blocks(new_umem, *page_size); } @@ -1567,6 +1797,49 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) } } +static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, + struct mlx5_ib_mr *mr) +{ + struct mlx5_mkey_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int ret; + + if (mr->mmkey.cache_ent) { + xa_lock_irq(&mr->mmkey.cache_ent->mkeys); + mr->mmkey.cache_ent->in_use--; + goto end; + } + + mutex_lock(&cache->rb_lock); + ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); + if (ent) { + if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { + if (ent->disabled) { + mutex_unlock(&cache->rb_lock); + return -EOPNOTSUPP; + } + mr->mmkey.cache_ent = ent; + xa_lock_irq(&mr->mmkey.cache_ent->mkeys); + mutex_unlock(&cache->rb_lock); + goto end; + } + } + + ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); + mutex_unlock(&cache->rb_lock); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + mr->mmkey.cache_ent = ent; + xa_lock_irq(&mr->mmkey.cache_ent->mkeys); + +end: + ret = push_mkey_locked(mr->mmkey.cache_ent, false, + xa_mk_value(mr->mmkey.key)); + xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); + return ret; +} + int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -1612,16 +1885,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } /* Stop DMA */ - if (mr->mmkey.cache_ent) { - xa_lock_irq(&mr->mmkey.cache_ent->mkeys); - mr->mmkey.cache_ent->in_use--; - xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); - + if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length)) if (mlx5r_umr_revoke_mr(mr) || - push_mkey(mr->mmkey.cache_ent, false, - xa_mk_value(mr->mmkey.key))) + cache_ent_find_and_store(dev, mr)) mr->mmkey.cache_ent = NULL; - } + if (!mr->mmkey.cache_ent) { rc = destroy_mkey(to_mdev(mr->ibmr.device), mr); if (rc) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e6e021af6aa9..4a04cbc5b78a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ 
b/drivers/infiniband/hw/mlx5/odp.c @@ -104,7 +104,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); + pklm->key = mr_to_mdev(imr)->mkeys.null_mkey; pklm->va = 0; } return; @@ -137,7 +137,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); + pklm->key = mr_to_mdev(imr)->mkeys.null_mkey; pklm->va = 0; } } @@ -417,8 +417,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], - imr->access_flags); + mr = mlx5_mr_cache_alloc(dev, imr->access_flags, + MLX5_MKC_ACCESS_MODE_MTT, + MLX5_IMR_MTT_ENTRIES); if (IS_ERR(mr)) { ib_umem_odp_release(odp); return mr; @@ -492,9 +493,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, - &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], - access_flags); + imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM, + mlx5_imr_ksm_entries); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); return imr; @@ -986,7 +986,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, { int ret = 0, npages = 0; u64 io_virt; - u32 key; + __be32 key; u32 byte_count; size_t bcnt; int inline_segment; @@ -1000,7 +1000,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_wqe_data_seg *dseg = wqe; io_virt = be64_to_cpu(dseg->addr); - key = be32_to_cpu(dseg->lkey); + key = dseg->lkey; byte_count = be32_to_cpu(dseg->byte_count); inline_segment = !!(byte_count & MLX5_INLINE_SEG); bcnt = byte_count & ~MLX5_INLINE_SEG; @@ -1014,7 +1014,8 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, } /* receive WQE end of sg list. 
*/ - if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY && + if (receive_queue && bcnt == 0 && + key == dev->mkeys.terminate_scatter_list_mkey && io_virt == 0) break; @@ -1034,7 +1035,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, continue; } - ret = pagefault_single_data_segment(dev, NULL, key, + ret = pagefault_single_data_segment(dev, NULL, be32_to_cpu(key), io_virt, bcnt, &pfault->bytes_committed, bytes_mapped); @@ -1587,26 +1588,22 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) return err; } -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) +int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) { - if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return; + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_KSM, + .ndescs = mlx5_imr_ksm_entries, + }; + struct mlx5_cache_ent *ent; - switch (ent->order - 2) { - case MLX5_IMR_MTT_CACHE_ENTRY: - ent->page = PAGE_SHIFT; - ent->ndescs = MLX5_IMR_MTT_ENTRIES; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - ent->limit = 0; - break; + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return 0; - case MLX5_IMR_KSM_CACHE_ENTRY: - ent->page = MLX5_KSM_PAGE_SHIFT; - ent->ndescs = mlx5_imr_ksm_entries; - ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; - ent->limit = 0; - break; - } + ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + return 0; } static const struct ib_device_ops mlx5_ib_dev_odp_ops = { @@ -1615,25 +1612,15 @@ static const struct ib_device_ops mlx5_ib_dev_odp_ops = { int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret = 0; - internal_fill_odp_caps(dev); if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) - return ret; + return 0; ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); - if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { - ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); - if (ret) { - mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret); - return ret; - } - } - mutex_init(&dev->odp_eq_mutex); - return ret; + return 0; } void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index cf953d23d18d..7cc3b973dec7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -71,6 +71,14 @@ struct mlx5_modify_raw_qp_param { u32 port; }; +struct mlx5_ib_qp_event_work { + struct work_struct work; + struct mlx5_core_qp *qp; + int type; +}; + +static struct workqueue_struct *mlx5_ib_qp_event_wq; + static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); @@ -302,51 +310,120 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc); } +static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + void *pas_ext_union, *err_syn; + u32 *outb; + int err; + + if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) || + !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome)) + return; + + outb = kzalloc(outlen, GFP_KERNEL); + if (!outb) + return; + + err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen, + true); + if (err) + goto out; + + pas_ext_union = + MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas); + err_syn 
= MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union, + qpc_data_extension.error_syndrome); + + pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n", + ibqp->device->name, ibqp->port, ibqp->qp_num, + ib_wc_status_msg( + MLX5_GET(cqe_error_syndrome, err_syn, syndrome)), + MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome), + MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type), + MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome)); +out: + kfree(outb); +} + +static void mlx5_ib_handle_qp_event(struct work_struct *_work) +{ + struct mlx5_ib_qp_event_work *qpe_work = + container_of(_work, struct mlx5_ib_qp_event_work, work); + struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp; + struct ib_event event = {}; + + event.device = ibqp->device; + event.element.qp = ibqp; + switch (qpe_work->type) { + case MLX5_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX5_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX5_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", + qpe_work->type, qpe_work->qp->qpn); + goto out; + } + + if ((event.event == IB_EVENT_QP_FATAL) || + (event.event == IB_EVENT_QP_ACCESS_ERR)) + mlx5_ib_qp_err_syndrome(ibqp); + + ibqp->event_handler(&event, ibqp->qp_context); + +out: + mlx5_core_res_put(&qpe_work->qp->common); + kfree(qpe_work); +} + static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) { struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; - struct ib_event event; + struct mlx5_ib_qp_event_work *qpe_work; if (type == MLX5_EVENT_TYPE_PATH_MIG) { /* This event is only valid for trans_qps */ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port; } - if (ibqp->event_handler) { - event.device = ibqp->device; - event.element.qp = ibqp; - switch (type) { - case MLX5_EVENT_TYPE_PATH_MIG: - event.event = IB_EVENT_PATH_MIG; - break; - case MLX5_EVENT_TYPE_COMM_EST: - event.event = IB_EVENT_COMM_EST; - break; - case MLX5_EVENT_TYPE_SQ_DRAINED: - event.event = IB_EVENT_SQ_DRAINED; - break; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - event.event = IB_EVENT_QP_LAST_WQE_REACHED; - break; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - event.event = IB_EVENT_QP_FATAL; - break; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - event.event = IB_EVENT_PATH_MIG_ERR; - break; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - event.event = IB_EVENT_QP_REQ_ERR; - break; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - event.event = IB_EVENT_QP_ACCESS_ERR; - break; - default: - pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); - return; - } + if (!ibqp->event_handler) + goto out_no_handler; - ibqp->event_handler(&event, ibqp->qp_context); - } + qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC); + if (!qpe_work) + goto out_no_handler; + + qpe_work->qp = qp; + qpe_work->type = type; + INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event); + queue_work(mlx5_ib_qp_event_wq, &qpe_work->work); + return; + +out_no_handler: + mlx5_core_res_put(&qp->common); } static int set_rq_size(struct mlx5_ib_dev *dev, struct 
ib_qp_cap *cap, @@ -4827,7 +4904,8 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!outb) return -ENOMEM; - err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen); + err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen, + false); if (err) goto out; @@ -5720,3 +5798,17 @@ out: mutex_unlock(&mqp->mutex); return err; } + +int mlx5_ib_qp_event_init(void) +{ + mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0); + if (!mlx5_ib_qp_event_wq) + return -ENOMEM; + + return 0; +} + +void mlx5_ib_qp_event_cleanup(void) +{ + destroy_workqueue(mlx5_ib_qp_event_wq); +} diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 5d4e140db99c..77f9b4a54816 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -20,7 +20,7 @@ int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct); int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *out, int outlen); + u32 *out, int outlen, bool qpc_ext); int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, u32 *out, int outlen); @@ -44,4 +44,6 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res); int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); +int mlx5_ib_qp_event_init(void); +void mlx5_ib_qp_event_cleanup(void); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 542e4c63a8de..bae0334d6e7f 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -135,7 +135,8 @@ static int rsc_event_notifier(struct notifier_block *nb, case MLX5_RES_SQ: qp = (struct mlx5_core_qp *)common; qp->event(qp, event_type); - break; + /* Need to put resource in event handler */ + return NOTIFY_OK; case MLX5_RES_DCT: dct = (struct mlx5_core_dct *)common; if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED) @@ -504,12 +505,14 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev) } int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *out, int outlen) + u32 *out, int outlen, bool qpc_ext) { u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {}; MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); MLX5_SET(query_qp_in, in, qpn, qp->qpn); + MLX5_SET(query_qp_in, in, qpc_ext, qpc_ext); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 09b365a98bbf..a056ea835da5 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -447,7 +447,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (i < srq->msrq.max_avail_gather) { scat[i].byte_count = 0; - scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey; scat[i].addr = 0; } } diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 029e9536ec28..55f4e048d947 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -636,9 +636,7 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); cur_mtt = mtt; - 
rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter, - mr->umem->sgt_append.sgt.nents, - BIT(mr->page_shift)) { + rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) { if (cur_mtt == (void *)mtt + sg.length) { dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 855f3f4fefad..df1d1b0a3ef7 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1252,7 +1252,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (i < qp->rq.max_gs) { scat[i].byte_count = 0; - scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey; scat[i].addr = 0; } |
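The wr.c and srq.c hunks above work because mlx5r_cmd_query_special_mkeys() stores null_mkey and terminate_scatter_list_mkey as __be32 at init time, so the receive path can assign them into WQE scatter entries without a per-WQE cpu_to_be32(), while dump_fill_mkey stays in CPU order for the ucontext response. A minimal userspace sketch of that convention, using htonl()/ntohl() as stand-ins for cpu_to_be32()/be32_to_cpu() and a simplified struct:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct mlx5_special_mkeys. */
struct special_mkeys {
	uint32_t dump_fill_mkey;	/* CPU order: copied into a response to userspace */
	uint32_t null_mkey_be;		/* big endian: written straight into WQE segments */
};

int main(void)
{
	/* Query time: convert once. */
	struct special_mkeys mkeys = {
		.dump_fill_mkey = 0x100,
		.null_mkey_be	= htonl(0x200),
	};

	/* Data path: plain assignment, no per-WQE byte swap. */
	uint32_t wqe_lkey = mkeys.null_mkey_be;

	printf("lkey on the wire: 0x%08x\n", (unsigned int)ntohl(wqe_lkey));
	return 0;
}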