diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-18 09:39:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-18 09:39:22 -0700 |
commit | 9ea446352047d8350553250db51da2c73a610688 (patch) | |
tree | f75712fb5bfb5d2a83685023b2838f3c9ea40320 | |
parent | 9dffdb38d864ae89e16ff7b3a09451270736e35b (diff) | |
parent | 082eaa50838c6b70a8244f8b01d7ed7d686f84db (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"Initial roundup of 4.6 merge window patches.
This is the first of two pull requests. It is the smaller request,
but touches for more different things (this is everything but what is
in or going into staging). The pull request for the code in
staging/rdma is on hold until after we decide what to do on the
write/writev API issue and may be partially deferred until 4.7 as a
result.
Summary:
- cxgb4 updates
- nes updates
- unification of iwarp portmapper code to core
- add drain_cq API
- various ib_core updates
- minor ipoib updates
- minor mlx4 updates
- more significant mlx5 updates (including a minor merge conflict
with net-next tree...merge is simple to resolve and Stephen's
resolution was confirmed by Mellanox)
- trivial net/9p rdma conversion
- ocrdma RoCEv2 update
- srpt updates"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (85 commits)
iwpm: crash fix for large connections test
iw_cxgb3: support for iWARP port mapping
iw_cxgb4: remove port mapper related code
iw_nes: remove port mapper related code
iwcm: common code for port mapper
net/9p: convert to new CQ API
IB/mlx5: Add support for don't trap rules
net/mlx5_core: Introduce forward to next priority action
net/mlx5_core: Create anchor of last flow table
iser: Accept arbitrary sg lists mapping if the device supports it
mlx5: Add arbitrary sg list support
IB/core: Add arbitrary sg_list support
IB/mlx5: Expose correct max_fast_reg_page_list_len
IB/mlx5: Make coding style more consistent
IB/mlx5: Convert UMR CQ to new CQ API
IB/ocrdma: Skip using unneeded intermediate variable
IB/ocrdma: Skip using unneeded intermediate variable
IB/ocrdma: Delete unnecessary variable initialisations in 11 functions
IB/core: Documentation fix in the MAD header file
IB/core: trivial prink cleanup.
...
90 files changed, 3618 insertions, 1973 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 53343ffbff7a..cb00d59da456 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1043,8 +1043,8 @@ static void ib_cache_update(struct ib_device *device, ret = ib_query_port(device, port, tprops); if (ret) { - printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", - ret, device->name); + pr_warn("ib_query_port failed (%d) for %s\n", + ret, device->name); goto err; } @@ -1067,8 +1067,8 @@ static void ib_cache_update(struct ib_device *device, for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { - printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", - ret, device->name, i); + pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i); goto err; } } @@ -1078,8 +1078,8 @@ static void ib_cache_update(struct ib_device *device, ret = ib_query_gid(device, port, i, gid_cache->table + i, NULL); if (ret) { - printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", - ret, device->name, i); + pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); goto err; } } @@ -1161,8 +1161,7 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - printk(KERN_WARNING "Couldn't allocate cache " - "for %s\n", device->name); + pr_warn("Couldn't allocate cache for %s\n", device->name); return -ENOMEM; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9729639df407..93ab0ae97208 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1206,6 +1206,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, req->has_gid = true; req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); + if (req->pkey != req_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + req_param->bth_pkey, req->pkey); break; case IB_CM_SIDR_REQ_RECEIVED: req->device = sidr_param->listen_id->device; @@ -1213,6 +1217,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, req->has_gid = false; req->service_id = sidr_param->service_id; req->pkey = sidr_param->pkey; + if (req->pkey != sidr_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + sidr_param->bth_pkey, req->pkey); break; default: return -EINVAL; @@ -1713,7 +1721,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", + pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -2186,8 +2194,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(id, id_priv->backlog); if (ret) - printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " - "listening on device %s\n", ret, cma_dev->device->name); + pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", + ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) @@ -3239,7 +3247,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = 0; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", + pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -4003,8 +4011,8 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id if ((dev_addr->bound_dev_if == ndev->ifindex) && (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { - printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", - ndev->name, &id_priv->id); + pr_info("RDMA CM addr change for ndev %s used by id %p\n", + ndev->name, &id_priv->id); work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; @@ -4287,7 +4295,7 @@ static int __init cma_init(void) goto err; if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) - printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); + pr_warn("RDMA CMA: failed to add netlink callback\n"); cma_configfs_init(); return 0; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 94b80a51ab68..270c7ff6cba7 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -115,8 +115,8 @@ static int ib_device_check_mandatory(struct ib_device *device) for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((void *) device + mandatory_table[i].offset)) { - printk(KERN_WARNING "Device %s is missing mandatory function %s\n", - device->name, mandatory_table[i].name); + pr_warn("Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); return -EINVAL; } } @@ -255,8 +255,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) { - printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", - device->name, client->name); + pr_warn("Couldn't allocate client context for %s/%s\n", + device->name, client->name); return -ENOMEM; } @@ -343,29 +343,29 @@ int ib_register_device(struct ib_device *device, ret = read_port_immutable(device); if (ret) { - printk(KERN_WARNING "Couldn't create per port immutable data %s\n", - device->name); + pr_warn("Couldn't create per port immutable data %s\n", + device->name); goto out; } ret = ib_cache_setup_one(device); if (ret) { - printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); goto out; } memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { - printk(KERN_WARNING "Couldn't query the device attributes\n"); + pr_warn("Couldn't query the device attributes\n"); ib_cache_cleanup_one(device); goto out; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { - printk(KERN_WARNING "Couldn't register device %s with driver model\n", - device->name); + pr_warn("Couldn't register device %s with driver model\n", + device->name); ib_cache_cleanup_one(device); goto out; } @@ -566,8 +566,8 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, goto out; } - printk(KERN_WARNING "No client context found for %s/%s\n", - device->name, client->name); + pr_warn("No client context found for %s/%s\n", + device->name, client->name); out: spin_unlock_irqrestore(&device->client_data_lock, flags); @@ -960,13 +960,13 @@ static int __init ib_core_init(void) ret = class_register(&ib_class); if (ret) { - printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); + pr_warn("Couldn't create InfiniBand device class\n"); goto err_comp; } ret = ibnl_init(); if (ret) { - printk(KERN_WARNING "Couldn't init IB netlink interface\n"); + pr_warn("Couldn't init IB netlink interface\n"); goto err_sysfs; } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 6ac3683c144b..cdbb1f1a6d97 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -150,8 +150,8 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #ifdef DEBUG if (fmr->ref_count !=0) { - printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n", - fmr, fmr->ref_count); + pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n", + fmr, fmr->ref_count); } #endif } @@ -167,7 +167,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) ret = ib_unmap_fmr(&fmr_list); if (ret) - printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret); + pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); spin_lock_irq(&pool->pool_lock); list_splice(&unmap_list, &pool->free_list); @@ -222,8 +222,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, device = pd->device; if (!device->alloc_fmr || !device->dealloc_fmr || !device->map_phys_fmr || !device->unmap_fmr) { - printk(KERN_INFO PFX "Device %s does not support FMRs\n", - device->name); + pr_info(PFX "Device %s does not support FMRs\n", device->name); return ERR_PTR(-ENOSYS); } @@ -233,13 +232,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, max_remaps = device->attrs.max_map_per_fmr; pool = kmalloc(sizeof *pool, GFP_KERNEL); - if (!pool) { - printk(KERN_WARNING PFX "couldn't allocate pool struct\n"); + if (!pool) return ERR_PTR(-ENOMEM); - } pool->cache_bucket = NULL; - pool->flush_function = params->flush_function; pool->flush_arg = params->flush_arg; @@ -251,7 +247,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { - printk(KERN_WARNING PFX "Failed to allocate cache in pool\n"); + pr_warn(PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } @@ -275,7 +271,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, "ib_fmr(%s)", device->name); if (IS_ERR(pool->thread)) { - printk(KERN_WARNING PFX "couldn't start cleanup thread\n"); + pr_warn(PFX "couldn't start cleanup thread\n"); ret = PTR_ERR(pool->thread); goto out_free_pool; } @@ -294,11 +290,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, for (i = 0; i < params->pool_size; ++i) { fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); - if (!fmr) { - printk(KERN_WARNING PFX "failed to allocate fmr " - "struct for FMR %d\n", i); + if (!fmr) goto out_fail; - } fmr->pool = pool; fmr->remap_count = 0; @@ -307,8 +300,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); if (IS_ERR(fmr->fmr)) { - printk(KERN_WARNING PFX "fmr_create failed " - "for FMR %d\n", i); + pr_warn(PFX "fmr_create failed for FMR %d\n", + i); kfree(fmr); goto out_fail; } @@ -363,8 +356,8 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) } if (i < pool->pool_size) - printk(KERN_WARNING PFX "pool still has %d regions registered\n", - pool->pool_size - i); + pr_warn(PFX "pool still has %d regions registered\n", + pool->pool_size - i); kfree(pool->cache_bucket); kfree(pool); @@ -463,7 +456,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, list_add(&fmr->list, &pool->free_list); spin_unlock_irqrestore(&pool->pool_lock, flags); - printk(KERN_WARNING PFX "fmr_map returns %d\n", result); + pr_warn(PFX "fmr_map returns %d\n", result); return ERR_PTR(result); } @@ -517,8 +510,8 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) #ifdef DEBUG if (fmr->ref_count < 0) - printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n", - fmr, fmr->ref_count); + pr_warn(PFX "FMR %p has ref count %d < 0\n", + fmr, fmr->ref_count); #endif spin_unlock_irqrestore(&pool->pool_lock, flags); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ff9163dc1596..e28a160cdab0 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -50,6 +50,8 @@ #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> +#include <rdma/iw_portmap.h> +#include <rdma/rdma_netlink.h> #include "iwcm.h" @@ -57,6 +59,16 @@ MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); +static struct ibnl_client_cbs iwcm_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; @@ -402,6 +414,11 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (cm_id->mapped) { + iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); + iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); + } + (void)iwcm_deref_id(cm_id_priv); } @@ -426,6 +443,97 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id) } EXPORT_SYMBOL(iw_destroy_cm_id); +/** + * iw_cm_check_wildcard - If IP address is 0 then use original + * @pm_addr: sockaddr containing the ip to check for wildcard + * @cm_addr: sockaddr containing the actual IP address + * @cm_outaddr: sockaddr to set IP addr which leaving port + * + * Checks the pm_addr for wildcard and then sets cm_outaddr's + * IP to the actual (cm_addr). + */ +static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, + struct sockaddr_storage *cm_addr, + struct sockaddr_storage *cm_outaddr) +{ + if (pm_addr->ss_family == AF_INET) { + struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; + + if (pm4_addr->sin_addr.s_addr == INADDR_ANY) { + struct sockaddr_in *cm4_addr = + (struct sockaddr_in *)cm_addr; + struct sockaddr_in *cm4_outaddr = + (struct sockaddr_in *)cm_outaddr; + + cm4_outaddr->sin_addr = cm4_addr->sin_addr; + } + } else { + struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; + + if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { + struct sockaddr_in6 *cm6_addr = + (struct sockaddr_in6 *)cm_addr; + struct sockaddr_in6 *cm6_outaddr = + (struct sockaddr_in6 *)cm_outaddr; + + cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; + } + } +} + +/** + * iw_cm_map - Use portmapper to map the ports + * @cm_id: connection manager pointer + * @active: Indicates the active side when true + * returns nonzero for error only if iwpm_create_mapinfo() fails + * + * Tries to add a mapping for a port using the Portmapper. If + * successful in mapping the IP/Port it will check the remote + * mapped IP address for a wildcard IP address and replace the + * zero IP address with the remote_addr. + */ +static int iw_cm_map(struct iw_cm_id *cm_id, bool active) +{ + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int status; + + cm_id->m_local_addr = cm_id->local_addr; + cm_id->m_remote_addr = cm_id->remote_addr; + + memcpy(pm_reg_msg.dev_name, cm_id->device->name, + sizeof(pm_reg_msg.dev_name)); + memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, + sizeof(pm_reg_msg.if_name)); + + if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || + !iwpm_valid_pid()) + return 0; + + cm_id->mapped = true; + pm_msg.loc_addr = cm_id->local_addr; + pm_msg.rem_addr = cm_id->remote_addr; + if (active) + status = iwpm_add_and_query_mapping(&pm_msg, + RDMA_NL_IWCM); + else + status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); + + if (!status) { + cm_id->m_local_addr = pm_msg.mapped_loc_addr; + if (active) { + cm_id->m_remote_addr = pm_msg.mapped_rem_addr; + iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, + &cm_id->remote_addr, + &cm_id->m_remote_addr); + } + } + + return iwpm_create_mapinfo(&cm_id->local_addr, + &cm_id->m_local_addr, + RDMA_NL_IWCM); +} + /* * CM_ID <-- LISTEN * @@ -452,7 +560,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + ret = iw_cm_map(cm_id, false); + if (!ret) + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -582,39 +692,37 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - return -EINVAL; + ret = -EINVAL; + goto err; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - return -EINVAL; + ret = -EINVAL; + goto err; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->connect(cm_id, iw_param); - if (ret) { - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); - cm_id_priv->qp = NULL; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); - cm_id_priv->state = IW_CM_STATE_IDLE; - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - } + ret = iw_cm_map(cm_id, true); + if (!ret) + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (!ret) + return 0; /* success */ + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; +err: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_connect); @@ -656,8 +764,23 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, goto out; cm_id->provider_data = iw_event->provider_data; - cm_id->local_addr = iw_event->local_addr; - cm_id->remote_addr = iw_event->remote_addr; + cm_id->m_local_addr = iw_event->local_addr; + cm_id->m_remote_addr = iw_event->remote_addr; + cm_id->local_addr = listen_id_priv->id.local_addr; + + ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, + &iw_event->remote_addr, + &cm_id->remote_addr, + RDMA_NL_IWCM); + if (ret) { + cm_id->remote_addr = iw_event->remote_addr; + } else { + iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, + &iw_event->local_addr, + &cm_id->local_addr); + iw_event->local_addr = cm_id->local_addr; + iw_event->remote_addr = cm_id->remote_addr; + } cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; @@ -753,8 +876,10 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == 0) { - cm_id_priv->id.local_addr = iw_event->local_addr; - cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->id.m_local_addr = iw_event->local_addr; + cm_id_priv->id.m_remote_addr = iw_event->remote_addr; + iw_event->local_addr = cm_id_priv->id.local_addr; + iw_event->remote_addr = cm_id_priv->id.remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ @@ -1044,6 +1169,17 @@ EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { + int ret; + + ret = iwpm_init(RDMA_NL_IWCM); + if (ret) + pr_err("iw_cm: couldn't init iwpm\n"); + + ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS, + iwcm_nl_cb_table); + if (ret) + pr_err("iw_cm: couldn't register netlink callbacks\n"); + iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); if (!iwcm_wq) return -ENOMEM; @@ -1063,6 +1199,8 @@ static void __exit iw_cm_cleanup(void) { unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); + ibnl_remove_client(RDMA_NL_IWCM); + iwpm_exit(RDMA_NL_IWCM); } module_init(iw_cm_init); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 22a3abee2a54..43e3fa27102b 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -88,8 +88,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); if (ret) goto pid_query_error; - ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, - pm_msg->if_name, IWPM_NLA_REG_IF_NAME); + ret = ibnl_put_attr(skb, nlh, IFNAMSIZ, + pm_msg->if_name, IWPM_NLA_REG_IF_NAME); if (ret) goto pid_query_error; ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, @@ -394,7 +394,7 @@ register_pid_response_exit: /* always for found nlmsg_request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_register_pid_cb); @@ -463,7 +463,7 @@ add_mapping_response_exit: /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_add_mapping_cb); @@ -555,7 +555,7 @@ query_mapping_response_exit: /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); @@ -749,7 +749,7 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 5fb089e91353..9b2bf2fb2b00 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -254,9 +254,9 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) } int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, - struct sockaddr_storage *mapped_rem_addr, - struct sockaddr_storage *remote_addr, - u8 nl_client) + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) { struct hlist_node *tmp_hlist_node; struct hlist_head *hash_bucket_head; @@ -322,6 +322,8 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, nlmsg_request->nl_client = nl_client; nlmsg_request->request_done = 0; nlmsg_request->err_code = 0; + sema_init(&nlmsg_request->sem, 1); + down(&nlmsg_request->sem); return nlmsg_request; } @@ -364,11 +366,9 @@ struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) { int ret; - init_waitqueue_head(&nlmsg_request->waitq); - ret = wait_event_timeout(nlmsg_request->waitq, - (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); - if (!ret) { + ret = down_timeout(&nlmsg_request->sem, IWPM_NL_TIMEOUT); + if (ret) { ret = -EINVAL; pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index b7b9e194ce81..af1fc14a0d3d 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -69,7 +69,7 @@ struct iwpm_nlmsg_request { u8 nl_client; u8 request_done; u16 err_code; - wait_queue_head_t waitq; + struct semaphore sem; struct kref kref; }; diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 1b65986c0be3..19b1ee3279b4 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -44,7 +44,7 @@ static u64 value_read(int offset, int size, void *structure) case 4: return be32_to_cpup((__be32 *) (structure + offset)); case 8: return be64_to_cpup((__be64 *) (structure + offset)); default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + pr_warn("Field size %d bits not handled\n", size * 8); return 0; } } @@ -104,9 +104,8 @@ void ib_pack(const struct ib_field *desc, } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " - "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + pr_warn("Structure field %s of size %d bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); } if (desc[i].struct_size_bytes) @@ -132,7 +131,7 @@ static void value_write(int offset, int size, u64 val, void *structure) case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break; case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break; default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + pr_warn("Field size %d bits not handled\n", size * 8); } } @@ -188,9 +187,8 @@ void ib_unpack(const struct ib_field *desc, } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " - "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + pr_warn("Structure field %s of size %d bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); } memcpy(structure + desc[i].struct_offset_bytes, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 1e37f3515d98..b5656a2298ee 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -864,13 +864,12 @@ static void update_sm_ah(struct work_struct *work) struct ib_ah_attr ah_attr; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { - printk(KERN_WARNING "Couldn't query port\n"); + pr_warn("Couldn't query port\n"); return; } new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); if (!new_ah) { - printk(KERN_WARNING "Couldn't allocate new SM AH\n"); return; } @@ -880,7 +879,7 @@ static void update_sm_ah(struct work_struct *work) new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) - printk(KERN_ERR "Couldn't find index for default PKey\n"); + pr_err("Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; @@ -889,7 +888,7 @@ static void update_sm_ah(struct work_struct *work) new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); if (IS_ERR(new_ah->ah)) { - printk(KERN_WARNING "Couldn't create new SM AH\n"); + pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); return; } @@ -1221,7 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, rec.net = NULL; rec.ifindex = 0; rec.gid_type = IB_GID_TYPE_IB; - memset(rec.dmac, 0, ETH_ALEN); + eth_zero_addr(rec.dmac); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); @@ -1800,13 +1799,13 @@ static int __init ib_sa_init(void) ret = ib_register_client(&sa_client); if (ret) { - printk(KERN_ERR "Couldn't register ib_sa client\n"); + pr_err("Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { - printk(KERN_ERR "Couldn't initialize multicast handling\n"); + pr_err("Couldn't initialize multicast handling\n"); goto err2; } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 6b4e8a008bc0..4a9aa0433b07 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1234,7 +1234,7 @@ static int find_overflow_devnum(void) ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { - printk(KERN_ERR "ucm: couldn't register dynamic device number\n"); + pr_err("ucm: couldn't register dynamic device number\n"); return ret; } } @@ -1329,19 +1329,19 @@ static int __init ib_ucm_init(void) ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { - printk(KERN_ERR "ucm: couldn't register device number\n"); + pr_err("ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version.attr); if (ret) { - printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + pr_err("ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { - printk(KERN_ERR "ucm: couldn't register client\n"); + pr_err("ucm: couldn't register client\n"); goto error3; } return 0; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8b5a934e1133..dd3bcceadfde 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -314,7 +314,7 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) } } if (!event_found) - printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n"); + pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); } static int ucma_event_handler(struct rdma_cm_id *cm_id, @@ -1716,13 +1716,13 @@ static int __init ucma_init(void) ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { - printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); + pr_err("rdma_ucm: couldn't create abi_version attr\n"); goto err1; } ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); if (!ucma_ctl_table_hdr) { - printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); + pr_err("rdma_ucm: couldn't register sysctl paths\n"); ret = -ENOMEM; goto err2; } diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 2116132568e7..29a45d2f8898 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -479,8 +479,8 @@ int ib_ud_header_unpack(void *buf, buf += IB_LRH_BYTES; if (header->lrh.link_version != 0) { - printk(KERN_WARNING "Invalid LRH.link_version %d\n", - header->lrh.link_version); + pr_warn("Invalid LRH.link_version %d\n", + header->lrh.link_version); return -EINVAL; } @@ -496,20 +496,20 @@ int ib_ud_header_unpack(void *buf, buf += IB_GRH_BYTES; if (header->grh.ip_version != 6) { - printk(KERN_WARNING "Invalid GRH.ip_version %d\n", - header->grh.ip_version); + pr_warn("Invalid GRH.ip_version %d\n", + header->grh.ip_version); return -EINVAL; } if (header->grh.next_header != 0x1b) { - printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n", - header->grh.next_header); + pr_warn("Invalid GRH.next_header 0x%02x\n", + header->grh.next_header); return -EINVAL; } break; default: - printk(KERN_WARNING "Invalid LRH.link_next_header %d\n", - header->lrh.link_next_header); + pr_warn("Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header); return -EINVAL; } @@ -525,14 +525,13 @@ int ib_ud_header_unpack(void *buf, header->immediate_present = 1; break; default: - printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n", - header->bth.opcode); + pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); return -EINVAL; } if (header->bth.transport_header_version != 0) { - printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n", - header->bth.transport_header_version); + pr_warn("Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version); return -EINVAL; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6c6fbff19752..3638c787cb7c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1174,6 +1174,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; + struct ib_udata udata; int ret; if (out_len < sizeof(resp)) @@ -1195,7 +1196,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto err_free; } - mw = pd->device->alloc_mw(pd, cmd.mw_type); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); + + mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; @@ -3086,6 +3092,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) return -EPERM; + if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) + return -EINVAL; + + if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || + (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) + return -EINVAL; + if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 39680aed99dd..28ba2cc81535 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -683,12 +683,28 @@ out: return ev_file; } +static int verify_command_mask(struct ib_device *ib_dev, __u32 command) +{ + u64 mask; + + if (command <= IB_USER_VERBS_CMD_OPEN_QP) + mask = ib_dev->uverbs_cmd_mask; + else + mask = ib_dev->uverbs_ex_cmd_mask; + + if (mask & ((u64)1 << command)) + return 0; + + return -1; +} + static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; + __u32 command; __u32 flags; int srcu_key; ssize_t ret; @@ -707,37 +723,34 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) { + ret = -EINVAL; + goto out; + } - if (!flags) { - __u32 command; + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + if (verify_command_mask(ib_dev, command)) { + ret = -EOPNOTSUPP; + goto out; + } - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } + if (!file->ucontext && + command != IB_USER_VERBS_CMD_GET_CONTEXT) { + ret = -EINVAL; + goto out; + } - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + if (!flags) { if (command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[command]) { ret = -EINVAL; goto out; } - if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) { - ret = -EINVAL; - goto out; - } - - if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) { - ret = -ENOSYS; - goto out; - } - if (hdr.in_words * 4 != count) { ret = -EINVAL; goto out; @@ -749,21 +762,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, hdr.out_words * 4); } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { - __u32 command; - struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_udata ucore; struct ib_udata uhw; size_t written_count = count; - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } - - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || !uverbs_ex_cmd_table[command]) { ret = -ENOSYS; @@ -775,11 +778,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) { - ret = -ENOSYS; - goto out; - } - if (count < (sizeof(hdr) + sizeof(ex_hdr))) { ret = -EINVAL; goto out; @@ -1058,7 +1056,7 @@ static int find_overflow_devnum(void) ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); + pr_err("user_verbs: couldn't register dynamic device number\n"); return ret; } } @@ -1279,14 +1277,14 @@ static int __init ib_uverbs_init(void) ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register device number\n"); + pr_err("user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); - printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); + pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } @@ -1294,13 +1292,13 @@ static int __init ib_uverbs_init(void) ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { - printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register client\n"); + pr_err("user_verbs: couldn't register client\n"); goto out_class; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5af6d024e053..5cd1e3987f2b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1567,6 +1567,8 @@ EXPORT_SYMBOL(ib_check_mr_status); * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. + * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these + * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. * @@ -1657,3 +1659,167 @@ next_page: return i; } EXPORT_SYMBOL(ib_sg_to_pages); + +struct ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* + * Post a WR and block until its completion is reaped for the SQ. + */ +static void __ib_drain_sq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_drain_cqe sdrain; + struct ib_send_wr swr = {}, *bad_swr; + int ret; + + if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) { + WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT, + "IB_POLL_DIRECT poll_ctx not supported for drain\n"); + return; + } + + swr.wr_cqe = &sdrain.cqe; + sdrain.cqe.done = ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + ret = ib_post_send(qp, &swr, &bad_swr); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + wait_for_completion(&sdrain.done); +} + +/* + * Post a WR and block until its completion is reaped for the RQ. + */ +static void __ib_drain_rq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + + if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) { + WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT, + "IB_POLL_DIRECT poll_ctx not supported for drain\n"); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + ret = ib_post_recv(qp, &rwr, &bad_rwr); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + wait_for_completion(&rdrain.done); +} + +/** + * ib_drain_sq() - Block until all SQ CQEs have been consumed by the + * application. + * @qp: queue pair to drain + * + * If the device has a provider-specific drain function, then + * call that. Otherwise call the generic drain function + * __ib_drain_sq(). + * + * The caller must: + * + * ensure there is room in the CQ and SQ for the drain work request and + * completion. + * + * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. + * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_sq(struct ib_qp *qp) +{ + if (qp->device->drain_sq) + qp->device->drain_sq(qp); + else + __ib_drain_sq(qp); +} +EXPORT_SYMBOL(ib_drain_sq); + +/** + * ib_drain_rq() - Block until all RQ CQEs have been consumed by the + * application. + * @qp: queue pair to drain + * + * If the device has a provider-specific drain function, then + * call that. Otherwise call the generic drain function + * __ib_drain_rq(). + * + * The caller must: + * + * ensure there is room in the CQ and RQ for the drain work request and + * completion. + * + * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. + * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_rq(struct ib_qp *qp) +{ + if (qp->device->drain_rq) + qp->device->drain_rq(qp); + else + __ib_drain_rq(qp); +} +EXPORT_SYMBOL(ib_drain_rq); + +/** + * ib_drain_qp() - Block until all CQEs have been consumed by the + * application on both the RQ and SQ. + * @qp: queue pair to drain + * + * The caller must: + * + * ensure there is room in the CQ(s), SQ, and RQ for drain work requests + * and completions. + * + * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. + * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_qp(struct ib_qp *qp) +{ + ib_drain_sq(qp); + ib_drain_rq(qp); +} +EXPORT_SYMBOL(ib_drain_qp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index f504ba73e5dc..d403231a4aff 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1877,7 +1877,7 @@ err: static int is_loopback_dst(struct iw_cm_id *cm_id) { struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); if (!dev) @@ -1892,10 +1892,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_ep *ep; struct rtable *rt; int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - if (cm_id->remote_addr.ss_family != PF_INET) { + if (cm_id->m_remote_addr.ss_family != PF_INET) { err = -ENOSYS; goto out; } @@ -1961,9 +1961,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) state_set(&ep->com, CONNECTING); ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, sizeof(ep->com.remote_addr)); /* send connect request to rnic */ @@ -1992,7 +1992,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) might_sleep(); - if (cm_id->local_addr.ss_family != PF_INET) { + if (cm_id->m_local_addr.ss_family != PF_INET) { err = -ENOSYS; goto fail1; } @@ -2008,7 +2008,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); /* diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 2734820d291b..42a7b8952d13 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -657,7 +657,8 @@ err: return ERR_PTR(err); } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cd2ff5f9518a..651711370d55 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -302,7 +302,7 @@ void _c4iw_free_ep(struct kref *kref) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], @@ -314,12 +314,6 @@ void _c4iw_free_ep(struct kref *kref) dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); } - if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) { - print_addr(&ep->com, __func__, "remove_mapinfo/mapping"); - iwpm_remove_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr); - iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); - } kfree(ep); } @@ -455,7 +449,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) state_set(&ep->com, DEAD); if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -485,12 +479,19 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) unsigned int flowclen = 80; struct fw_flowc_wr *flowc; int i; + u16 vlan = ep->l2t->vlan; + int nparams; + + if (vlan == CPL_L2T_VLAN_NONE) + nparams = 8; + else + nparams = 9; skb = get_skb(skb, flowclen, GFP_KERNEL); flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | - FW_FLOWC_WR_NPARAMS_V(8)); + FW_FLOWC_WR_NPARAMS_V(nparams)); flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, 16)) | FW_WR_FLOWID_V(ep->hwtid)); @@ -511,9 +512,17 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; + if (nparams == 9) { + u16 pri; + + pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[8].val = cpu_to_be32(pri); + } else { + /* Pad WR to 16 byte boundary */ + flowc->mnemval[8].mnemonic = 0; + flowc->mnemval[8].val = 0; + } for (i = 0; i < 9; i++) { flowc->mnemval[i].r4[0] = 0; flowc->mnemval[i].r4[1] = 0; @@ -568,54 +577,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -/* - * c4iw_form_pm_msg - Form a port mapper message with mapping info - */ -static void c4iw_form_pm_msg(struct c4iw_ep *ep, - struct iwpm_sa_data *pm_msg) -{ - memcpy(&pm_msg->loc_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&pm_msg->rem_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); -} - -/* - * c4iw_form_reg_msg - Form a port mapper message with dev info - */ -static void c4iw_form_reg_msg(struct c4iw_dev *dev, - struct iwpm_dev_data *pm_msg) -{ - memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE); - memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name, - IWPM_IFNAME_SIZE); -} - -static void c4iw_record_pm_msg(struct c4iw_ep *ep, - struct iwpm_sa_data *pm_msg) -{ - memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr, - sizeof(ep->com.mapped_local_addr)); - memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr, - sizeof(ep->com.mapped_remote_addr)); -} - -static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep) -{ - int ret; - - print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep "); - print_addr(&child_ep->com, __func__, "get_remote_addr child_ep "); - - ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr, - &child_ep->com.mapped_remote_addr, - &child_ep->com.remote_addr, RDMA_NL_C4IW); - if (ret) - PDBG("Unable to find remote peer addr info - err %d\n", ret); - - return ret; -} - static void best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx, int use_ts, int ipv6) { @@ -645,13 +606,13 @@ static int send_connect(struct c4iw_ep *ep) int wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; struct sockaddr_in *ra = (struct sockaddr_in *) - &ep->com.mapped_remote_addr; + &ep->com.remote_addr; struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) - &ep->com.mapped_remote_addr; + &ep->com.remote_addr; int ret; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; u32 isn = (prandom_u32() & ~7UL) - 1; @@ -710,7 +671,7 @@ static int send_connect(struct c4iw_ep *ep) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win); opt2 = RX_CHANNEL_V(0) | @@ -1829,10 +1790,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->le.filter = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); - sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; + sin = (struct sockaddr_in *)&ep->com.local_addr; req->le.lport = sin->sin_port; req->le.u.ipv4.lip = sin->sin_addr.s_addr; - sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + sin = (struct sockaddr_in *)&ep->com.remote_addr; req->le.pport = sin->sin_port; req->le.u.ipv4.pip = sin->sin_addr.s_addr; req->tcb.t_state_to_astid = @@ -1864,7 +1825,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win)); req->tcb.opt2 = (__force __be32) (PACE_V(1) | @@ -1928,7 +1889,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1, enum chip_type adapter_type) + bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) { struct neighbour *n; int err, step; @@ -1958,7 +1919,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = pdev->mtu; @@ -2013,13 +1974,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; struct sockaddr_in *laddr = (struct sockaddr_in *) - &ep->com.cm_id->local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *) - &ep->com.cm_id->remote_addr; + &ep->com.cm_id->m_remote_addr; struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) - &ep->com.cm_id->local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) - &ep->com.cm_id->remote_addr; + &ep->com.cm_id->m_remote_addr; int iptype; __u8 *ra; @@ -2038,10 +1999,10 @@ static int c4iw_reconnect(struct c4iw_ep *ep) insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); /* find a route */ - if (ep->com.cm_id->local_addr.ss_family == AF_INET) { + if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { @@ -2058,7 +2019,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, + ep->com.cm_id->tos); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2069,7 +2031,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = ep->com.cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); @@ -2109,10 +2071,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct sockaddr_in6 *ra6; ep = lookup_atid(t, atid); - la = (struct sockaddr_in *)&ep->com.mapped_local_addr; - ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; - la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; - ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; + la = (struct sockaddr_in *)&ep->com.local_addr; + ra = (struct sockaddr_in *)&ep->com.remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); @@ -2154,7 +2116,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], (const u32 *) @@ -2189,7 +2151,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -2391,6 +2353,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; unsigned short hdrs; + u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); parent_ep = lookup_stid(t, stid); if (!parent_ep) { @@ -2399,8 +2362,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } if (state_read(&parent_ep->com) != LISTEN) { - printk(KERN_ERR "%s - listening ep not in LISTEN\n", - __func__); + PDBG("%s - listening ep not in LISTEN\n", __func__); goto reject; } @@ -2415,7 +2377,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(peer_port), peer_mss); dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, @@ -2441,7 +2403,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, - parent_ep->com.dev->rdev.lldi.adapter_type); + parent_ep->com.dev->rdev.lldi.adapter_type, tos); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -2459,18 +2421,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) child_ep->com.dev = dev; child_ep->com.cm_id = NULL; - /* - * The mapped_local and mapped_remote addresses get setup with - * the actual 4-tuple. The local address will be based on the - * actual local address of the connection, but on the port number - * of the parent listening endpoint. The remote address is - * setup based on a query to the IWPM since we don't know what it - * originally was before mapping. If no mapping was done, then - * mapped_remote == remote, and mapped_local == local. - */ if (iptype == 4) { struct sockaddr_in *sin = (struct sockaddr_in *) - &child_ep->com.mapped_local_addr; + &child_ep->com.local_addr; sin->sin_family = PF_INET; sin->sin_port = local_port; @@ -2482,12 +2435,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) &parent_ep->com.local_addr)->sin_port; sin->sin_addr.s_addr = *(__be32 *)local_ip; - sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; + sin = (struct sockaddr_in *)&child_ep->com.remote_addr; sin->sin_family = PF_INET; sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); @@ -2498,18 +2451,15 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) &parent_ep->com.local_addr)->sin6_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = peer_port; memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); } - memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr, - sizeof(child_ep->com.remote_addr)); - get_remote_addr(parent_ep, child_ep); c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; - child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + child_ep->tos = tos; child_ep->dst = dst; child_ep->hwtid = hwtid; @@ -2522,7 +2472,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) accept_cr(child_ep, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); if (iptype == 6) { - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -2765,7 +2715,7 @@ out: if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, @@ -3026,8 +2976,8 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) { struct in_device *ind; int found = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; ind = in_dev_get(dev->rdev.lldi.ports[0]); if (!ind) @@ -3072,8 +3022,8 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr, static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) { struct in6_addr uninitialized_var(addr); - struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; - struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { memcpy(la6->sin6_addr.s6_addr, &addr, 16); @@ -3092,11 +3042,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; __u8 *ra; int iptype; - int iwpm_err = 0; if ((conn_param->ord > cur_max_read_depth(dev)) || (conn_param->ird > cur_max_read_depth(dev))) { @@ -3144,47 +3091,17 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } insert_handle(dev, &dev->atid_idr, ep, ep->atid); - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, sizeof(ep->com.remote_addr)); - /* No port mapper available, go with the specified peer information */ - memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, - sizeof(ep->com.mapped_local_addr)); - memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, - sizeof(ep->com.mapped_remote_addr)); - - c4iw_form_reg_msg(dev, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); - if (iwpm_err) { - PDBG("%s: Port Mapper reg pid fail (err = %d).\n", - __func__, iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - c4iw_form_pm_msg(ep, &pm_msg); - iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW); - if (iwpm_err) - PDBG("%s: Port Mapper query fail (err = %d).\n", - __func__, iwpm_err); - else - c4iw_record_pm_msg(ep, &pm_msg); - } - if (iwpm_create_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { - iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); - err = -ENOMEM; - goto fail1; - } - print_addr(&ep->com, __func__, "add_query/create_mapinfo"); - set_bit(RELEASE_MAPINFO, &ep->com.flags); - - laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr; - raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr; - laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; - raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; + laddr = (struct sockaddr_in *)&ep->com.local_addr; + raddr = (struct sockaddr_in *)&ep->com.remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr; - if (cm_id->remote_addr.ss_family == AF_INET) { + if (cm_id->m_remote_addr.ss_family == AF_INET) { iptype = 4; ra = (__u8 *)&raddr->sin_addr; @@ -3203,7 +3120,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ra, ntohs(raddr->sin_port)); ep->dst = find_route(dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3234,7 +3151,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3245,7 +3162,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); @@ -3269,7 +3186,7 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], @@ -3302,7 +3219,7 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; struct sockaddr_in *sin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; if (dev->rdev.lldi.enable_fw_ofld_conn) { do { @@ -3343,9 +3260,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; - int iwpm_err = 0; might_sleep(); @@ -3360,7 +3274,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) ep->com.cm_id = cm_id; ep->com.dev = dev; ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); /* @@ -3369,10 +3283,10 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) if (dev->rdev.lldi.enable_fw_ofld_conn && ep->com.local_addr.ss_family == AF_INET) ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, - cm_id->local_addr.ss_family, ep); + cm_id->m_local_addr.ss_family, ep); else ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, - cm_id->local_addr.ss_family, ep); + cm_id->m_local_addr.ss_family, ep); if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); @@ -3381,36 +3295,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - /* No port mapper available, go with the specified info */ - memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, - sizeof(ep->com.mapped_local_addr)); - - c4iw_form_reg_msg(dev, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); - if (iwpm_err) { - PDBG("%s: Port Mapper reg pid fail (err = %d).\n", - __func__, iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - memcpy(&pm_msg.loc_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW); - if (iwpm_err) - PDBG("%s: Port Mapper query fail (err = %d).\n", - __func__, iwpm_err); - else - memcpy(&ep->com.mapped_local_addr, - &pm_msg.mapped_loc_addr, - sizeof(ep->com.mapped_local_addr)); - } - if (iwpm_create_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { - err = -ENOMEM; - goto fail3; - } - print_addr(&ep->com, __func__, "add_mapping/create_mapinfo"); + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, + sizeof(ep->com.local_addr)); - set_bit(RELEASE_MAPINFO, &ep->com.flags); state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); @@ -3421,7 +3308,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto out; } -fail3: cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: @@ -3456,7 +3342,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) goto done; err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); - sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -3580,7 +3466,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, state_set(&ep->com, DEAD); if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index cf21df4a8bf5..b4eeb783573c 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + if (wq) { + if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) { + if (t4_sq_empty(wq)) + complete(&qhp->sq_drained); + if (t4_rq_empty(wq)) + complete(&qhp->rq_drained); + } spin_unlock(&qhp->lock); + } return ret; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8024ea4417b8..ae2e8b23d2dd 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -87,17 +87,6 @@ struct c4iw_debugfs_data { int pos; }; -/* registered cxgb4 netlink callbacks */ -static struct ibnl_client_cbs c4iw_nl_cb_table[] = { - [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, - [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, - [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, - [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, - [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, - [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, - [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} -}; - static int count_idrs(int id, void *p, void *data) { int *countp = data; @@ -242,13 +231,13 @@ static int dump_qp(int id, void *p, void *data) if (qp->ep) { if (qp->ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &qp->ep->com.local_addr; + &qp->ep->com.cm_id->local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) - &qp->ep->com.remote_addr; + &qp->ep->com.cm_id->remote_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &qp->ep->com.mapped_local_addr; + &qp->ep->com.cm_id->m_local_addr; struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) - &qp->ep->com.mapped_remote_addr; + &qp->ep->com.cm_id->m_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " @@ -264,15 +253,15 @@ static int dump_qp(int id, void *p, void *data) ntohs(mapped_rsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &qp->ep->com.local_addr; + &qp->ep->com.cm_id->local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) - &qp->ep->com.remote_addr; + &qp->ep->com.cm_id->remote_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &qp->ep->com.mapped_local_addr; + &qp->ep->com.cm_id->m_local_addr; struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) - &qp->ep->com.mapped_remote_addr; + &qp->ep->com.cm_id->m_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " @@ -545,13 +534,13 @@ static int dump_ep(int id, void *p, void *data) if (ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) - &ep->com.remote_addr; + &ep->com.cm_id->remote_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) - &ep->com.mapped_remote_addr; + &ep->com.cm_id->m_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " @@ -569,13 +558,13 @@ static int dump_ep(int id, void *p, void *data) ntohs(mapped_rsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) - &ep->com.remote_addr; + &ep->com.cm_id->remote_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_remote_addr; + &ep->com.cm_id->m_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " @@ -610,9 +599,9 @@ static int dump_listen_ep(int id, void *p, void *data) if (ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " @@ -623,9 +612,9 @@ static int dump_listen_ep(int id, void *p, void *data) ntohs(mapped_lsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " @@ -801,10 +790,9 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " + PDBG("udb %pR db_reg %p gts_reg %p " "qpmask 0x%x cqmask 0x%x\n", - (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (void *)pci_resource_start(rdev->lldi.pdev, 2), + &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpmask, rdev->cqmask); @@ -1506,20 +1494,6 @@ static int __init c4iw_init_module(void) printk(KERN_WARNING MOD "could not create debugfs entry, continuing\n"); - if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS, - c4iw_nl_cb_table)) - pr_err("%s[%u]: Failed to add netlink callback\n" - , __func__, __LINE__); - - err = iwpm_init(RDMA_NL_C4IW); - if (err) { - pr_err("port mapper initialization failed with %d\n", err); - ibnl_remove_client(RDMA_NL_C4IW); - c4iw_cm_term(); - debugfs_remove_recursive(c4iw_debugfs_root); - return err; - } - cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1537,8 +1511,6 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); - iwpm_exit(RDMA_NL_C4IW); - ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index fb2de75a0392..df43f871ab61 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -476,6 +476,8 @@ struct c4iw_qp { wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; + struct completion rq_drained; + struct completion sq_drained; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -753,7 +755,6 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, TIMEOUT = 4, QP_REFERENCED = 5, - RELEASE_MAPINFO = 6, }; enum c4iw_ep_history { @@ -790,8 +791,6 @@ struct c4iw_ep_common { struct mutex mutex; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; - struct sockaddr_storage mapped_local_addr; - struct sockaddr_storage mapped_remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; @@ -843,45 +842,6 @@ struct c4iw_ep { struct c4iw_ep_stats stats; }; -static inline void print_addr(struct c4iw_ep_common *epc, const char *func, - const char *msg) -{ - -#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) -#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) -#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) -#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) - - if (c4iw_debug) { - switch (epc->local_addr.ss_family) { - case AF_INET: - PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n", - func, msg, SINA(&epc->local_addr), - SINP(&epc->local_addr), - SINP(&epc->mapped_local_addr), - SINA(&epc->remote_addr), - SINP(&epc->remote_addr), - SINP(&epc->mapped_remote_addr)); - break; - case AF_INET6: - PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n", - func, msg, SIN6A(&epc->local_addr), - SIN6P(&epc->local_addr), - SIN6P(&epc->mapped_local_addr), - SIN6A(&epc->remote_addr), - SIN6P(&epc->remote_addr), - SIN6P(&epc->mapped_remote_addr)); - break; - default: - break; - } - } -#undef SINA -#undef SINP -#undef SIN6A -#undef SIN6P -} - static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; @@ -961,7 +921,8 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); @@ -1016,6 +977,8 @@ extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; +void c4iw_drain_rq(struct ib_qp *qp); +void c4iw_drain_sq(struct ib_qp *qp); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7849890c4781..008be07d5604 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -34,6 +34,7 @@ #include <linux/moduleparam.h> #include <rdma/ib_umem.h> #include <linux/atomic.h> +#include <rdma/ib_user_verbs.h> #include "iw_cxgb4.h" @@ -552,7 +553,8 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -617,12 +619,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, int ret = 0; int length = roundup(max_num_sg * sizeof(u64), 32); + php = to_c4iw_pd(pd); + rhp = php->rhp; + if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > t4_max_fr_depth(use_dsgl)) + max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl && + use_dsgl)) return ERR_PTR(-EINVAL); - php = to_c4iw_pd(pd); - rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ec04272fbdc2..124682dc5709 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -339,7 +339,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); + props->max_fast_reg_page_list_len = + t4_max_fr_depth(dev->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl); return 0; } @@ -564,6 +565,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_protocol_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; + dev->ibdev.drain_sq = c4iw_drain_sq; + dev->ibdev.drain_rq = c4iw_drain_rq; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e99345eb875a..e17fb5d5e033 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -606,7 +606,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, u8 *len16, u8 t5dev) + struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported) { struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; @@ -615,7 +615,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); int rem; - if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) + if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; @@ -629,7 +629,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) { struct fw_ri_dsgl *sglp; for (i = 0; i < mhp->mpl_len; i++) @@ -808,9 +808,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, - is_t5( - qhp->rhp->rdev.lldi.adapter_type) ? - 1 : 0); + qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -1621,7 +1619,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; + struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; + struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL; PDBG("%s ib_pd %p\n", __func__, pd); @@ -1697,6 +1696,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); + init_completion(&qhp->sq_drained); + init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); @@ -1706,29 +1707,30 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err2; if (udata) { - mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); - if (!mm1) { + sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); + if (!sq_key_mm) { ret = -ENOMEM; goto err3; } - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); - if (!mm2) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { ret = -ENOMEM; goto err4; } - mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); - if (!mm3) { + sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); + if (!sq_db_key_mm) { ret = -ENOMEM; goto err5; } - mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); - if (!mm4) { + rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { ret = -ENOMEM; goto err6; } if (t4_sq_onchip(&qhp->wq.sq)) { - mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); - if (!mm5) { + ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), + GFP_KERNEL); + if (!ma_sync_key_mm) { ret = -ENOMEM; goto err7; } @@ -1743,7 +1745,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); - if (mm5) { + if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; } else { @@ -1761,28 +1763,29 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err8; - mm1->key = uresp.sq_key; - mm1->addr = qhp->wq.sq.phys_addr; - mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); - insert_mmap(ucontext, mm1); - mm2->key = uresp.rq_key; - mm2->addr = virt_to_phys(qhp->wq.rq.queue); - mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, mm2); - mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; - mm3->len = PAGE_SIZE; - insert_mmap(ucontext, mm3); - mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; - mm4->len = PAGE_SIZE; - insert_mmap(ucontext, mm4); - if (mm5) { - mm5->key = uresp.ma_sync_key; - mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) - + PCIE_MA_SYNC_A) & PAGE_MASK; - mm5->len = PAGE_SIZE; - insert_mmap(ucontext, mm5); + sq_key_mm->key = uresp.sq_key; + sq_key_mm->addr = qhp->wq.sq.phys_addr; + sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, sq_key_mm); + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + sq_db_key_mm->key = uresp.sq_db_gts_key; + sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; + sq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, sq_db_key_mm); + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + if (ma_sync_key_mm) { + ma_sync_key_mm->key = uresp.ma_sync_key; + ma_sync_key_mm->addr = + (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + PCIE_MA_SYNC_A) & PAGE_MASK; + ma_sync_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, ma_sync_key_mm); } } qhp->ibqp.qp_num = qhp->wq.sq.qid; @@ -1795,15 +1798,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err8: - kfree(mm5); + kfree(ma_sync_key_mm); err7: - kfree(mm4); + kfree(rq_db_key_mm); err6: - kfree(mm3); + kfree(sq_db_key_mm); err5: - kfree(mm2); + kfree(rq_key_mm); err4: - kfree(mm1); + kfree(sq_key_mm); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: @@ -1888,3 +1891,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } + +void c4iw_drain_sq(struct ib_qp *ibqp) +{ + struct c4iw_qp *qp = to_c4iw_qp(ibqp); + + wait_for_completion(&qp->sq_drained); +} + +void c4iw_drain_rq(struct ib_qp *ibqp) +{ + struct c4iw_qp *qp = to_c4iw_qp(ibqp); + + wait_for_completion(&qp->rq_drained); +} diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 21cb41a60fe8..c74ef2620b85 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status, if (status) { pr_debug("(port: %d) failed: status = %d\n", cb_ctx->port, status); - rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC; + rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; goto out; } @@ -416,7 +416,7 @@ next_entry: be64_to_cpu((__force __be64)rec->guid_indexes), be64_to_cpu((__force __be64)applied_guid_indexes), be64_to_cpu((__force __be64)declined_guid_indexes)); - rec->time_to_run = ktime_get_real_ns() + + rec->time_to_run = ktime_get_boot_ns() + resched_delay_sec * NSEC_PER_SEC; } else { rec->status = MLX4_GUID_INFO_STATUS_SET; @@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, } } if (resched_delay_sec) { - u64 curr_time = ktime_get_real_ns(); + u64 curr_time = ktime_get_boot_ns(); *resched_delay_sec = (low_record_time < curr_time) ? 0 : div_u64((low_record_time - curr_time), NSEC_PER_SEC); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c7ab6cabbb8..914bc98e753f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_ return err; } +static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, + struct ib_flow_attr *flow_attr, + enum mlx4_net_trans_promisc_mode *type) +{ + int err = 0; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) || + (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) || + (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) { + return -EOPNOTSUPP; + } + + if (flow_attr->num_of_specs == 0) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + union ib_flow_spec *ib_spec; + + ib_spec = (union ib_flow_spec *)(flow_attr + 1); + if (ib_spec->type != IB_FLOW_SPEC_ETH) + return -EINVAL; + + /* if all is zero than MC and UC */ + if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01, + ib_spec->eth.mask.dst_mac[1], + ib_spec->eth.mask.dst_mac[2], + ib_spec->eth.mask.dst_mac[3], + ib_spec->eth.mask.dst_mac[4], + ib_spec->eth.mask.dst_mac[5]}; + + /* Above xor was only on MC bit, non empty mask is valid + * only if this bit is set and rest are zero. + */ + if (!is_zero_ether_addr(&mac[0])) + return -EINVAL; + + if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac)) + type[0] = MLX4_FS_MC_SNIFFER; + else + type[0] = MLX4_FS_UC_SNIFFER; + } + } + + return err; +} + static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) @@ -1653,6 +1703,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + (flow_attr->type != IB_FLOW_ATTR_NORMAL)) + return ERR_PTR(-EOPNOTSUPP); + memset(type, 0, sizeof(type)); mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); @@ -1663,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, switch (flow_attr->type) { case IB_FLOW_ATTR_NORMAL: - type[0] = MLX4_FS_REGULAR; + /* If dont trap flag (continue match) is set, under specific + * condition traffic be replicated to given qp, + * without stealing it + */ + if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) { + err = mlx4_ib_add_dont_trap_rule(dev, + flow_attr, + type); + if (err) + goto err_free; + } else { + type[0] = MLX4_FS_REGULAR; + } break; case IB_FLOW_ATTR_ALL_DEFAULT: @@ -1675,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, break; case IB_FLOW_ATTR_SNIFFER: - type[0] = MLX4_FS_UC_SNIFFER; - type[1] = MLX4_FS_MC_SNIFFER; + type[0] = MLX4_FS_MIRROR_RX_PORT; + type[1] = MLX4_FS_MIRROR_SX_PORT; break; default: diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 52ce7b000044..1eca01cebe51 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -711,7 +711,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 242b94ec105b..ce0b5aa8eb9b 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -32,6 +32,7 @@ */ #include <linux/slab.h> +#include <rdma/ib_user_verbs.h> #include "mlx4_ib.h" @@ -334,7 +335,8 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mw *mw; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 27a70159e2ea..4e851889355a 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fd1de31e0611..a00ba4418de9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -207,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, break; case MLX5_CQE_RESP_SEND: wc->opcode = IB_WC_RECV; - wc->wc_flags = 0; + wc->wc_flags = IB_WC_IP_CSUM_OK; + if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK)))) + wc->wc_flags = 0; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IB_WC_RECV; @@ -431,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mr *mmr; + struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; @@ -536,17 +539,17 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mr_table.lock); - mmr = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmr)) { - read_unlock(&dev->mdev->priv.mr_table.lock); + read_lock(&dev->mdev->priv.mkey_table.lock); + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmkey)) { + read_unlock(&dev->mdev->priv.mkey_table.lock); mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); return -EINVAL; } - mr = to_mibmr(mmr); + mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; mr->sig->sigerr_count++; @@ -558,25 +561,51 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mkey_table.lock); goto repoll; } return 0; } +static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_ib_wc *soft_wc, *next; + int npolled = 0; + + list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { + if (npolled >= num_entries) + break; + + mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", + cq->mcq.cqn); + + wc[npolled++] = soft_wc->wc; + list_del(&soft_wc->list); + kfree(soft_wc); + } + + return npolled; +} + int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; unsigned long flags; + int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); - for (npolled = 0; npolled < num_entries; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc); + + for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); if (err) break; } @@ -587,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) - return npolled; + return soft_polled + npolled; else return err; } @@ -595,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; + struct mlx5_ib_cq *cq = to_mcq(ibcq); void __iomem *uar_page = mdev->priv.uuari.uars[0].map; + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, irq_flags); + if (cq->notify_flags != IB_CQ_NEXT_COMP) + cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; - mlx5_cq_arm(&to_mcq(ibcq)->mcq, + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) + ret = 1; + spin_unlock_irqrestore(&cq->lock, irq_flags); + + mlx5_cq_arm(&cq->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, uar_page, MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), to_mcq(ibcq)->mcq.cons_index); - return 0; + return ret; } static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, @@ -757,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } +static void notify_soft_wc_handler(struct work_struct *work) +{ + struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, + notify_work); + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -807,6 +855,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, &index, &inlen); if (err) goto err_create; + + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } cq->cqe_size = cqe_size; @@ -832,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; + INIT_LIST_HEAD(&cq->wc_list); + if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; @@ -1219,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) cq = to_mcq(ibcq); return cq->cqe_size; } + +/* Called from atomic context */ +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) +{ + struct mlx5_ib_wc *soft_wc; + struct mlx5_ib_cq *cq = to_mcq(ibcq); + unsigned long flags; + + soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); + if (!soft_wc) + return -ENOMEM; + + soft_wc->wc = *wc; + spin_lock_irqsave(&cq->lock, flags); + list_add_tail(&soft_wc->list, &cq->wc_list); + if (cq->notify_flags == IB_CQ_NEXT_COMP || + wc->status != IB_WC_SUCCESS) { + cq->notify_flags = 0; + schedule_work(&cq->notify_work); + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c new file mode 100644 index 000000000000..53e03c8ede79 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx5_ib.h" + +struct mlx5_ib_gsi_wr { + struct ib_cqe cqe; + struct ib_wc wc; + int send_flags; + bool completed:1; +}; + +struct mlx5_ib_gsi_qp { + struct ib_qp ibqp; + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + /* Serialize qp state modifications */ + struct mutex mutex; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. + */ + spinlock_t lock; + struct ib_qp **tx_qps; +}; + +static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) +{ + return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); +} + +static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); +} + +static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) +{ + return ++index % gsi->cap.max_send_wr; +} + +#define for_each_outstanding_wr(gsi, index) \ + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ + index = next_outstanding(gsi, index)) + +/* Call with gsi->lock locked */ +static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_wr *wr; + u32 index; + + for_each_outstanding_wr(gsi, index) { + wr = &gsi->outstanding_wrs[index]; + + if (!wr->completed) + break; + + if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || + wr->send_flags & IB_SEND_SIGNALED) + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); + + wr->completed = false; + } + + gsi->outstanding_ci = index; +} + +static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_gsi_qp *gsi = cq->cq_context; + struct mlx5_ib_gsi_wr *wr = + container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + u64 wr_id; + unsigned long flags; + + spin_lock_irqsave(&gsi->lock, flags); + wr->completed = true; + wr_id = wr->wc.wr_id; + wr->wc = *wc; + wr->wc.wr_id = wr_id; + wr->wc.qp = &gsi->ibqp; + + generate_completions(gsi); + spin_unlock_irqrestore(&gsi->lock, flags); +} + +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_gsi_qp *gsi; + struct ib_qp_init_attr hw_init_attr = *init_attr; + const u8 port_num = init_attr->port_num; + const int num_pkeys = pd->device->attrs.max_pkeys; + const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; + int ret; + + mlx5_ib_dbg(dev, "creating GSI QP\n"); + + if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { + mlx5_ib_warn(dev, + "invalid port number %d during GSI QP creation\n", + port_num); + return ERR_PTR(-EINVAL); + } + + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); + if (!gsi) + return ERR_PTR(-ENOMEM); + + gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); + if (!gsi->tx_qps) { + ret = -ENOMEM; + goto err_free; + } + + gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, + sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); + if (!gsi->outstanding_wrs) { + ret = -ENOMEM; + goto err_free_tx; + } + + mutex_init(&gsi->mutex); + + mutex_lock(&dev->devr.mutex); + + if (dev->devr.ports[port_num - 1].gsi) { + mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", + port_num); + ret = -EBUSY; + goto err_free_wrs; + } + gsi->num_qps = num_qps; + spin_lock_init(&gsi->lock); + + gsi->cap = init_attr->cap; + gsi->sq_sig_type = init_attr->sq_sig_type; + gsi->ibqp.qp_num = 1; + gsi->port_num = port_num; + + gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + IB_POLL_SOFTIRQ); + if (IS_ERR(gsi->cq)) { + mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", + PTR_ERR(gsi->cq)); + ret = PTR_ERR(gsi->cq); + goto err_free_wrs; + } + + hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; + hw_init_attr.send_cq = gsi->cq; + if (num_qps) { + hw_init_attr.cap.max_send_wr = 0; + hw_init_attr.cap.max_send_sge = 0; + hw_init_attr.cap.max_inline_data = 0; + } + gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + if (IS_ERR(gsi->rx_qp)) { + mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", + PTR_ERR(gsi->rx_qp)); + ret = PTR_ERR(gsi->rx_qp); + goto err_destroy_cq; + } + + dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + + mutex_unlock(&dev->devr.mutex); + + return &gsi->ibqp; + +err_destroy_cq: + ib_free_cq(gsi->cq); +err_free_wrs: + mutex_unlock(&dev->devr.mutex); + kfree(gsi->outstanding_wrs); +err_free_tx: + kfree(gsi->tx_qps); +err_free: + kfree(gsi); + return ERR_PTR(ret); +} + +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + const int port_num = gsi->port_num; + int qp_index; + int ret; + + mlx5_ib_dbg(dev, "destroying GSI QP\n"); + + mutex_lock(&dev->devr.mutex); + ret = ib_destroy_qp(gsi->rx_qp); + if (ret) { + mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n", + ret); + mutex_unlock(&dev->devr.mutex); + return ret; + } + dev->devr.ports[port_num - 1].gsi = NULL; + mutex_unlock(&dev->devr.mutex); + gsi->rx_qp = NULL; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) { + if (!gsi->tx_qps[qp_index]) + continue; + WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index])); + gsi->tx_qps[qp_index] = NULL; + } + + ib_free_cq(gsi->cq); + + kfree(gsi->outstanding_wrs); + kfree(gsi->tx_qps); + kfree(gsi); + + return 0; +} + +static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_pd *pd = gsi->rx_qp->pd; + struct ib_qp_init_attr init_attr = { + .event_handler = gsi->rx_qp->event_handler, + .qp_context = gsi->rx_qp->qp_context, + .send_cq = gsi->cq, + .recv_cq = gsi->rx_qp->recv_cq, + .cap = { + .max_send_wr = gsi->cap.max_send_wr, + .max_send_sge = gsi->cap.max_send_sge, + .max_inline_data = gsi->cap.max_inline_data, + }, + .sq_sig_type = gsi->sq_sig_type, + .qp_type = IB_QPT_UD, + .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + }; + + return ib_create_qp(pd, &init_attr); +} + +static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, + u16 qp_index) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct ib_qp_attr attr; + int mask; + int ret; + + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; + attr.qp_state = IB_QPS_INIT; + attr.pkey_index = qp_index; + attr.qkey = IB_QP1_QKEY; + attr.port_num = gsi->port_num; + ret = ib_modify_qp(qp, &attr, mask); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n", + qp->qp_num, ret); + return ret; + } + + return 0; +} + +static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) +{ + struct ib_device *device = gsi->rx_qp->device; + struct mlx5_ib_dev *dev = to_mdev(device); + struct ib_qp *qp; + unsigned long flags; + u16 pkey; + int ret; + + ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (ret) { + mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", + gsi->port_num, qp_index); + return; + } + + if (!pkey) { + mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n", + gsi->port_num, qp_index); + return; + } + + spin_lock_irqsave(&gsi->lock, flags); + qp = gsi->tx_qps[qp_index]; + spin_unlock_irqrestore(&gsi->lock, flags); + if (qp) { + mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n", + gsi->port_num, qp_index); + return; + } + + qp = create_gsi_ud_qp(gsi); + if (IS_ERR(qp)) { + mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", + PTR_ERR(qp)); + return; + } + + ret = modify_to_rts(gsi, qp, qp_index); + if (ret) + goto err_destroy_qp; + + spin_lock_irqsave(&gsi->lock, flags); + WARN_ON_ONCE(gsi->tx_qps[qp_index]); + gsi->tx_qps[qp_index] = qp; + spin_unlock_irqrestore(&gsi->lock, flags); + + return; + +err_destroy_qp: + WARN_ON_ONCE(qp); +} + +static void setup_qps(struct mlx5_ib_gsi_qp *gsi) +{ + u16 qp_index; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) + setup_qp(gsi, qp_index); +} + +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); + + mutex_lock(&gsi->mutex); + ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); + if (ret) { + mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); + goto unlock; + } + + if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) + setup_qps(gsi); + +unlock: + mutex_unlock(&gsi->mutex); + + return ret; +} + +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mutex_lock(&gsi->mutex); + ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); + qp_init_attr->cap = gsi->cap; + mutex_unlock(&gsi->mutex); + + return ret; +} + +/* Call with gsi->lock locked */ +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr, struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_gsi_wr *gsi_wr; + + if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) { + mlx5_ib_warn(dev, "no available GSI work request.\n"); + return -ENOMEM; + } + + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; + gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); + + if (!wc) { + memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); + gsi_wr->wc.pkey_index = wr->pkey_index; + gsi_wr->wc.wr_id = wr->wr.wr_id; + } else { + gsi_wr->wc = *wc; + gsi_wr->completed = true; + } + + gsi_wr->cqe.done = &handle_single_completion; + wr->wr.wr_cqe = &gsi_wr->cqe; + + return 0; +} + +/* Call with gsi->lock locked */ +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr) +{ + struct ib_wc wc = { + { .wr_id = wr->wr.wr_id }, + .status = IB_WC_SUCCESS, + .opcode = IB_WC_SEND, + .qp = &gsi->ibqp, + }; + int ret; + + ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + if (ret) + return ret; + + generate_completions(gsi); + + return 0; +} + +/* Call with gsi->lock locked */ +static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + int qp_index = wr->pkey_index; + + if (!mlx5_ib_deth_sqpn_cap(dev)) + return gsi->rx_qp; + + if (qp_index >= gsi->num_qps) + return NULL; + + return gsi->tx_qps[qp_index]; +} + +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct ib_qp *tx_qp; + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct ib_ud_wr cur_wr = *ud_wr(wr); + + cur_wr.wr.next = NULL; + + spin_lock_irqsave(&gsi->lock, flags); + tx_qp = get_tx_qp(gsi, &cur_wr); + if (!tx_qp) { + ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + if (ret) + goto err; + spin_unlock_irqrestore(&gsi->lock, flags); + continue; + } + + ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + if (ret) + goto err; + + ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); + if (ret) { + /* Undo the effect of adding the outstanding wr */ + gsi->outstanding_pi = (gsi->outstanding_pi - 1) % + gsi->cap.max_send_wr; + goto err; + } + spin_unlock_irqrestore(&gsi->lock, flags); + } + + return 0; + +err: + spin_unlock_irqrestore(&gsi->lock, flags); + *bad_wr = wr; + return ret; +} + +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + + return ib_post_recv(gsi->rx_qp, wr, bad_wr); +} + +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) +{ + if (!gsi) + return; + + mutex_lock(&gsi->mutex); + setup_qps(gsi); + mutex_unlock(&gsi->mutex); +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b84d13a487cc..41d8a0036465 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -31,8 +31,10 @@ */ #include <linux/mlx5/cmd.h> +#include <linux/mlx5/vport.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/ib_pma.h> #include "mlx5_ib.h" enum { @@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); } -int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid; int err; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, + void *out) +{ +#define MLX5_SUM_CNT(p, cntr1, cntr2) \ + (MLX5_GET64(query_vport_counter_out, p, cntr1) + \ + MLX5_GET64(query_vport_counter_out, p, cntr2)) + + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, + transmitted_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, + received_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_packets = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets, + transmitted_ib_multicast.packets)); + pma_cnt_ext->port_rcv_packets = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets, + received_ib_multicast.packets)); + pma_cnt_ext->port_unicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_unicast.packets); + pma_cnt_ext->port_unicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_unicast.packets); + pma_cnt_ext->port_multicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_multicast.packets); + pma_cnt_ext->port_multicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_multicast.packets); +} + +static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, + void *out) +{ + /* Traffic counters will be reported in + * their 64bit form via ib_pma_portcounters_ext by default. + */ + void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out, + counter_set); + +#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \ + counter_var = MLX5_GET_BE(typeof(counter_var), \ + ib_port_cntrs_grp_data_layout, \ + out_pma, counter_name); \ + } + + MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter, + symbol_error_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter, + link_error_recovery_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter, + link_downed_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors, + port_rcv_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors, + port_rcv_remote_physical_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors, + port_rcv_switch_relay_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards, + port_xmit_discards); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors, + port_xmit_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors, + port_rcv_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors, + link_overrun_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped, + vl_15_dropped); +} + +static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int err; + void *out_cnt; + + /* Decalring support of extended counters */ + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { + struct ib_class_port_info cpi = {}; + + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + } + + if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { + struct ib_pma_portcounters_ext *pma_cnt_ext = + (struct ib_pma_portcounters_ext *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_vport_counter(dev->mdev, 0, + port_num, out_cnt, sz); + if (!err) + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + } else { + struct ib_pma_portcounters *pma_cnt = + (struct ib_pma_portcounters *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, + out_cnt, sz); + if (!err) + pma_cnt_assign(pma_cnt, out_cnt); + } + + kvfree(out_cnt); + if (err) + return IB_MAD_RESULT_FAILURE; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + memset(out_mad->data, 0, sizeof(out_mad->data)); + + if (MLX5_CAP_GEN(mdev, vport_counters) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { + return process_pma_cmd(ibdev, port_num, in_mad, out_mad); + } else { + return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in_mad, out_mad); + } +} + int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_smp *in_mad = NULL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418ccbc98..5afbb697e691 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -487,6 +487,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; + if (MLX5_CAP_GEN(mdev, imaicl)) { + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_WINDOW_TYPE_2B; + props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + /* We support 'Gappy' memory registration too */ + props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; + } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; @@ -504,6 +511,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, (MLX5_CAP_ETH(dev->mdev, csum_cap))) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + props->device_cap_flags |= IB_DEVICE_UD_TSO; + } + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; @@ -529,7 +541,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; - props->max_fast_reg_page_list_len = (unsigned int)-1; + props->max_fast_reg_page_list_len = + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -1369,11 +1382,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) return 0; } +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr) { + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; @@ -1383,10 +1405,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int err = 0; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr)) + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = flow_attr->priority; + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; @@ -1434,6 +1458,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; u32 *match_c; u32 *match_v; + u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1459,9 +1484,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, /* Outer header support only */ match_criteria_enable = (!outer_header_zero(match_c)) << 0; + action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_c, match_v, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1481,6 +1508,29 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_dst = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); + if (!IS_ERR(handler)) { + handler_dst = create_flow_rule(dev, ft_prio, + flow_attr, dst); + if (IS_ERR(handler_dst)) { + mlx5_del_flow_rule(handler->rule); + kfree(handler); + handler = handler_dst; + } else { + list_add(&handler_dst->list, &handler->list); + } + } + + return handler; +} enum { LEFTOVERS_MC, LEFTOVERS_UC, @@ -1558,7 +1608,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || - flow_attr->flags) + (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -1577,8 +1627,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { + handler = create_dont_trap_rule(dev, ft_prio, + flow_attr, dst); + } else { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, @@ -1716,6 +1771,17 @@ static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_reg_pages, }; +static void pkey_change_handler(struct work_struct *work) +{ + struct mlx5_ib_port_resources *ports = + container_of(work, struct mlx5_ib_port_resources, + pkey_change_work); + + mutex_lock(&ports->devr->mutex); + mlx5_ib_gsi_pkey_change(ports->gsi); + mutex_unlock(&ports->devr->mutex); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1752,6 +1818,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; port = (u8)param; + + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: @@ -1838,7 +1906,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); mlx5_ib_destroy_qp(dev->umrc.qp); - ib_destroy_cq(dev->umrc.cq); + ib_free_cq(dev->umrc.cq); ib_dealloc_pd(dev->umrc.pd); } @@ -1853,7 +1921,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -1870,15 +1937,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - cq_attr.cqe = 128; - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, - &cq_attr); + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); goto error_2; } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); init_attr->send_cq = cq; init_attr->recv_cq = cq; @@ -1945,7 +2009,7 @@ error_4: mlx5_ib_destroy_qp(qp); error_3: - ib_destroy_cq(cq); + ib_free_cq(cq); error_2: ib_dealloc_pd(pd); @@ -1961,10 +2025,13 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; + int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + mutex_init(&devr->mutex); + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); @@ -2052,6 +2119,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + INIT_WORK(&devr->ports[port].pkey_change_work, + pkey_change_handler); + devr->ports[port].devr = devr; + } + return 0; error5: @@ -2070,12 +2143,20 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + struct mlx5_ib_dev *dev = + container_of(devr, struct mlx5_ib_dev, devr); + int port; + mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + + /* Make sure no change P_Key work items are still executing */ + for (port = 0; port < dev->num_ports; ++port) + cancel_work_sync(&devr->ports[port].pkey_change_work); } static u32 get_core_cap_flags(struct ib_device *ibdev) @@ -2198,6 +2279,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2258,6 +2340,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; @@ -2269,6 +2352,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + if (MLX5_CAP_GEN(mdev, imaicl)) { + dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; + dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b9737baa36..76b2b42e0535 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -43,6 +43,7 @@ #include <linux/mlx5/srq.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> +#include <rdma/ib_user_verbs.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -126,7 +127,7 @@ struct mlx5_ib_pd { }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) -#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif @@ -162,9 +163,31 @@ struct mlx5_ib_flow_db { #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) + +#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) +#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) +#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END + #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +/* + * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI + * creates the actual hardware QP. + */ +#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 +/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the mlx5_ib driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ + +/* Create a UD QP whose source QP number is 1 */ +static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) +{ + return IB_QP_CREATE_RESERVED_START; +} + struct wr_list { u16 opcode; u16 next; @@ -325,11 +348,14 @@ struct mlx5_ib_cq_buf { }; enum mlx5_ib_qp_flags { - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, - MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, - MLX5_IB_QP_MANAGED_SEND = 1 << 3, - MLX5_IB_QP_MANAGED_RECV = 1 << 4, + MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, + MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, + MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, + /* QP uses 1 as its source QP number */ + MLX5_IB_QP_SQPN_QP1 = 1 << 6, }; struct mlx5_umr_wr { @@ -373,6 +399,14 @@ struct mlx5_ib_cq { struct ib_umem *resize_umem; int cqe_size; u32 create_flags; + struct list_head wc_list; + enum ib_cq_notify_flags notify_flags; + struct work_struct notify_work; +}; + +struct mlx5_ib_wc { + struct ib_wc wc; + struct list_head list; }; struct mlx5_ib_srq { @@ -413,7 +447,8 @@ struct mlx5_ib_mr { int ndescs; int max_descs; int desc_size; - struct mlx5_core_mr mmr; + int access_mode; + struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; @@ -425,19 +460,20 @@ struct mlx5_ib_mr { struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; + int access_flags; /* Needed for rereg MR */ +}; + +struct mlx5_ib_mw { + struct ib_mw ibmw; + struct mlx5_core_mkey mmkey; }; struct mlx5_ib_umr_context { + struct ib_cqe cqe; enum ib_wc_status status; struct completion done; }; -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->status = -1; - init_completion(&context->done); -} - struct umr_common { struct ib_pd *pd; struct ib_cq *cq; @@ -487,6 +523,14 @@ struct mlx5_mr_cache { unsigned long last_add; }; +struct mlx5_ib_gsi_qp; + +struct mlx5_ib_port_resources { + struct mlx5_ib_resources *devr; + struct mlx5_ib_gsi_qp *gsi; + struct work_struct pkey_change_work; +}; + struct mlx5_ib_resources { struct ib_cq *c0; struct ib_xrcd *x0; @@ -494,6 +538,9 @@ struct mlx5_ib_resources { struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; + struct mlx5_ib_port_resources ports[2]; + /* Protects changes to the port resources */ + struct mutex mutex; }; struct mlx5_roce { @@ -558,9 +605,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) @@ -588,6 +635,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } +static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx5_ib_mw, ibmw); +} + struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -648,8 +700,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); +int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -700,7 +758,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -739,6 +796,23 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); +/* GSI QP helper functions */ +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr); +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask); +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); + +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -758,7 +832,7 @@ static inline u8 convert_access(int acc) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == IB_QPT_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI; } #define MLX5_MAX_UMR_SHIFT 16 diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6000f7aeede9..4d5bff151cdf 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,6 +40,7 @@ #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +#include "user.h" enum { MAX_PENDING_REG_MR = 8, @@ -57,7 +58,7 @@ static int clean_mr(struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ @@ -77,6 +78,40 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) return order - cache->ent[0].order; } +static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) +{ + return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= + length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void update_odp_mr(struct mlx5_ib_mr *mr) +{ + if (mr->umem->odp_data) { + /* + * This barrier prevents the compiler from moving the + * setting of umem->odp_data->private to point to our + * MR, before reg_umr finished, to ensure that the MR + * initialization have finished before starting to + * handle invalidations. + */ + smp_wmb(); + mr->umem->odp_data->private = mr; + /* + * Make sure we will see the new + * umem->odp_data->private value in the invalidation + * routines, before we can get page faults on the + * MR. Page faults can happen once we put the MR in + * the tree, below this line. Without the barrier, + * there can be a fault handling and an invalidation + * before umem->odp_data->private == mr is visible to + * the invalidation handler. + */ + smp_wmb(); + } +} +#endif + static void reg_mr_callback(int status, void *context) { struct mlx5_ib_mr *mr = context; @@ -86,7 +121,7 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -113,7 +148,7 @@ static void reg_mr_callback(int status, void *context) spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; cache->last_add = jiffies; @@ -124,10 +159,10 @@ static void reg_mr_callback(int status, void *context) spin_unlock_irqrestore(&ent->lock, flags); write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), - &mr->mmr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey); if (err) - pr_err("Error inserting to mr tree. 0x%x\n", -err); + pr_err("Error inserting to mkey tree. 0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); } @@ -168,7 +203,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { @@ -657,14 +692,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_in; kfree(in); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; return &mr->ibmr; @@ -693,10 +728,40 @@ static int use_umr(int order) return order <= MLX5_MAX_UMR_SHIFT; } -static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, - struct ib_sge *sg, u64 dma, int n, u32 key, - int page_shift, u64 virt_addr, u64 len, - int access_flags) +static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int npages, int page_shift, int *size, + __be64 **mr_pas, dma_addr_t *dma) +{ + __be64 *pas; + struct device *ddev = dev->ib_dev.dma_device; + + /* + * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the pas array, we allocate + * a little more. + */ + *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); + *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!(*mr_pas)) + return -ENOMEM; + + pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); + /* Clear padding after the actual pages. */ + memset(pas + npages, 0, *size - npages * sizeof(u64)); + + *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, *dma)) { + kfree(*mr_pas); + return -ENOMEM; + } + + return 0; +} + +static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -706,7 +771,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, sg->lkey = dev->umrc.pd->local_dma_lkey; wr->next = NULL; - wr->send_flags = 0; wr->sg_list = sg; if (n) wr->num_sge = 1; @@ -718,6 +782,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, umrwr->npages = n; umrwr->page_shift = page_shift; umrwr->mkey = key; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_umr_wr *umrwr = umr_wr(wr); + + prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); + + wr->send_flags = 0; + umrwr->target.virt_addr = virt_addr; umrwr->length = len; umrwr->access_flags = access_flags; @@ -734,26 +811,45 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, int *npages, + int *page_shift, int *ncont, int *order) { - struct mlx5_ib_umr_context *context; - struct ib_wc wc; - int err; - - while (1) { - err = ib_poll_cq(cq, 1, &wc); - if (err < 0) { - pr_warn("poll cq error %d\n", err); - return; - } - if (err == 0) - break; + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(umem)) { + mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); + return (void *)umem; + } - context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; - context->status = wc.status; - complete(&context->done); + mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + if (!*npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + ib_umem_release(umem); + return ERR_PTR(-EINVAL); } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + *npages, *ncont, *order, *page_shift); + + return umem; +} + +static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_umr_context *context = + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); + + context->status = wc->status; + complete(&context->done); +} + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->cqe.done = mlx5_ib_umr_done; + context->status = -1; + init_completion(&context->done); } static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, @@ -764,13 +860,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; __be64 *mr_pas; - __be64 *pas; dma_addr_t dma; int err = 0; int i; @@ -790,33 +885,17 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the pas array, we allocate - * a little more. */ - size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); - mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); - if (!mr_pas) { - err = -ENOMEM; + err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, + &dma); + if (err) goto free_mr; - } - pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); - mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); - /* Clear padding after the actual pages. */ - memset(pas + npages, 0, size - npages * sizeof(u64)); - - dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - err = -ENOMEM; - goto free_pas; - } + mlx5_ib_init_umr_context(&umr_context); - memset(&umrwr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + umrwr.wr.wr_cqe = &umr_context.cqe; + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -830,9 +909,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, } } - mr->mmr.iova = virt_addr; - mr->mmr.size = len; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.iova = virt_addr; + mr->mmkey.size = len; + mr->mmkey.pd = to_mpd(pd)->pdn; mr->live = 1; @@ -840,7 +919,6 @@ unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); -free_pas: kfree(mr_pas); free_mr: @@ -929,8 +1007,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + mlx5_ib_init_umr_context(&umr_context); + memset(&wr, 0, sizeof(wr)); - wr.wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_cqe = &umr_context.cqe; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), @@ -944,10 +1024,9 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, wr.wr.opcode = MLX5_IB_WR_UMR; wr.npages = sg.length / sizeof(u64); wr.page_shift = PAGE_SHIFT; - wr.mkey = mr->mmr.key; + wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { @@ -974,10 +1053,14 @@ free_pas: } #endif -static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, - u64 length, struct ib_umem *umem, - int npages, int page_shift, - int access_flags) +/* + * If ibmr is NULL it will be allocated by reg_create. + * Else, the given ibmr will be used. + */ +static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, + u64 virt_addr, u64 length, + struct ib_umem *umem, int npages, + int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; @@ -986,7 +1069,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1013,7 +1096,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); @@ -1024,7 +1107,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, mr->live = 1; kvfree(in); - mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); return mr; @@ -1032,11 +1115,23 @@ err_2: kvfree(in); err_1: - kfree(mr); + if (!ibmr) + kfree(mr); return ERR_PTR(err); } +static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + int npages, u64 length, int access_flags) +{ + mr->npages = npages; + atomic_add(npages, &dev->mdev->priv.reg_pages); + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->access_flags = access_flags; +} + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1052,22 +1147,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = ib_umem_get(pd->uobject->context, start, length, access_flags, - 0); - if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; - } + umem = mr_umem_get(pd, start, length, access_flags, &npages, + &page_shift, &ncont, &order); - mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); - if (!npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - err = -EINVAL; - goto error; - } - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - npages, ncont, order, page_shift); + if (IS_ERR(umem)) + return (void *)umem; if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1083,45 +1167,21 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } if (!mr) - mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, - access_flags); + mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, + page_shift, access_flags); if (IS_ERR(mr)) { err = PTR_ERR(mr); goto error; } - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + set_mr_fileds(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem->odp_data) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - mr->umem->odp_data->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } + update_odp_mr(mr); #endif return &mr->ibmr; @@ -1135,15 +1195,15 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; - memset(&umrwr.wr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); - mlx5_ib_init_umr_context(&umr_context); + + umrwr.wr.wr_cqe = &umr_context.cqe; + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); + down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -1165,6 +1225,167 @@ error: return err; } +static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, + u64 length, int npages, int page_shift, int order, + int access_flags, int flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_ib_umr_context umr_context; + struct ib_send_wr *bad; + struct mlx5_umr_wr umrwr = {}; + struct ib_sge sg; + struct umr_common *umrc = &dev->umrc; + dma_addr_t dma = 0; + __be64 *mr_pas = NULL; + int size; + int err; + + mlx5_ib_init_umr_context(&umr_context); + + umrwr.wr.wr_cqe = &umr_context.cqe; + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; + + if (flags & IB_MR_REREG_TRANS) { + err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size, + &mr_pas, &dma); + if (err) + return err; + + umrwr.target.virt_addr = virt_addr; + umrwr.length = length; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + } + + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, + page_shift); + + if (flags & IB_MR_REREG_PD) { + umrwr.pd = pd; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; + } + + if (flags & IB_MR_REREG_ACCESS) { + umrwr.access_flags = access_flags; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; + } + + /* post send request to UMR QP */ + down(&umrc->sem); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); + + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed (%u)\n", + umr_context.status); + err = -EFAULT; + } + } + + up(&umrc->sem); + if (flags & IB_MR_REREG_TRANS) { + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + kfree(mr_pas); + } + return err; +} + +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int new_access_flags, + struct ib_pd *new_pd, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; + int access_flags = flags & IB_MR_REREG_ACCESS ? + new_access_flags : + mr->access_flags; + u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; + u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; + int page_shift = 0; + int npages = 0; + int ncont = 0; + int order = 0; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); + + if (flags != IB_MR_REREG_PD) { + /* + * Replace umem. This needs to be done whether or not UMR is + * used. + */ + flags |= IB_MR_REREG_TRANS; + ib_umem_release(mr->umem); + mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, + &page_shift, &ncont, &order); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + mr->umem = NULL; + return err; + } + } + + if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + /* + * UMR can't be used - MKey needs to be replaced. + */ + if (mr->umred) { + err = unreg_umr(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to unregister MR\n"); + } else { + err = destroy_mkey(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to destroy MKey\n"); + } + if (err) + return err; + + mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, + page_shift, access_flags); + + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->umred = 0; + } else { + /* + * Send a UMR WQE + */ + err = rereg_umr(pd, mr, addr, len, npages, page_shift, + order, access_flags, flags); + if (err) { + mlx5_ib_warn(dev, "Failed to rereg UMR\n"); + return err; + } + } + + if (flags & IB_MR_REREG_PD) { + ib_mr->pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + } + + if (flags & IB_MR_REREG_ACCESS) + mr->access_flags = access_flags; + + if (flags & IB_MR_REREG_TRANS) { + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + set_mr_fileds(dev, mr, npages, len, access_flags); + mr->mmkey.iova = addr; + mr->mmkey.size = len; + } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + update_odp_mr(mr); +#endif + + return 0; +} + static int mlx5_alloc_priv_descs(struct ib_device *device, struct mlx5_ib_mr *mr, @@ -1236,7 +1457,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmr.key, err); + mr->mmkey.key, err); return err; } } else { @@ -1300,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; - int access_mode, err; - int ndescs = roundup(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg, 4); + int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -1319,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - access_mode = MLX5_ACCESS_MODE_MTT; + mr->access_mode = MLX5_ACCESS_MODE_MTT; in->seg.log2_page_size = PAGE_SHIFT; err = mlx5_alloc_priv_descs(pd->device, mr, @@ -1329,6 +1550,15 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; + } else if (mr_type == IB_MR_TYPE_SG_GAPS) { + mr->access_mode = MLX5_ACCESS_MODE_KLM; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(struct mlx5_klm)); + if (err) + goto err_free_in; + mr->desc_size = sizeof(struct mlx5_klm); + mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; @@ -1347,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1361,14 +1591,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), + in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_destroy_psv; - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); @@ -1395,6 +1625,88 @@ err_free: return ERR_PTR(err); } +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in = NULL; + struct mlx5_ib_mw *mw = NULL; + int ndescs; + int err; + struct mlx5_ib_alloc_mw req = {}; + struct { + __u32 comp_mask; + __u32 response_length; + } resp = {}; + + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); + if (err) + return ERR_PTR(err); + + if (req.comp_mask || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + udata->inlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + + ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); + + mw = kzalloc(sizeof(*mw), GFP_KERNEL); + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!mw || !in) { + err = -ENOMEM; + goto free; + } + + in->seg.status = MLX5_MKEY_STATUS_FREE; + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | + MLX5_PERM_LOCAL_READ; + if (type == IB_MW_TYPE_2) + in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto free; + + mw->ibmw.rkey = mw->mmkey.key; + + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); + if (resp.response_length) { + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) { + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + goto free; + } + } + + kfree(in); + return &mw->ibmw; + +free: + kfree(mw); + kfree(in); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_mw(struct ib_mw *mw) +{ + struct mlx5_ib_mw *mmw = to_mmw(mw); + int err; + + err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, + &mmw->mmkey); + if (!err) + kfree(mmw); + return err; +} + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -1436,6 +1748,32 @@ done: return ret; } +static int +mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, + struct scatterlist *sgl, + unsigned short sg_nents) +{ + struct scatterlist *sg = sgl; + struct mlx5_klm *klms = mr->descs; + u32 lkey = mr->ibmr.pd->local_dma_lkey; + int i; + + mr->ibmr.iova = sg_dma_address(sg); + mr->ibmr.length = 0; + mr->ndescs = sg_nents; + + for_each_sg(sgl, sg, sg_nents, i) { + if (unlikely(i > mr->max_descs)) + break; + klms[i].va = cpu_to_be64(sg_dma_address(sg)); + klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); + klms[i].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg); + } + + return i; +} + static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -1463,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); + else + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b8d76361a48d..34e79e709c67 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, u32 key) { u32 base_key = mlx5_base_mkey(key); - struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); - struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); + struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); + struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mmr || mmr->key != key || !mr->live) + if (!mmkey || mmkey->key != key || !mr->live) return NULL; - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, @@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp, io_virt += pfault->mpfault.bytes_committed; bcnt -= pfault->mpfault.bytes_committed; - start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; + start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; if (mr->umem->writable) access_mask |= ODP_WRITE_ALLOWED_BIT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 34cb8e87c7b8..8dee8bc1e0fe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -58,6 +58,7 @@ enum { static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_LSO] = MLX5_OPCODE_LSO, [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, @@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = { [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; static int is_qp0(enum ib_qp_type qp_type) { @@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, return 0; } -static int sq_overhead(enum ib_qp_type qp_type) +static int sq_overhead(struct ib_qp_init_attr *attr) { int size = 0; - switch (qp_type) { + switch (attr->qp_type) { case IB_QPT_XRC_INI: size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ @@ -287,8 +291,12 @@ static int sq_overhead(enum ib_qp_type qp_type) break; case IB_QPT_UD: + if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + size += sizeof(struct mlx5_wqe_eth_pad) + + sizeof(struct mlx5_wqe_eth_seg); + /* fall through */ case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; @@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) int inl_size = 0; int size; - size = sq_overhead(attr->qp_type); + size = sq_overhead(attr); if (size < 0) return size; @@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -EINVAL; } - qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - - sizeof(struct mlx5_wqe_inline_seg); + qp->max_inline_data = wqe_size - sq_overhead(attr) - + sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) @@ -590,7 +598,7 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_XRC_INI: case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; case IB_QPT_SMI: return MLX5_QP_ST_QP0; - case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; case IB_QPT_RAW_PACKET: case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; @@ -783,7 +791,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev->priv.uuari; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_IPOIB_UD_LSO | + mlx5_ib_create_qp_sqpn_qp1())) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -828,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, (*in)->ctx.params1 |= cpu_to_be32(1 << 11); (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + (*in)->ctx.deth_sqpn = cpu_to_be32(1); + qp->flags |= MLX5_IB_QP_SQPN_QP1; + } + mlx5_fill_page_array(&qp->buf, (*in)->pas); err = mlx5_db_alloc(dev->mdev, &qp->db); @@ -1228,6 +1244,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) qp->flags |= MLX5_IB_QP_MANAGED_RECV; } + + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) + if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); + return -EOPNOTSUPP; + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -1271,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd.sq_wqe_count, max_wqes); return -EINVAL; } + if (init_attr->create_flags & + mlx5_ib_create_qp_sqpn_qp1()) { + mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); + return -EINVAL; + } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) @@ -1385,6 +1414,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* 0xffffff means we ask to work with cqe version 0 */ MLX5_SET(qpc, qpc, user_index, uidx); } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + qp->flags |= MLX5_IB_QP_LSO; + } if (init_attr->qp_type == IB_QPT_RAW_PACKET) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; @@ -1494,7 +1530,7 @@ static void get_cqs(struct mlx5_ib_qp *qp, break; case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -1657,7 +1693,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case MLX5_IB_QPT_REG_UMR: qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) @@ -1686,6 +1722,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, break; + case IB_QPT_GSI: + return mlx5_ib_gsi_create_qp(pd, init_attr); + case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_MAX: @@ -1704,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); + if (unlikely(qp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_destroy_qp(qp); + destroy_qp_common(dev, mqp); kfree(mqp); @@ -2161,8 +2203,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); - if (err < 0) + if (err < 0) { + mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); goto out; + } context->flags = cpu_to_be32(err << 16); @@ -2182,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { @@ -2284,6 +2328,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -2363,11 +2409,18 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); + + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? + IB_QPT_GSI : ibqp->qp_type; + mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -2378,32 +2431,46 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - ll)) + if (qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { + mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", + cur_state, new_state, ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || - attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { + mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", + attr->port_num, dev->num_ports); goto out; + } if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) + dev->mdev->port_caps[port - 1].pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", + attr->pkey_index); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); goto out; + } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); goto out; + } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; @@ -2442,6 +2509,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } +static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, + struct ib_send_wr *wr, void *qend, + struct mlx5_ib_qp *qp, int *size) +{ + void *seg = eseg; + + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); + + if (wr->send_flags & IB_SEND_IP_CSUM) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + + seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; + + if (wr->opcode == IB_WR_LSO) { + struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); + int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start); + u64 left, leftlen, copysz; + void *pdata = ud_wr->header; + + left = ud_wr->hlen; + eseg->mss = cpu_to_be16(ud_wr->mss); + eseg->inline_hdr_sz = cpu_to_be16(left); + + /* + * check if there is space till the end of queue, if yes, + * copy all in one shot, otherwise copy till the end of queue, + * rollback and than the copy the left + */ + leftlen = qend - (void *)eseg->inline_hdr_start; + copysz = min_t(u64, leftlen, left); + + memcpy(seg - size_of_inl_hdr_start, pdata, copysz); + + if (likely(copysz > size_of_inl_hdr_start)) { + seg += ALIGN(copysz - size_of_inl_hdr_start, 16); + *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; + } + + if (unlikely(copysz < left)) { /* the last wqe in the queue */ + seg = mlx5_get_send_wqe(qp, 0); + left -= copysz; + pdata += copysz; + memcpy(seg, pdata, left); + seg += ALIGN(left, 16); + *size += ALIGN(left, 16) / 16; + } + } + + return seg; +} + static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -2509,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, int ndescs = mr->ndescs; memset(umr, 0, sizeof(*umr)); + + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + umr->flags = MLX5_UMR_CHECK_NOT_FREE; umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); @@ -2558,6 +2683,44 @@ static __be64 get_umr_update_mtt_mask(void) return cpu_to_be64(result); } +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { @@ -2576,9 +2739,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = get_umr_update_mtt_mask(); umr->bsf_octowords = get_klm_octo(umrwr->target.offset); umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } else { - umr->mkey_mask = get_umr_reg_mr_mask(); } + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) + umr->mkey_mask |= get_umr_update_translation_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS) + umr->mkey_mask |= get_umr_update_access_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) + umr->mkey_mask |= get_umr_update_pd_mask(); + if (!umr->mkey_mask) + umr->mkey_mask = get_umr_reg_mr_mask(); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -2603,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, int ndescs = ALIGN(mr->ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + + if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + seg->log2_page_size = ilog2(mr->ibmr.page_size); + else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + + seg->flags = get_umr_flags(access) | mr->access_mode; seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); seg->start_addr = cpu_to_be64(mr->ibmr.iova); seg->len = cpu_to_be64(mr->ibmr.length); seg->xlt_oct_size = cpu_to_be32(ndescs); - seg->log2_page_size = ilog2(mr->ibmr.page_size); } static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) @@ -2630,7 +2805,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w seg->flags = convert_access(umrwr->access_flags); if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + if (umrwr->pd) + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); } seg->len = cpu_to_be64(umrwr->length); @@ -3196,13 +3372,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf = qp->bf; + struct mlx5_bf *bf; int uninitialized_var(size); - void *qend = qp->sq.qend; + void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -3214,6 +3390,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u8 next_fence = 0; u8 fence; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); + + qp = to_mqp(ibqp); + bf = qp->bf; + qend = qp->sq.qend; + spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -3373,16 +3556,37 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case IB_QPT_UD: case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); break; + case IB_QPT_UD: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + seg += sizeof(struct mlx5_wqe_eth_pad); + size += sizeof(struct mlx5_wqe_eth_pad) / 16; + seg = set_eth_seg(seg, wr, qend, qp, &size); + + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + } + break; case MLX5_IB_QPT_REG_UMR: if (wr->opcode != MLX5_IB_WR_UMR) { err = -EINVAL; @@ -3502,6 +3706,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int i; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); + spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); @@ -3822,6 +4029,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, + qp_init_attr); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * Wait for any outstanding page faults, in case the user frees memory @@ -3874,6 +4085,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index b94a55404a59..61bc308bb802 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; +struct mlx5_ib_alloc_mw { + __u32 comp_mask; + __u8 num_klms; + __u8 reserved1; + __u16 reserved2; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 846dc97cf260..7964eba8e7ed 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -2,7 +2,6 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" depends on PCI && INET && INFINIBAND select LIBCRC32C - select INET_LRO ---help--- This is the RDMA Network Interface Card (RNIC) driver for NetEffect Ethernet Cluster Server Adapters. diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 9f9d5c563a61..35cbb17bec12 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -111,17 +111,6 @@ static struct pci_device_id nes_pci_table[] = { MODULE_DEVICE_TABLE(pci, nes_pci_table); -/* registered nes netlink callbacks */ -static struct ibnl_client_cbs nes_nl_cb_table[] = { - [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, - [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, - [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, - [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, - [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, - [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, - [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} -}; - static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); static int nes_notifiers_registered; @@ -682,17 +671,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) } nes_notifiers_registered++; - if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table)) - printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n", - __func__, __LINE__); - - ret = iwpm_init(RDMA_NL_NES); - if (ret) { - printk(KERN_ERR PFX "%s: port mapper initialization failed\n", - pci_name(pcidev)); - goto bail7; - } - INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); /* Initialize network devices */ @@ -731,7 +709,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", nesdev->netdev_count, nesdev->nesadapter->netdev_count); - ibnl_remove_client(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -795,8 +772,6 @@ static void nes_remove(struct pci_dev *pcidev) nesdev->nesadapter->netdev_count--; } } - ibnl_remove_client(RDMA_NL_NES); - iwpm_exit(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index cb9f0f27308d..7f0aa23aef9d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb, iph->ttl = 0x40; iph->protocol = 0x06; /* IPPROTO_TCP */ - iph->saddr = htonl(cm_node->mapped_loc_addr); - iph->daddr = htonl(cm_node->mapped_rem_addr); + iph->saddr = htonl(cm_node->loc_addr); + iph->daddr = htonl(cm_node->rem_addr); - tcph->source = htons(cm_node->mapped_loc_port); - tcph->dest = htons(cm_node->mapped_rem_port); + tcph->source = htons(cm_node->loc_port); + tcph->dest = htons(cm_node->rem_port); tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { @@ -525,125 +525,6 @@ static void form_cm_frame(struct sk_buff *skb, cm_packets_created++; } -/* - * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct - */ -static void nes_create_sockaddr(__be32 ip_addr, __be16 port, - struct sockaddr_storage *addr) -{ - struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr; - nes_sockaddr->sin_family = AF_INET; - memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32)); - nes_sockaddr->sin_port = port; -} - -/* - * nes_create_mapinfo - Create a mapinfo object in the port mapper data base - */ -static int nes_create_mapinfo(struct nes_cm_info *cm_info) -{ - struct sockaddr_storage local_sockaddr; - struct sockaddr_storage mapped_sockaddr; - - nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), - &local_sockaddr); - nes_create_sockaddr(htonl(cm_info->mapped_loc_addr), - htons(cm_info->mapped_loc_port), &mapped_sockaddr); - - return iwpm_create_mapinfo(&local_sockaddr, - &mapped_sockaddr, RDMA_NL_NES); -} - -/* - * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base - * and send a remove mapping op message to - * the userspace port mapper - */ -static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port, - u32 mapped_loc_addr, u16 mapped_loc_port) -{ - struct sockaddr_storage local_sockaddr; - struct sockaddr_storage mapped_sockaddr; - - nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr); - nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port), - &mapped_sockaddr); - - iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr); - return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES); -} - -/* - * nes_form_pm_msg - Form a port mapper message with mapping info - */ -static void nes_form_pm_msg(struct nes_cm_info *cm_info, - struct iwpm_sa_data *pm_msg) -{ - nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), - &pm_msg->loc_addr); - nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port), - &pm_msg->rem_addr); -} - -/* - * nes_form_reg_msg - Form a port mapper message with dev info - */ -static void nes_form_reg_msg(struct nes_vnic *nesvnic, - struct iwpm_dev_data *pm_msg) -{ - memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name, - IWPM_DEVNAME_SIZE); - memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); -} - -static void record_sockaddr_info(struct sockaddr_storage *addr_info, - nes_addr_t *ip_addr, u16 *port_num) -{ - struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info; - - if (in_addr->sin_family == AF_INET) { - *ip_addr = ntohl(in_addr->sin_addr.s_addr); - *port_num = ntohs(in_addr->sin_port); - } -} - -/* - * nes_record_pm_msg - Save the received mapping info - */ -static void nes_record_pm_msg(struct nes_cm_info *cm_info, - struct iwpm_sa_data *pm_msg) -{ - record_sockaddr_info(&pm_msg->mapped_loc_addr, - &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port); - - record_sockaddr_info(&pm_msg->mapped_rem_addr, - &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port); -} - -/* - * nes_get_reminfo - Get the address info of the remote connecting peer - */ -static int nes_get_remote_addr(struct nes_cm_node *cm_node) -{ - struct sockaddr_storage mapped_loc_addr, mapped_rem_addr; - struct sockaddr_storage remote_addr; - int ret; - - nes_create_sockaddr(htonl(cm_node->mapped_loc_addr), - htons(cm_node->mapped_loc_port), &mapped_loc_addr); - nes_create_sockaddr(htonl(cm_node->mapped_rem_addr), - htons(cm_node->mapped_rem_port), &mapped_rem_addr); - - ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr, - &remote_addr, RDMA_NL_NES); - if (ret) - nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n"); - else - record_sockaddr_info(&remote_addr, &cm_node->rem_addr, - &cm_node->rem_port); - return ret; -} - /** * print_core - dump a cm core */ @@ -1266,11 +1147,10 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, loc_addr, loc_port, cm_node->rem_addr, cm_node->rem_port, rem_addr, rem_port); - if ((cm_node->mapped_loc_addr == loc_addr) && - (cm_node->mapped_loc_port == loc_port) && - (cm_node->mapped_rem_addr == rem_addr) && - (cm_node->mapped_rem_port == rem_port)) { - + if ((cm_node->loc_addr == loc_addr) && + (cm_node->loc_port == loc_port) && + (cm_node->rem_addr == rem_addr) && + (cm_node->rem_port == rem_port)) { add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); return cm_node; @@ -1287,8 +1167,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, * find_listener - find a cm node listening on this addr-port pair */ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, - enum nes_cm_listener_state listener_state, int local) + nes_addr_t dst_addr, u16 dst_port, + enum nes_cm_listener_state listener_state) { unsigned long flags; struct nes_cm_listener *listen_node; @@ -1298,13 +1178,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { - if (local) { - listen_addr = listen_node->loc_addr; - listen_port = listen_node->loc_port; - } else { - listen_addr = listen_node->mapped_loc_addr; - listen_port = listen_node->mapped_loc_port; - } + listen_addr = listen_node->loc_addr; + listen_port = listen_node->loc_port; + /* compare node pair, return node handle if a match */ if (((listen_addr == dst_addr) || listen_addr == 0x00000000) && @@ -1443,17 +1319,13 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, if (listener->nesvnic) { nes_manage_apbvt(listener->nesvnic, - listener->mapped_loc_port, + listener->loc_port, PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_remove_mapinfo(listener->loc_addr, - listener->loc_port, - listener->mapped_loc_addr, - listener->mapped_loc_port); nes_debug(NES_DBG_NLMSG, - "Delete APBVT mapped_loc_port = %04X\n", - listener->mapped_loc_port); + "Delete APBVT loc_port = %04X\n", + listener->loc_port); } nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); @@ -1602,11 +1474,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->rem_addr = cm_info->rem_addr; cm_node->rem_port = cm_info->rem_port; - cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; - cm_node->mapped_rem_addr = cm_info->mapped_rem_addr; - cm_node->mapped_loc_port = cm_info->mapped_loc_port; - cm_node->mapped_rem_port = cm_info->mapped_rem_port; - cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; cm_node->mpav2_ird_ord = 0; @@ -1655,10 +1522,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr, - NULL, NES_ARP_RESOLVE); - arpindex = nes_addr_resolve_neigh(nesvnic, - cm_node->mapped_rem_addr, oldarpindex); + oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, + NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr, + oldarpindex); if (arpindex < 0) { kfree(cm_node); return NULL; @@ -1720,14 +1587,12 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); } else { if (cm_node->apbvt_set && cm_node->nesvnic) { - nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port, + nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); } - nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n", - cm_node->mapped_loc_port); - nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port, - cm_node->mapped_loc_addr, cm_node->mapped_loc_port); + nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n", + cm_node->loc_port); } atomic_dec(&cm_core->node_cnt); @@ -2184,7 +2049,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - nes_get_remote_addr(cm_node); handle_rcv_mpa(cm_node, skb); } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); @@ -2399,17 +2263,14 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; unsigned long flags; - int iwpm_err = 0; nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", cm_info->loc_addr, cm_info->loc_port); /* cannot have multiple matching listeners */ listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, - NES_CM_LISTENER_EITHER_STATE, 1); + NES_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? */ @@ -2419,22 +2280,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } if (!listener) { - nes_form_reg_msg(nesvnic, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); - if (iwpm_err) { - nes_debug(NES_DBG_NLMSG, - "Port Mapper reg pid fail (err = %d).\n", iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - nes_form_pm_msg(cm_info, &pm_msg); - iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES); - if (iwpm_err) - nes_debug(NES_DBG_NLMSG, - "Port Mapper query fail (err = %d).\n", iwpm_err); - else - nes_record_pm_msg(cm_info, &pm_msg); - } - /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); if (!listener) { @@ -2444,8 +2289,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, listener->loc_addr = cm_info->loc_addr; listener->loc_port = cm_info->loc_port; - listener->mapped_loc_addr = cm_info->mapped_loc_addr; - listener->mapped_loc_port = cm_info->mapped_loc_port; listener->reused_node = 0; atomic_set(&listener->ref_count, 1); @@ -2507,18 +2350,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, - cm_node->mapped_loc_addr, cm_node->mapped_rem_port, - NES_CM_LISTENER_ACTIVE_STATE, 0); + cm_node->loc_addr, cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; - loopback_cm_info.mapped_loc_port = - cm_info->mapped_rem_port; - loopback_cm_info.mapped_rem_port = - cm_info->mapped_loc_port; + loopback_cm_info.loc_port = + cm_info->rem_port; + loopback_cm_info.rem_port = + cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, loopbackremotelistener); @@ -2747,12 +2590,6 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); - /* If port mapper is available these should be mapped address info */ - nfo.mapped_loc_addr = ntohl(iph->daddr); - nfo.mapped_loc_port = ntohs(tcph->dest); - nfo.mapped_rem_addr = ntohl(iph->saddr); - nfo.mapped_rem_port = ntohs(tcph->source); - tmp_daddr = cpu_to_be32(iph->daddr); tmp_saddr = cpu_to_be32(iph->saddr); @@ -2761,8 +2598,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, do { cm_node = find_node(cm_core, - nfo.mapped_rem_port, nfo.mapped_rem_addr, - nfo.mapped_loc_port, nfo.mapped_loc_addr); + nfo.rem_port, nfo.rem_addr, + nfo.loc_port, nfo.loc_addr); if (!cm_node) { /* Only type of packet accepted are for */ @@ -2771,9 +2608,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, skb_handled = 0; break; } - listener = find_listener(cm_core, nfo.mapped_loc_addr, - nfo.mapped_loc_port, - NES_CM_LISTENER_ACTIVE_STATE, 0); + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (!listener) { nfo.cm_id = NULL; nfo.conn_type = 0; @@ -2856,12 +2693,22 @@ static struct nes_cm_core *nes_cm_alloc_core(void) nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); cm_core->event_wq = create_singlethread_workqueue("nesewq"); + if (!cm_core->event_wq) + goto out_free_cmcore; cm_core->post_event = nes_cm_post_event; nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + if (!cm_core->disconn_wq) + goto out_free_wq; print_core(cm_core); return cm_core; + +out_free_wq: + destroy_workqueue(cm_core->event_wq); +out_free_cmcore: + kfree(cm_core); + return NULL; } @@ -3121,8 +2968,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; cm_event.status = disconn_status; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3148,8 +2995,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) cm_event.event = IW_CM_EVENT_CLOSE; cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3240,8 +3087,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) u8 *start_ptr = &start_addr; u8 **start_buff = &start_ptr; u16 buff_len = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -3378,11 +3225,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_cm_init_tsa_conn(nesqp, cm_node); nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->mapped_loc_port); + cpu_to_le16(cm_node->loc_port); nesqp->nesqp_context->tcpPorts[1] = - cpu_to_le16(cm_node->mapped_rem_port); + cpu_to_le16(cm_node->rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3406,9 +3253,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); + nes_quad.SrcIpadr = htonl(cm_node->rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3437,8 +3284,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.event = IW_CM_EVENT_ESTABLISHED; cm_event.status = 0; cm_event.provider_data = (void *)nesqp; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; cm_event.ird = cm_node->ird_size; @@ -3508,11 +3355,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct nes_cm_node *cm_node; struct nes_cm_info cm_info; int apbvt_set = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; - int iwpm_err = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; if (cm_id->remote_addr.ss_family != AF_INET) return -ENOSYS; @@ -3558,37 +3402,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - /* No port mapper available, go with the specified peer information */ - cm_info.mapped_loc_addr = cm_info.loc_addr; - cm_info.mapped_loc_port = cm_info.loc_port; - cm_info.mapped_rem_addr = cm_info.rem_addr; - cm_info.mapped_rem_port = cm_info.rem_port; - - nes_form_reg_msg(nesvnic, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); - if (iwpm_err) { - nes_debug(NES_DBG_NLMSG, - "Port Mapper reg pid fail (err = %d).\n", iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - nes_form_pm_msg(&cm_info, &pm_msg); - iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES); - if (iwpm_err) - nes_debug(NES_DBG_NLMSG, - "Port Mapper query fail (err = %d).\n", iwpm_err); - else - nes_record_pm_msg(&cm_info, &pm_msg); - } - if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { - nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + nes_manage_apbvt(nesvnic, cm_info.loc_port, + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); apbvt_set = 1; } - if (nes_create_mapinfo(&cm_info)) - return -ENOMEM; - cm_id->add_ref(cm_id); /* create a connect CM node connection */ @@ -3597,14 +3417,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) &cm_info); if (!cm_node) { if (apbvt_set) - nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, + nes_manage_apbvt(nesvnic, cm_info.loc_port, PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n", - cm_info.mapped_loc_port); - nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port, - cm_info.mapped_loc_addr, cm_info.mapped_loc_port); + nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n", + cm_info.loc_port); cm_id->rem_ref(cm_id); return -ENOMEM; } @@ -3633,12 +3451,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) struct nes_cm_listener *cm_node; struct nes_cm_info cm_info; int err; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", cm_id, ntohs(laddr->sin_port)); - if (cm_id->local_addr.ss_family != AF_INET) + if (cm_id->m_local_addr.ss_family != AF_INET) return -ENOSYS; nesvnic = to_nesvnic(cm_id->device); if (!nesvnic) @@ -3658,10 +3476,6 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - /* No port mapper available, go with the specified info */ - cm_info.mapped_loc_addr = cm_info.loc_addr; - cm_info.mapped_loc_port = cm_info.loc_port; - cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { printk(KERN_ERR "%s[%u] Error returned from listen API call\n", @@ -3673,10 +3487,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { - if (nes_create_mapinfo(&cm_info)) - return -ENOMEM; - - err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port, + err = nes_manage_apbvt(nesvnic, cm_node->loc_port, PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); if (err) { @@ -3786,8 +3597,8 @@ static void cm_event_connected(struct nes_cm_event *event) nesvnic = to_nesvnic(nesqp->ibqp.device); nesdev = nesvnic->nesdev; nesadapter = nesdev->nesadapter; - laddr = (struct sockaddr_in *)&cm_id->local_addr; - raddr = (struct sockaddr_in *)&cm_id->remote_addr; + laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr; if (nesqp->destroyed) @@ -3802,10 +3613,10 @@ static void cm_event_connected(struct nes_cm_event *event) /* set the QP tsa context */ nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->mapped_loc_port); + cpu_to_le16(cm_node->loc_port); nesqp->nesqp_context->tcpPorts[1] = - cpu_to_le16(cm_node->mapped_rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); + cpu_to_le16(cm_node->rem_port); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3835,9 +3646,9 @@ static void cm_event_connected(struct nes_cm_event *event) nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); + nes_quad.SrcIpadr = htonl(cm_node->rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3858,14 +3669,14 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.provider_data = cm_id->provider_data; cm_event_laddr->sin_family = AF_INET; cm_event_laddr->sin_port = laddr->sin_port; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; - cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); @@ -3913,8 +3724,8 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3970,8 +3781,8 @@ static void cm_event_reset(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_DISCONNECT; cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3981,8 +3792,8 @@ static void cm_event_reset(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_CLOSE; cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 147c2c884227..d827d03e3941 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -293,8 +293,8 @@ struct nes_cm_listener { struct list_head list; struct nes_cm_core *cm_core; u8 loc_mac[ETH_ALEN]; - nes_addr_t loc_addr, mapped_loc_addr; - u16 loc_port, mapped_loc_port; + nes_addr_t loc_addr; + u16 loc_port; struct iw_cm_id *cm_id; enum nes_cm_conn_type conn_type; atomic_t ref_count; @@ -309,9 +309,7 @@ struct nes_cm_listener { /* per connection node and node state information */ struct nes_cm_node { nes_addr_t loc_addr, rem_addr; - nes_addr_t mapped_loc_addr, mapped_rem_addr; u16 loc_port, rem_port; - u16 mapped_loc_port, mapped_rem_port; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; @@ -368,11 +366,6 @@ struct nes_cm_info { u16 rem_port; nes_addr_t loc_addr; nes_addr_t rem_addr; - u16 mapped_loc_port; - u16 mapped_rem_port; - nes_addr_t mapped_loc_addr; - nes_addr_t mapped_rem_addr; - enum nes_cm_conn_type conn_type; int backlog; }; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 4713dd7ed764..a1c6481d8038 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -35,18 +35,11 @@ #include <linux/moduleparam.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <linux/ip.h> -#include <linux/tcp.h> #include <linux/if_vlan.h> -#include <linux/inet_lro.h> #include <linux/slab.h> #include "nes.h" -static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, uint, 0444); -MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); - static int wide_ppm_offset; module_param(wide_ppm_offset, int, 0644); MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); @@ -1642,25 +1635,6 @@ static void nes_rq_wqes_timeout(unsigned long parm) } -static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, - void **tcph, u64 *hdr_flags, void *priv) -{ - unsigned int ip_len; - struct iphdr *iph; - skb_reset_network_header(skb); - iph = ip_hdr(skb); - if (iph->protocol != IPPROTO_TCP) - return -1; - ip_len = ip_hdrlen(skb); - skb_set_transport_header(skb, ip_len); - *tcph = tcp_hdr(skb); - - *hdr_flags = LRO_IPV4 | LRO_TCP; - *iphdr = iph; - return 0; -} - - /** * nes_init_nic_qp */ @@ -1895,14 +1869,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) return -ENOMEM; } - nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; - nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; - nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; - nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; - nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - nesvnic->lro_mgr.dev = netdev; - nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -2809,13 +2775,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u16 pkt_type; u16 rqes_processed = 0; u8 sq_cqes = 0; - u8 nes_use_lro = 0; head = cq->cq_head; cq_size = cq->cq_size; cq->cqes_pending = 1; - if (nesvnic->netdev->features & NETIF_F_LRO) - nes_use_lro = 1; do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & NES_NIC_CQE_VALID) { @@ -2950,10 +2913,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); } - if (nes_use_lro) - lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); - else - netif_receive_skb(rx_skb); + napi_gro_receive(&nesvnic->napi, rx_skb); skip_rx_indicate0: ; @@ -2984,8 +2944,6 @@ skip_rx_indicate0: } while (1); - if (nes_use_lro) - lro_flush_all(&nesvnic->lro_mgr); if (sq_cqes) { barrier(); /* restart the queue if it had been stopped */ diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index c9080208aad2..1b66ef1e9937 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,8 +33,6 @@ #ifndef __NES_HW_H #define __NES_HW_H -#include <linux/inet_lro.h> - #define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 #define NES_PHY_TYPE_ARGUS 4 @@ -1049,8 +1047,6 @@ struct nes_hw_tune_timer { #define NES_TIMER_ENABLE_LIMIT 4 #define NES_MAX_LINK_INTERRUPTS 128 #define NES_MAX_LINK_CHECK 200 -#define NES_MAX_LRO_DESCRIPTORS 32 -#define NES_LRO_MAX_AGGR 64 struct nes_adapter { u64 fw_ver; @@ -1263,9 +1259,6 @@ struct nes_vnic { u8 next_qp_nic_index; u8 of_device_registered; u8 rdma_enabled; - u32 lro_max_aggr; - struct net_lro_mgr lro_mgr; - struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; struct timer_list event_timer; enum ib_event_type delayed_event; enum ib_event_type last_dispatched_event; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 6a0bdfa0ce2e..3ea9e055fdd3 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1085,9 +1085,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Free 4Kpbls", "Free 256pbls", "Timer Inits", - "LRO aggregated", - "LRO flushed", - "LRO no_desc", "PAU CreateQPs", "PAU DestroyQPs", }; @@ -1302,9 +1299,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[++index] = nesadapter->free_4kpbl; target_stat_values[++index] = nesadapter->free_256pbl; target_stat_values[++index] = int_mod_timer_init; - target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; - target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; - target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; target_stat_values[++index] = atomic_read(&pau_qps_created); target_stat_values[++index] = atomic_read(&pau_qps_destroyed); } @@ -1709,7 +1703,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->hw_features |= NETIF_F_TSO; netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; - netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," " nic_index = %d, logical_port = %d, mac_index = %d.\n", diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8c4daf7f22ec..fba69a39a7eb 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -56,7 +56,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr); /** * nes_alloc_mw */ -static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type, + struct ib_udata *udata) { struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); @@ -3768,6 +3769,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; nesibdev->ibdev.get_port_immutable = nes_port_immutable; + memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, + sizeof(nesibdev->ibdev.iwcm->ifname)); return nesibdev; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 12503f15fbd6..45bdfa0e3b2b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -114,6 +114,7 @@ struct ocrdma_dev_attr { u8 local_ca_ack_delay; u8 ird; u8 num_ird_pages; + u8 udp_encap; }; struct ocrdma_dma_mem { @@ -356,6 +357,7 @@ struct ocrdma_ah { struct ocrdma_av *av; u16 sgid_index; u32 id; + u8 hdr_type; }; struct ocrdma_qp_hwq_info { @@ -598,4 +600,10 @@ static inline u8 ocrdma_get_ae_link_state(u32 ae_state) return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); } +static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev) +{ + return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) || + (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3790771f2baa..797362a297b2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -55,18 +55,46 @@ #define OCRDMA_VID_PCP_SHIFT 0xD +static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) +{ + switch (hdr_type) { + case OCRDMA_L3_TYPE_IB_GRH: + return (u16)0x8915; + case OCRDMA_L3_TYPE_IPV4: + return (u16)0x0800; + case OCRDMA_L3_TYPE_IPV6: + return (u16)0x86dd; + default: + pr_err("ocrdma%d: Invalid network header\n", devid); + return 0; + } +} + static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { - int status = 0; + int status; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; + u16 proto_num = 0; + u8 nxthdr = 0x11; + struct iphdr ipv4; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; memset(ð, 0, sizeof(eth)); memset(&grh, 0, sizeof(grh)); + /* Protocol Number */ + proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type); + if (!proto_num) + return -EINVAL; + nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11; /* VLAN */ if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; @@ -78,13 +106,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, dev->id); } eth.eth_type = cpu_to_be16(0x8100); - eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + eth.roce_eth_type = cpu_to_be16(proto_num); vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; eth.vlan_tag = cpu_to_be16(vlan_tag); eth_sz = sizeof(struct ocrdma_eth_vlan); *isvlan = true; } else { - eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + eth.eth_type = cpu_to_be16(proto_num); eth_sz = sizeof(struct ocrdma_eth_basic); } /* MAC */ @@ -93,18 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, if (status) return status; ah->sgid_index = attr->grh.sgid_index; - memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); - memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); - - grh.tclass_flow = cpu_to_be32((6 << 28) | - (attr->grh.traffic_class << 24) | - attr->grh.flow_label); - /* 0x1b is next header value in GRH */ - grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | - (0x1b << 8) | attr->grh.hop_limit); /* Eth HDR */ memcpy(&ah->av->eth_hdr, ð, eth_sz); - memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); + if (ah->hdr_type == RDMA_NETWORK_IPV4) { + *((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) | + attr->grh.traffic_class); + ipv4.id = cpu_to_be16(pdid); + ipv4.frag_off = htons(IP_DF); + ipv4.tot_len = htons(0); + ipv4.ttl = attr->grh.hop_limit; + ipv4.protocol = nxthdr; + rdma_gid2ip(&sgid_addr._sockaddr, sgid); + ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr; + rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid); + ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr; + memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr)); + } else { + memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); + grh.tclass_flow = cpu_to_be32((6 << 28) | + (attr->grh.traffic_class << 24) | + attr->grh.flow_label); + memcpy(&grh.dgid[0], attr->grh.dgid.raw, + sizeof(attr->grh.dgid.raw)); + grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | + (nxthdr << 8) | + attr->grh.hop_limit); + memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); + } if (*isvlan) ah->av->valid |= OCRDMA_AV_VLAN_VALID; ah->av->valid = cpu_to_le32(ah->av->valid); @@ -128,6 +171,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); @@ -148,6 +192,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); dev_put(sgid_attr.ndev); } + /* Get network header type for this GID */ + ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); if ((pd->uctx) && (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && @@ -172,6 +218,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) ahid_addr = pd->uctx->ah_tbl.va + attr->dlid; *ahid_addr = 0; *ahid_addr |= ah->id & OCRDMA_AH_ID_MASK; + if (ocrdma_is_udp_encap_supported(dev)) { + *ahid_addr |= ((u32)ah->hdr_type & + OCRDMA_AH_L3_TYPE_MASK) << + OCRDMA_AH_L3_TYPE_SHIFT; + } if (isvlan) *ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK << OCRDMA_AH_VLAN_VALID_SHIFT); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 04a30ae67473..3856dd4c7e3d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -46,9 +46,10 @@ enum { OCRDMA_AH_ID_MASK = 0x3FF, OCRDMA_AH_VLAN_VALID_MASK = 0x01, - OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F + OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F, + OCRDMA_AH_L3_TYPE_MASK = 0x03, + OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; - struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 283ca842ff74..16740dcb876b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1113,7 +1113,7 @@ mbx_err: static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, void *payload_va) { - int status = 0; + int status; struct ocrdma_mbx_rsp *rsp = payload_va; if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> @@ -1144,6 +1144,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_pd = (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT; + attr->udp_encap = (rsp->max_pd_ca_ack_delay & + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >> + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT; attr->max_dpp_pds = (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET; @@ -2138,7 +2141,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, enum ib_qp_state *old_ib_state) { unsigned long flags; - int status = 0; enum ocrdma_qp_state new_state; new_state = get_ocrdma_qp_state(new_ib_state); @@ -2163,7 +2165,7 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, qp->state = new_state; spin_unlock_irqrestore(&qp->q_lock, flags); - return status; + return 0; } static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp) @@ -2501,7 +2503,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, union ib_gid sgid, zgid; struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; - u8 mac_addr[6]; + u8 mac_addr[6], hdr_type; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); if ((ah_attr->ah_flags & IB_AH_GRH) == 0) @@ -2516,6 +2523,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.hop_lmt_rq_psn |= (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; + + /* GIDs */ memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); @@ -2538,6 +2547,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, return status; cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); + + hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + if (hdr_type == RDMA_NETWORK_IPV4) { + rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid); + memcpy(&cmd->params.dgid[0], + &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); + memcpy(&cmd->params.sgid[0], + &sgid_addr._sockaddr_in.sin_addr.s_addr, 4); + } /* convert them to LE format. */ ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); @@ -2558,7 +2577,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.rnt_rc_sl_fl |= (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } - + cmd->params.max_sge_recv_flags |= ((hdr_type << + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) & + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK); return 0; } @@ -2871,7 +2892,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, struct ocrdma_dcbx_cfg *dcbxcfg) { - int status = 0; + int status; dma_addr_t pa; struct ocrdma_mqe cmd; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index f38743018cb4..3d75f65ce87e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -89,8 +89,10 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct ocrdma_dev *dev; int err; + dev = get_ocrdma_dev(ibdev); err = ocrdma_query_port(ibdev, port_num, &attr); if (err) return err; @@ -98,6 +100,8 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (ocrdma_is_udp_encap_supported(dev)) + immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 99dd6fdf06d7..0efc9662c6d8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -140,7 +140,11 @@ enum { OCRDMA_DB_RQ_SHIFT = 24 }; -#define OCRDMA_ROUDP_FLAGS_SHIFT 0x03 +enum { + OCRDMA_L3_TYPE_IB_GRH = 0x00, + OCRDMA_L3_TYPE_IPV4 = 0x01, + OCRDMA_L3_TYPE_IPV6 = 0x02 +}; #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ @@ -546,7 +550,8 @@ enum { OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8, OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF << OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT, - + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT = 3, + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, @@ -1107,6 +1112,8 @@ enum { OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT = 11, + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK = BIT(11) | BIT(12) | BIT(13), OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, @@ -1735,8 +1742,11 @@ enum { /* w1 */ OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, + OCRDMA_CQE_UD_XFER_LEN_MASK = 0x1FFF, OCRDMA_CQE_PKEY_SHIFT = 0, OCRDMA_CQE_PKEY_MASK = 0xFFFF, + OCRDMA_CQE_UD_L3TYPE_SHIFT = 29, + OCRDMA_CQE_UD_L3TYPE_MASK = 0x07, /* w2 */ OCRDMA_CQE_QPN_SHIFT = 0, @@ -1861,7 +1871,7 @@ struct ocrdma_ewqe_ud_hdr { u32 rsvd_dest_qpn; u32 qkey; u32 rsvd_ahid; - u32 rsvd; + u32 hdr_type; }; /* extended wqe followed by hdr_wqe for Fast Memory register */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 255f774080a4..8bef09a8c49f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -610,7 +610,7 @@ static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev) static void ocrdma_update_stats(struct ocrdma_dev *dev) { ulong now = jiffies, secs; - int status = 0; + int status; struct ocrdma_rdma_stats_resp *rdma_stats = (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; @@ -641,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp, { char tmp_str[32]; long reset; - int status = 0; + int status; struct ocrdma_stats *pstats = filp->private_data; struct ocrdma_dev *dev = pstats->dev; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 12420e4ecf3d..a8496a18e20d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -419,7 +419,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ib_udata *udata) { struct ocrdma_pd *pd = NULL; - int status = 0; + int status; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) @@ -468,7 +468,7 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { - int status = 0; + int status; if (dev->pd_mgr->pd_prealloc_valid) status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); @@ -596,7 +596,7 @@ map_err: int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { - int status = 0; + int status; struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); @@ -623,7 +623,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; u64 unmapped_db = (u64) dev->nic_info.unmapped_db; unsigned long len = (vma->vm_end - vma->vm_start); - int status = 0; + int status; bool found; if (vma->vm_start & (PAGE_SIZE - 1)) @@ -1285,7 +1285,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, struct ib_udata *udata, int dpp_offset, int dpp_credit_lmt, int srq) { - int status = 0; + int status; u64 usr_db; struct ocrdma_create_qp_uresp uresp; struct ocrdma_pd *pd = qp->pd; @@ -1494,9 +1494,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); - - return status; + return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); } int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -1949,7 +1947,7 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata) { - int status = 0; + int status; struct ocrdma_srq *srq; srq = get_ocrdma_srq(ibsrq); @@ -2005,6 +2003,7 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, else ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; + ud_hdr->hdr_type = ah->hdr_type; if (ah->av->valid & OCRDMA_AV_VLAN_VALID) hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); } @@ -2717,9 +2716,11 @@ static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, return expand; } -static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) +static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc, + struct ocrdma_cqe *cqe) { int status; + u16 hdr_type = 0; status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; @@ -2728,7 +2729,17 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) ibwc->pkey_index = 0; ibwc->wc_flags = IB_WC_GRH; ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> - OCRDMA_CQE_UD_XFER_LEN_SHIFT); + OCRDMA_CQE_UD_XFER_LEN_SHIFT) & + OCRDMA_CQE_UD_XFER_LEN_MASK; + + if (ocrdma_is_udp_encap_supported(dev)) { + hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >> + OCRDMA_CQE_UD_L3TYPE_SHIFT) & + OCRDMA_CQE_UD_L3TYPE_MASK; + ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; + ibwc->network_hdr_type = hdr_type; + } + return status; } @@ -2791,12 +2802,15 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, struct ib_wc *ibwc) { + struct ocrdma_dev *dev; + + dev = get_ocrdma_dev(qp->ibqp.device); ibwc->opcode = IB_WC_RECV; ibwc->qp = &qp->ibqp; ibwc->status = IB_WC_SUCCESS; if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) - ocrdma_update_ud_rcqe(ibwc, cqe); + ocrdma_update_ud_rcqe(dev, ibwc, cqe); else ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a6f3eab0f350..85be0de3ab26 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -244,6 +244,7 @@ struct ipoib_cm_tx { unsigned tx_tail; unsigned long flags; u32 mtu; + unsigned max_send_sge; }; struct ipoib_cm_rx_buf { @@ -390,6 +391,7 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + unsigned max_send_sge; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 917e46ea3bf6..c8ed53562c9b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_tx_buf *tx_req; int rc; + unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); return; } - + if (skb_shinfo(skb)->nr_frags > usable_sge) { + if (skb_linearize(skb) < 0) { + ipoib_warn(priv, "skb could not be linearized\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + /* Does skb_linearize return ok without reducing nr_frags? */ + if (skb_shinfo(skb)->nr_frags > usable_sge) { + ipoib_warn(priv, "too many frags after skb linearize\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + } ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", tx->tx_head, skb->len, tx->qp->qp_num); @@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + attr.cap.max_send_sge = + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { @@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } + tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index fa9c42ff1fb0..899e6b7fb8a5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; + unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, phead = NULL; hlen = 0; } + if (skb_shinfo(skb)->nr_frags > usable_sge) { + if (skb_linearize(skb) < 0) { + ipoib_warn(priv, "skb could not be linearized\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + /* Does skb_linearize return ok without reducing nr_frags? */ + if (skb_shinfo(skb)->nr_frags > usable_sge) { + ipoib_warn(priv, "too many frags after skb linearize\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + } ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", skb->len, address, qpn); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index d48c5bae7877..b809c373e40e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; if (dev->features & NETIF_F_SG) - init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + init_attr.cap.max_send_sge = + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { @@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; + priv->max_send_sge = init_attr.cap.max_send_sge; + return 0; out_free_send_cq: diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index c827c93f46c5..80b6bedc172f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -969,7 +969,16 @@ static umode_t iser_attr_is_visible(int param_type, int param) static int iscsi_iser_slave_alloc(struct scsi_device *sdev) { - blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + struct iscsi_session *session; + struct iser_conn *iser_conn; + struct ib_device *ib_dev; + + session = starget_to_session(scsi_target(sdev))->dd_data; + iser_conn = session->leadconn->dd_data; + ib_dev = iser_conn->ib_conn.device->ib_device; + + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 95f0a64e076b..0351059783b1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -458,9 +458,6 @@ struct iser_fr_pool { * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support - * @last: last send wr to signal all flush errors were drained - * @last_cqe: cqe handler for last wr - * @last_comp: completes when all connection completions consumed */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -472,10 +469,7 @@ struct ib_conn { struct iser_comp *comp; struct iser_fr_pool fr_pool; bool pi_support; - struct ib_send_wr last; - struct ib_cqe last_cqe; struct ib_cqe reg_cqe; - struct completion last_comp; }; /** @@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc); -void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_task_rdma_init(struct iscsi_iser_task *task); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ed54b388e7ad..81ae2e30dd12 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc) kmem_cache_free(ig.desc_cache, desc); } -void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_conn *ib_conn = wc->qp->qp_context; - - complete(&ib_conn->last_comp); -} - void iser_task_rdma_init(struct iscsi_iser_task *iser_task) { diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 40c0f4978e2f..1b4945367e4f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -252,14 +252,21 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn) } static int -iser_alloc_reg_res(struct ib_device *ib_device, +iser_alloc_reg_res(struct iser_device *device, struct ib_pd *pd, struct iser_reg_resources *res, unsigned int size) { + struct ib_device *ib_dev = device->ib_device; + enum ib_mr_type mr_type; int ret; - res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + + res->mr = ib_alloc_mr(pd, mr_type, size); if (IS_ERR(res->mr)) { ret = PTR_ERR(res->mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); @@ -277,7 +284,7 @@ iser_free_reg_res(struct iser_reg_resources *rsc) } static int -iser_alloc_pi_ctx(struct ib_device *ib_device, +iser_alloc_pi_ctx(struct iser_device *device, struct ib_pd *pd, struct iser_fr_desc *desc, unsigned int size) @@ -291,7 +298,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, pi_ctx = desc->pi_ctx; - ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size); + ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size); if (ret) { iser_err("failed to allocate reg_resources\n"); goto alloc_reg_res_err; @@ -324,7 +331,7 @@ iser_free_pi_ctx(struct iser_pi_context *pi_ctx) } static struct iser_fr_desc * -iser_create_fastreg_desc(struct ib_device *ib_device, +iser_create_fastreg_desc(struct iser_device *device, struct ib_pd *pd, bool pi_enable, unsigned int size) @@ -336,12 +343,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, if (!desc) return ERR_PTR(-ENOMEM); - ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size); + ret = iser_alloc_reg_res(device, pd, &desc->rsc, size); if (ret) goto reg_res_alloc_failure; if (pi_enable) { - ret = iser_alloc_pi_ctx(ib_device, pd, desc, size); + ret = iser_alloc_pi_ctx(device, pd, desc, size); if (ret) goto pi_ctx_alloc_failure; } @@ -374,7 +381,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { - desc = iser_create_fastreg_desc(device->ib_device, device->pd, + desc = iser_create_fastreg_desc(device, device->pd, ib_conn->pi_support, size); if (IS_ERR(desc)) { ret = PTR_ERR(desc); @@ -663,7 +670,6 @@ void iser_conn_release(struct iser_conn *iser_conn) int iser_conn_terminate(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct ib_send_wr *bad_wr; int err = 0; /* terminate the iser conn only if the conn state is UP */ @@ -688,14 +694,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_err("Failed to disconnect, conn: 0x%p err %d\n", iser_conn, err); - /* post an indication that all flush errors were consumed */ - err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr); - if (err) { - iser_err("conn %p failed to post last wr", ib_conn); - return 1; - } - - wait_for_completion(&ib_conn->last_comp); + /* block until all flush errors are consumed */ + ib_drain_sq(ib_conn->qp); } return 1; @@ -954,10 +954,6 @@ void iser_conn_init(struct iser_conn *iser_conn) ib_conn->post_recv_buf_count = 0; ib_conn->reg_cqe.done = iser_reg_comp; - ib_conn->last_cqe.done = iser_last_comp; - ib_conn->last.wr_cqe = &ib_conn->last_cqe; - ib_conn->last.opcode = IB_WR_SEND; - init_completion(&ib_conn->last_comp); } /** diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 03022f6420d7..b6bf20496021 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) dev->max_pages_per_mr); } -static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct srp_rdma_ch *ch = cq->cq_context; - - complete(&ch->done); -} - -static struct ib_cqe srp_drain_cqe = { - .done = srp_drain_done, -}; - /** * srp_destroy_qp() - destroy an RDMA queue pair * @ch: SRP RDMA channel. * - * Change a queue pair into the error state and wait until all receive - * completions have been processed before destroying it. This avoids that - * the receive completion handler can access the queue pair while it is + * Drain the qp before destroying it. This avoids that the receive + * completion handler can access the queue pair while it is * being destroyed. */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; - static struct ib_recv_wr wr = { 0 }; - struct ib_recv_wr *bad_wr; - int ret; - - wr.wr_cqe = &srp_drain_cqe; - /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(ch->connected); - - ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); - WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); - if (ret) - goto out; - - init_completion(&ch->done); - ret = ib_post_recv(ch->qp, &wr, &bad_wr); - WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret); - if (ret == 0) - wait_for_completion(&ch->done); - -out: + ib_drain_rq(ch->qp); ib_destroy_qp(ch->qp); } @@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) if (!init_attr) return -ENOMEM; - /* queue_size + 1 for ib_drain_qp */ + /* queue_size + 1 for ib_drain_rq() */ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, ch->comp_vector, IB_POLL_SOFTIRQ); if (IS_ERR(recv_cq)) { diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0c37fee363b1..25bdaeef2520 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -91,76 +91,32 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; -static void srpt_release_channel(struct srpt_rdma_ch *ch); +static void srpt_release_cmd(struct se_cmd *se_cmd); +static void srpt_free_ch(struct kref *kref); static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_process_wait_list(struct srpt_rdma_ch *ch); -/** - * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. - */ -static inline -enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) -{ - switch (dir) { - case DMA_TO_DEVICE: return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: return DMA_TO_DEVICE; - default: return dir; - } -} - -/** - * srpt_sdev_name() - Return the name associated with the HCA. - * - * Examples are ib0, ib1, ... - */ -static inline const char *srpt_sdev_name(struct srpt_device *sdev) -{ - return sdev->device->name; -} - -static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) -{ - unsigned long flags; - enum rdma_ch_state state; - - spin_lock_irqsave(&ch->spinlock, flags); - state = ch->state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return state; -} - -static enum rdma_ch_state -srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) -{ - unsigned long flags; - enum rdma_ch_state prev; - - spin_lock_irqsave(&ch->spinlock, flags); - prev = ch->state; - ch->state = new_state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return prev; -} - -/** - * srpt_test_and_set_ch_state() - Test and set the channel state. - * - * Returns true if and only if the channel state has been set to the new state. +/* + * The only allowed channel state changes are those that change the channel + * state into a state with a higher numerical value. Hence the new > prev test. */ -static bool -srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, - enum rdma_ch_state new) +static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) { unsigned long flags; enum rdma_ch_state prev; + bool changed = false; spin_lock_irqsave(&ch->spinlock, flags); prev = ch->state; - if (prev == old) + if (new > prev) { ch->state = new; + changed = true; + } spin_unlock_irqrestore(&ch->spinlock, flags); - return prev == old; + + return changed; } /** @@ -182,7 +138,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, return; pr_debug("ASYNC event= %d on device= %s\n", event->event, - srpt_sdev_name(sdev)); + sdev->device->name); switch (event->event) { case IB_EVENT_PORT_ERR: @@ -220,25 +176,39 @@ static void srpt_srq_event(struct ib_event *event, void *ctx) pr_info("SRQ event %d\n", event->event); } +static const char *get_ch_state_name(enum rdma_ch_state s) +{ + switch (s) { + case CH_CONNECTING: + return "connecting"; + case CH_LIVE: + return "live"; + case CH_DISCONNECTING: + return "disconnecting"; + case CH_DRAINING: + return "draining"; + case CH_DISCONNECTED: + return "disconnected"; + } + return "???"; +} + /** * srpt_qp_event() - QP event callback function. */ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) { pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", - event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); + event->event, ch->cm_id, ch->sess_name, ch->state); switch (event->event) { case IB_EVENT_COMM_EST: ib_cm_notify(ch->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: - if (srpt_test_and_set_ch_state(ch, CH_DRAINING, - CH_RELEASING)) - srpt_release_channel(ch); - else - pr_debug("%s: state %d - ignored LAST_WQE.\n", - ch->sess_name, srpt_get_ch_state(ch)); + pr_debug("%s-%d, state %s: received Last WQE event.\n", + ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); break; default: pr_err("received unrecognized IB QP event %d\n", event->event); @@ -281,7 +251,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) struct ib_class_port_info *cif; cif = (struct ib_class_port_info *)mad->data; - memset(cif, 0, sizeof *cif); + memset(cif, 0, sizeof(*cif)); cif->base_version = 1; cif->class_version = 1; cif->resp_time_value = 20; @@ -340,7 +310,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } - memset(iocp, 0, sizeof *iocp); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); @@ -390,7 +360,7 @@ static void srpt_get_svc_entries(u64 ioc_guid, } svc_entries = (struct ib_dm_svc_entries *)mad->data; - memset(svc_entries, 0, sizeof *svc_entries); + memset(svc_entries, 0, sizeof(*svc_entries)); svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); snprintf(svc_entries->service_entries[0].name, sizeof(svc_entries->service_entries[0].name), @@ -484,7 +454,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp->ah = ah; dm_mad = rsp->mad; - memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad)); dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; dm_mad->mad_hdr.status = 0; @@ -532,7 +502,7 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof port_modify); + memset(&port_modify, 0, sizeof(port_modify)); port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; port_modify.clr_port_cap_mask = 0; @@ -553,7 +523,7 @@ static int srpt_refresh_port(struct srpt_port *sport) goto err_query_port; if (!sport->mad_agent) { - memset(®_req, 0, sizeof reg_req); + memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); @@ -841,6 +811,39 @@ out: } /** + * srpt_zerolength_write() - Perform a zero-length RDMA write. + * + * A quote from the InfiniBand specification: C9-88: For an HCA responder + * using Reliable Connection service, for each zero-length RDMA READ or WRITE + * request, the R_Key shall not be validated, even if the request includes + * Immediate data. + */ +static int srpt_zerolength_write(struct srpt_rdma_ch *ch) +{ + struct ib_send_wr wr, *bad_wr; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_cqe = &ch->zw_cqe; + wr.send_flags = IB_SEND_SIGNALED; + return ib_post_send(ch->qp, &wr, &bad_wr); +} + +static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = cq->cq_context; + + if (wc->status == IB_WC_SUCCESS) { + srpt_process_wait_list(ch); + } else { + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); + } +} + +/** * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. * @ioctx: Pointer to the I/O context associated with the request. * @srp_cmd: Pointer to the SRP_CMD request data. @@ -903,14 +906,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, db = (struct srp_direct_buf *)(srp_cmd->add_data + add_cdb_offset); - memcpy(ioctx->rbufs, db, sizeof *db); + memcpy(ioctx->rbufs, db, sizeof(*db)); *data_len = be32_to_cpu(db->len); } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { idb = (struct srp_indirect_buf *)(srp_cmd->add_data + add_cdb_offset); - ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; + ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db); if (ioctx->n_rbuf > (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { @@ -929,7 +932,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, ioctx->rbufs = &ioctx->single_rbuf; else { ioctx->rbufs = - kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); + kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC); if (!ioctx->rbufs) { ioctx->n_rbuf = 0; ret = -ENOMEM; @@ -938,7 +941,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, } db = idb->desc_list; - memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); + memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db)); *data_len = be32_to_cpu(idb->len); } out: @@ -956,7 +959,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) struct ib_qp_attr *attr; int ret; - attr = kzalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1070,7 +1073,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, dir = ioctx->cmd.data_direction; BUG_ON(dir == DMA_NONE); ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(&ioctx->cmd)); ioctx->mapped_sg_count = 0; } } @@ -1107,7 +1110,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ioctx->sg_cnt = sg_cnt = cmd->t_data_nents; count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(cmd)); if (unlikely(!count)) return -EAGAIN; @@ -1313,10 +1316,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) /* * If the command is in a state where the target core is waiting for - * the ib_srpt driver, change the state to the next state. Changing - * the state of the command from SRPT_STATE_NEED_DATA to - * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this - * function a second time. + * the ib_srpt driver, change the state to the next state. */ spin_lock_irqsave(&ioctx->spinlock, flags); @@ -1325,25 +1325,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEED_DATA: ioctx->state = SRPT_STATE_DATA_IN; break; - case SRPT_STATE_DATA_IN: case SRPT_STATE_CMD_RSP_SENT: case SRPT_STATE_MGMT_RSP_SENT: ioctx->state = SRPT_STATE_DONE; break; default: + WARN_ONCE(true, "%s: unexpected I/O context state %d\n", + __func__, state); break; } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (state == SRPT_STATE_DONE) { - struct srpt_rdma_ch *ch = ioctx->ch; - - BUG_ON(ch->sess == NULL); - - target_put_sess_cmd(&ioctx->cmd); - goto out; - } - pr_debug("Aborting cmd with state %d and tag %lld\n", state, ioctx->cmd.tag); @@ -1351,19 +1343,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEW: case SRPT_STATE_DATA_IN: case SRPT_STATE_MGMT: + case SRPT_STATE_DONE: /* * Do nothing - defer abort processing until * srpt_queue_response() is invoked. */ - WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false)); break; case SRPT_STATE_NEED_DATA: - /* DMA_TO_DEVICE (write) - RDMA read error. */ - - /* XXX(hch): this is a horrible layering violation.. */ - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag); + transport_generic_request_failure(&ioctx->cmd, + TCM_CHECK_CONDITION_ABORT_CMD); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1371,18 +1360,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; case SRPT_STATE_MGMT_RSP_SENT: - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; default: WARN(1, "Unexpected command state (%d)", state); break; } -out: return state; } @@ -1422,9 +1409,14 @@ static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); if (unlikely(wc->status != IB_WC_SUCCESS)) { + /* + * Note: if an RDMA write error completion is received that + * means that a SEND also has been posted. Defer further + * processing of the associated command until the send error + * completion has been received. + */ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", ioctx, wc->status); - srpt_abort_cmd(ioctx); } } @@ -1464,7 +1456,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, sense_data_len = ioctx->cmd.scsi_sense_length; WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); @@ -1514,7 +1506,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, srp_rsp = ioctx->ioctx.buf; BUG_ON(!srp_rsp); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = @@ -1528,80 +1520,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, return resp_len; } -#define NO_SUCH_LUN ((uint64_t)-1LL) - -/* - * SCSI LUN addressing method. See also SAM-2 and the section about - * eight byte LUNs. - */ -enum scsi_lun_addr_method { - SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0, - SCSI_LUN_ADDR_METHOD_FLAT = 1, - SCSI_LUN_ADDR_METHOD_LUN = 2, - SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3, -}; - -/* - * srpt_unpack_lun() - Convert from network LUN to linear LUN. - * - * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte - * order (big endian) to a linear LUN. Supports three LUN addressing methods: - * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40). - */ -static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) -{ - uint64_t res = NO_SUCH_LUN; - int addressing_method; - - if (unlikely(len < 2)) { - pr_err("Illegal LUN length %d, expected 2 bytes or more\n", - len); - goto out; - } - - switch (len) { - case 8: - if ((*((__be64 *)lun) & - cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) - goto out_err; - break; - case 4: - if (*((__be16 *)&lun[2]) != 0) - goto out_err; - break; - case 6: - if (*((__be32 *)&lun[2]) != 0) - goto out_err; - break; - case 2: - break; - default: - goto out_err; - } - - addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */ - switch (addressing_method) { - case SCSI_LUN_ADDR_METHOD_PERIPHERAL: - case SCSI_LUN_ADDR_METHOD_FLAT: - case SCSI_LUN_ADDR_METHOD_LUN: - res = *(lun + 1) | (((*lun) & 0x3f) << 8); - break; - - case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: - default: - pr_err("Unimplemented LUN addressing method %u\n", - addressing_method); - break; - } - -out: - return res; - -out_err: - pr_err("Support for multi-level LUNs has not yet been implemented\n"); - goto out; -} - static int srpt_check_stop_free(struct se_cmd *cmd) { struct srpt_send_ioctx *ioctx = container_of(cmd, @@ -1613,16 +1531,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd) /** * srpt_handle_cmd() - Process SRP_CMD. */ -static int srpt_handle_cmd(struct srpt_rdma_ch *ch, - struct srpt_recv_ioctx *recv_ioctx, - struct srpt_send_ioctx *send_ioctx) +static void srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) { struct se_cmd *cmd; struct srp_cmd *srp_cmd; - uint64_t unpacked_lun; u64 data_len; enum dma_data_direction dir; - sense_reason_t ret; int rc; BUG_ON(!send_ioctx); @@ -1650,65 +1566,23 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { pr_err("0x%llx: parsing SRP descriptor table failed.\n", srp_cmd->tag); - ret = TCM_INVALID_CDB_FIELD; - goto send_sense; + goto release_ioctx; } - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun, - sizeof(srp_cmd->lun)); rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, - &send_ioctx->sense_data[0], unpacked_lun, data_len, - TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); + &send_ioctx->sense_data[0], + scsilun_to_int(&srp_cmd->lun), data_len, + TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); if (rc != 0) { - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto send_sense; + pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, + srp_cmd->tag); + goto release_ioctx; } - return 0; - -send_sense: - transport_send_check_condition_and_sense(cmd, ret, 0); - return -1; -} - -/** - * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. - * @ch: RDMA channel of the task management request. - * @fn: Task management function to perform. - * @req_tag: Tag of the SRP task management request. - * @mgmt_ioctx: I/O context of the task management request. - * - * Returns zero if the target core will process the task management - * request asynchronously. - * - * Note: It is assumed that the initiator serializes tag-based task management - * requests. - */ -static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) -{ - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *target; - int ret, i; + return; - ret = -EINVAL; - ch = ioctx->ch; - BUG_ON(!ch); - BUG_ON(!ch->sport); - sdev = ch->sport->sdev; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - for (i = 0; i < ch->rq_size; ++i) { - target = ch->ioctx_ring[i]; - if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->cmd.tag == tag && - srpt_get_cmd_state(target) != SRPT_STATE_DONE) { - ret = 0; - /* now let the target core abort &target->cmd; */ - break; - } - } - spin_unlock_irq(&sdev->spinlock); - return ret; +release_ioctx: + send_ioctx->state = SRPT_STATE_DONE; + srpt_release_cmd(cmd); } static int srp_tmr_to_tcm(int fn) @@ -1744,8 +1618,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct srp_tsk_mgmt *srp_tsk; struct se_cmd *cmd; struct se_session *sess = ch->sess; - uint64_t unpacked_lun; - uint32_t tag = 0; int tcm_tmr; int rc; @@ -1761,26 +1633,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); - if (tcm_tmr < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; - goto fail; - } - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, - sizeof(srp_tsk->lun)); - - if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) { - rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); - if (rc < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_DOES_NOT_EXIST; - goto fail; - } - tag = srp_tsk->task_tag; - } - rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun, - srp_tsk, tcm_tmr, GFP_KERNEL, tag, - TARGET_SCF_ACK_KREF); + rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr, + GFP_KERNEL, srp_tsk->task_tag, + TARGET_SCF_ACK_KREF); if (rc != 0) { send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; goto fail; @@ -1800,7 +1656,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *send_ioctx) { struct srp_cmd *srp_cmd; - enum rdma_ch_state ch_state; BUG_ON(!ch); BUG_ON(!recv_ioctx); @@ -1809,13 +1664,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, recv_ioctx->ioctx.dma, srp_max_req_size, DMA_FROM_DEVICE); - ch_state = srpt_get_ch_state(ch); - if (unlikely(ch_state == CH_CONNECTING)) { + if (unlikely(ch->state == CH_CONNECTING)) { list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); goto out; } - if (unlikely(ch_state != CH_LIVE)) + if (unlikely(ch->state != CH_LIVE)) goto out; srp_cmd = recv_ioctx->ioctx.buf; @@ -1878,6 +1732,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) } } +/* + * This function must be called from the context in which RDMA completions are + * processed because it accesses the wait list without protection against + * access from other threads. + */ +static void srpt_process_wait_list(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + + while (!list_empty(&ch->cmd_wait_list) && + ch->state >= CH_LIVE && + (ioctx = srpt_get_send_ioctx(ch)) != NULL) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, ioctx); + } +} + /** * Note: Although this has not yet been observed during tests, at least in * theory it is possible that the srpt_get_send_ioctx() call invoked by @@ -1905,15 +1781,10 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) atomic_inc(&ch->sq_wr_avail); - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) pr_info("sending response for ioctx 0x%p failed" " with status %d\n", ioctx, wc->status); - atomic_dec(&ch->req_lim); - srpt_abort_cmd(ioctx); - goto out; - } - if (state != SRPT_STATE_DONE) { srpt_unmap_sg_to_ib_sge(ch, ioctx); transport_generic_free_cmd(&ioctx->cmd, 0); @@ -1922,18 +1793,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) " wr_id = %u.\n", ioctx->ioctx.index); } -out: - while (!list_empty(&ch->cmd_wait_list) && - srpt_get_ch_state(ch) == CH_LIVE && - (ioctx = srpt_get_send_ioctx(ch)) != NULL) { - struct srpt_recv_ioctx *recv_ioctx; - - recv_ioctx = list_first_entry(&ch->cmd_wait_list, - struct srpt_recv_ioctx, - wait_list); - list_del(&recv_ioctx->wait_list); - srpt_handle_new_iu(ch, recv_ioctx, ioctx); - } + srpt_process_wait_list(ch); } /** @@ -1950,7 +1810,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) WARN_ON(ch->rq_size < 1); ret = -ENOMEM; - qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); + qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL); if (!qp_init) goto out; @@ -2017,168 +1877,102 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) } /** - * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. + * srpt_close_ch() - Close an RDMA channel. * - * Reset the QP and make sure all resources associated with the channel will - * be deallocated at an appropriate time. + * Make sure all resources associated with the channel will be deallocated at + * an appropriate time. * - * Note: The caller must hold ch->sport->sdev->spinlock. + * Returns true if and only if the channel state has been modified into + * CH_DRAINING. */ -static void __srpt_close_ch(struct srpt_rdma_ch *ch) +static bool srpt_close_ch(struct srpt_rdma_ch *ch) { - enum rdma_ch_state prev_state; - unsigned long flags; + int ret; - spin_lock_irqsave(&ch->spinlock, flags); - prev_state = ch->state; - switch (prev_state) { - case CH_CONNECTING: - case CH_LIVE: - ch->state = CH_DISCONNECTING; - break; - default: - break; + if (!srpt_set_ch_state(ch, CH_DRAINING)) { + pr_debug("%s-%d: already closed\n", ch->sess_name, + ch->qp->qp_num); + return false; } - spin_unlock_irqrestore(&ch->spinlock, flags); - - switch (prev_state) { - case CH_CONNECTING: - ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0, - NULL, 0); - /* fall through */ - case CH_LIVE: - if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) - pr_err("sending CM DREQ failed.\n"); - break; - case CH_DISCONNECTING: - break; - case CH_DRAINING: - case CH_RELEASING: - break; - } -} - -/** - * srpt_close_ch() - Close an RDMA channel. - */ -static void srpt_close_ch(struct srpt_rdma_ch *ch) -{ - struct srpt_device *sdev; - sdev = ch->sport->sdev; - spin_lock_irq(&sdev->spinlock); - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); -} + kref_get(&ch->kref); -/** - * srpt_shutdown_session() - Whether or not a session may be shut down. - */ -static int srpt_shutdown_session(struct se_session *se_sess) -{ - struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; - unsigned long flags; + ret = srpt_ch_qp_err(ch); + if (ret < 0) + pr_err("%s-%d: changing queue pair into error state failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); - spin_lock_irqsave(&ch->spinlock, flags); - if (ch->in_shutdown) { - spin_unlock_irqrestore(&ch->spinlock, flags); - return true; + pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, + ch->qp->qp_num); + ret = srpt_zerolength_write(ch); + if (ret < 0) { + pr_err("%s-%d: queuing zero-length write failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ON_ONCE(true); } - ch->in_shutdown = true; - target_sess_cmd_list_set_waiting(se_sess); - spin_unlock_irqrestore(&ch->spinlock, flags); + kref_put(&ch->kref, srpt_free_ch); return true; } -/** - * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. - * @cm_id: Pointer to the CM ID of the channel to be drained. - * - * Note: Must be called from inside srpt_cm_handler to avoid a race between - * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() - * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() - * waits until all target sessions for the associated IB device have been - * unregistered and target session registration involves a call to - * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until - * this function has finished). +/* + * Change the channel state into CH_DISCONNECTING. If a channel has not yet + * reached the connected state, close it. If a channel is in the connected + * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is + * the responsibility of the caller to ensure that this function is not + * invoked concurrently with the code that accepts a connection. This means + * that this function must either be invoked from inside a CM callback + * function or that it must be invoked with the srpt_port.mutex held. */ -static void srpt_drain_channel(struct ib_cm_id *cm_id) +static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; int ret; - bool do_reset = false; - WARN_ON_ONCE(irqs_disabled()); + if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) + return -ENOTCONN; - sdev = cm_id->context; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - do_reset = srpt_test_and_set_ch_state(ch, - CH_CONNECTING, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_LIVE, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_DISCONNECTING, CH_DRAINING); - break; - } - } - spin_unlock_irq(&sdev->spinlock); + ret = ib_send_cm_dreq(ch->cm_id, NULL, 0); + if (ret < 0) + ret = ib_send_cm_drep(ch->cm_id, NULL, 0); - if (do_reset) { - if (ch->sess) - srpt_shutdown_session(ch->sess); + if (ret < 0 && srpt_close_ch(ch)) + ret = 0; - ret = srpt_ch_qp_err(ch); - if (ret < 0) - pr_err("Setting queue pair in error state" - " failed: %d\n", ret); - } + return ret; } -/** - * srpt_find_channel() - Look up an RDMA channel. - * @cm_id: Pointer to the CM ID of the channel to be looked up. - * - * Return NULL if no matching RDMA channel has been found. - */ -static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, - struct ib_cm_id *cm_id) +static void __srpt_close_all_ch(struct srpt_device *sdev) { struct srpt_rdma_ch *ch; - bool found; - WARN_ON_ONCE(irqs_disabled()); - BUG_ON(!sdev); + lockdep_assert_held(&sdev->mutex); - found = false; - spin_lock_irq(&sdev->spinlock); list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - found = true; - break; - } + if (srpt_disconnect_ch(ch) >= 0) + pr_info("Closing channel %s-%d because target %s has been disabled\n", + ch->sess_name, ch->qp->qp_num, + sdev->device->name); + srpt_close_ch(ch); } - spin_unlock_irq(&sdev->spinlock); - - return found ? ch : NULL; } /** - * srpt_release_channel() - Release channel resources. - * - * Schedules the actual release because: - * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would - * trigger a deadlock. - * - It is not safe to call TCM transport_* functions from interrupt context. + * srpt_shutdown_session() - Whether or not a session may be shut down. */ -static void srpt_release_channel(struct srpt_rdma_ch *ch) +static int srpt_shutdown_session(struct se_session *se_sess) +{ + return 1; +} + +static void srpt_free_ch(struct kref *kref) { - schedule_work(&ch->release_work); + struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + + kfree(ch); } static void srpt_release_channel_work(struct work_struct *w) @@ -2188,8 +1982,8 @@ static void srpt_release_channel_work(struct work_struct *w) struct se_session *se_sess; ch = container_of(w, struct srpt_rdma_ch, release_work); - pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, - ch->release_done); + pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name, + ch->qp->qp_num, ch->release_done); sdev = ch->sport->sdev; BUG_ON(!sdev); @@ -2197,6 +1991,7 @@ static void srpt_release_channel_work(struct work_struct *w) se_sess = ch->sess; BUG_ON(!se_sess); + target_sess_cmd_list_set_waiting(se_sess); target_wait_for_sess_cmds(se_sess); transport_deregister_session_configfs(se_sess); @@ -2211,16 +2006,15 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); - spin_lock_irq(&sdev->spinlock); - list_del(&ch->list); - spin_unlock_irq(&sdev->spinlock); - + mutex_lock(&sdev->mutex); + list_del_init(&ch->list); if (ch->release_done) complete(ch->release_done); + mutex_unlock(&sdev->mutex); wake_up(&sdev->ch_releaseQ); - kfree(ch); + kref_put(&ch->kref, srpt_free_ch); } /** @@ -2266,9 +2060,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); - rsp = kzalloc(sizeof *rsp, GFP_KERNEL); - rej = kzalloc(sizeof *rej, GFP_KERNEL); - rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + rej = kzalloc(sizeof(*rej), GFP_KERNEL); + rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL); if (!rsp || !rej || !rep_param) { ret = -ENOMEM; @@ -2297,7 +2091,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) @@ -2305,26 +2099,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, && param->port == ch->sport->port && param->listen_id == ch->sport->sdev->cm_id && ch->cm_id) { - enum rdma_ch_state ch_state; - - ch_state = srpt_get_ch_state(ch); - if (ch_state != CH_CONNECTING - && ch_state != CH_LIVE) + if (srpt_disconnect_ch(ch) < 0) continue; - - /* found an existing channel */ - pr_debug("Found existing channel %s" - " cm_id= %p state= %d\n", - ch->sess_name, ch->cm_id, ch_state); - - __srpt_close_ch(ch); - + pr_info("Relogin - closed existing channel %s\n", + ch->sess_name); rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; } } - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); } else rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; @@ -2340,7 +2124,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } - ch = kzalloc(sizeof *ch, GFP_KERNEL); + ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) { rej->reason = cpu_to_be32( SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); @@ -2349,11 +2133,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } + kref_init(&ch->kref); + ch->zw_cqe.done = srpt_zerolength_write_done; INIT_WORK(&ch->release_work, srpt_release_channel_work); memcpy(ch->i_port_id, req->initiator_port_id, 16); memcpy(ch->t_port_id, req->target_port_id, 16); ch->sport = &sdev->port[param->port - 1]; ch->cm_id = cm_id; + cm_id->context = ch; /* * Avoid QUEUE_FULL conditions by limiting the number of buffers used * for the SRP protocol to the command queue size. @@ -2453,7 +2240,7 @@ try_again: /* create cm reply */ rep_param->qp_num = ch->qp->qp_num; rep_param->private_data = (void *)rsp; - rep_param->private_data_len = sizeof *rsp; + rep_param->private_data_len = sizeof(*rsp); rep_param->rnr_retry_count = 7; rep_param->flow_control = 1; rep_param->failover_accepted = 0; @@ -2468,14 +2255,14 @@ try_again: goto release_channel; } - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_add_tail(&ch->list, &sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); goto out; release_channel: - srpt_set_ch_state(ch, CH_RELEASING); + srpt_disconnect_ch(ch); transport_deregister_session_configfs(ch->sess); transport_deregister_session(ch->sess); ch->sess = NULL; @@ -2497,7 +2284,7 @@ reject: | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, - (void *)rej, sizeof *rej); + (void *)rej, sizeof(*rej)); out: kfree(rep_param); @@ -2507,10 +2294,23 @@ out: return ret; } -static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, + enum ib_cm_rej_reason reason, + const u8 *private_data, + u8 private_data_len) { - pr_info("Received IB REJ for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); + char *priv = NULL; + int i; + + if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1, + GFP_KERNEL))) { + for (i = 0; i < private_data_len; i++) + sprintf(priv + 3 * i, " %02x", private_data[i]); + } + pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n", + ch->sess_name, ch->qp->qp_num, reason, private_data_len ? + "; private data" : "", priv ? priv : " (?)"); + kfree(priv); } /** @@ -2519,87 +2319,23 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) * An IB_CM_RTU_RECEIVED message indicates that the connection is established * and that the recipient may begin transmitting (RTU = ready to use). */ -static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) { - struct srpt_rdma_ch *ch; int ret; - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - - if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) { - struct srpt_recv_ioctx *ioctx, *ioctx_tmp; - + if (srpt_set_ch_state(ch, CH_LIVE)) { ret = srpt_ch_qp_rts(ch, ch->qp); - list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, - wait_list) { - list_del(&ioctx->wait_list); - srpt_handle_new_iu(ch, ioctx, NULL); - } - if (ret) + if (ret == 0) { + /* Trigger wait list processing. */ + ret = srpt_zerolength_write(ch); + WARN_ONCE(ret < 0, "%d\n", ret); + } else { srpt_close_ch(ch); + } } } -static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) -{ - pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -static void srpt_cm_rep_error(struct ib_cm_id *cm_id) -{ - pr_info("Received IB REP error for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -/** - * srpt_cm_dreq_recv() - Process reception of a DREQ message. - */ -static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) -{ - struct srpt_rdma_ch *ch; - unsigned long flags; - bool send_drep = false; - - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - - pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch)); - - spin_lock_irqsave(&ch->spinlock, flags); - switch (ch->state) { - case CH_CONNECTING: - case CH_LIVE: - send_drep = true; - ch->state = CH_DISCONNECTING; - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_RELEASING: - WARN(true, "unexpected channel state %d\n", ch->state); - break; - } - spin_unlock_irqrestore(&ch->spinlock, flags); - - if (send_drep) { - if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) - pr_err("Sending IB DREP failed.\n"); - pr_info("Received DREQ and sent DREP for session %s.\n", - ch->sess_name); - } -} - -/** - * srpt_cm_drep_recv() - Process reception of a DREP message. - */ -static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) -{ - pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - /** * srpt_cm_handler() - IB connection manager callback function. * @@ -2612,6 +2348,7 @@ static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) */ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { + struct srpt_rdma_ch *ch = cm_id->context; int ret; ret = 0; @@ -2621,32 +2358,39 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) event->private_data); break; case IB_CM_REJ_RECEIVED: - srpt_cm_rej_recv(cm_id); + srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason, + event->private_data, + IB_CM_REJ_PRIVATE_DATA_SIZE); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: - srpt_cm_rtu_recv(cm_id); + srpt_cm_rtu_recv(ch); break; case IB_CM_DREQ_RECEIVED: - srpt_cm_dreq_recv(cm_id); + srpt_disconnect_ch(ch); break; case IB_CM_DREP_RECEIVED: - srpt_cm_drep_recv(cm_id); + pr_info("Received CM DREP message for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); break; case IB_CM_TIMEWAIT_EXIT: - srpt_cm_timewait_exit(cm_id); + pr_info("Received CM TimeWait exit for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); break; case IB_CM_REP_ERROR: - srpt_cm_rep_error(cm_id); + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); break; case IB_CM_DREQ_ERROR: - pr_info("Received IB DREQ ERROR event.\n"); + pr_info("Received CM DREQ ERROR event.\n"); break; case IB_CM_MRA_RECEIVED: - pr_info("Received IB MRA event\n"); + pr_info("Received CM MRA event\n"); break; default: - pr_err("received unrecognized IB CM event %d\n", event->event); + pr_err("received unrecognized CM event %d\n", event->event); break; } @@ -2755,41 +2499,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd) */ static int srpt_write_pending(struct se_cmd *se_cmd) { - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *ioctx; + struct srpt_send_ioctx *ioctx = + container_of(se_cmd, struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; enum srpt_command_state new_state; - enum rdma_ch_state ch_state; - int ret; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); WARN_ON(new_state == SRPT_STATE_DONE); - - ch = ioctx->ch; - BUG_ON(!ch); - - ch_state = srpt_get_ch_state(ch); - switch (ch_state) { - case CH_CONNECTING: - WARN(true, "unexpected channel state %d\n", ch_state); - ret = -EINVAL; - goto out; - case CH_LIVE: - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_RELEASING: - pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->cmd.tag); - srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); - ret = -EINVAL; - goto out; - } - ret = srpt_xfer_data(ch, ioctx); - -out: - return ret; + return srpt_xfer_data(ch, ioctx); } static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) @@ -2920,36 +2637,25 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static int srpt_ch_list_empty(struct srpt_device *sdev) -{ - int res; - - spin_lock_irq(&sdev->spinlock); - res = list_empty(&sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); - - return res; -} - /** * srpt_release_sdev() - Free the channel resources associated with a target. */ static int srpt_release_sdev(struct srpt_device *sdev) { - struct srpt_rdma_ch *ch, *tmp_ch; - int res; + int i, res; WARN_ON_ONCE(irqs_disabled()); BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); + for (i = 0; i < ARRAY_SIZE(sdev->port); i++) + sdev->port[i].enabled = false; + __srpt_close_all_ch(sdev); + mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, - srpt_ch_list_empty(sdev)); + list_empty_careful(&sdev->rch_list)); if (res) pr_err("%s: interrupted.\n", __func__); @@ -3003,14 +2709,14 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p, device->dma_ops = %p\n", device, device->dma_ops); - sdev = kzalloc(sizeof *sdev, GFP_KERNEL); + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) goto err; sdev->device = device; INIT_LIST_HEAD(&sdev->rch_list); init_waitqueue_head(&sdev->ch_releaseQ); - spin_lock_init(&sdev->spinlock); + mutex_init(&sdev->mutex); sdev->pd = ib_alloc_pd(device); if (IS_ERR(sdev->pd)) @@ -3082,7 +2788,7 @@ static void srpt_add_one(struct ib_device *device) if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", - srpt_sdev_name(sdev), i); + sdev->device->name, i); goto err_ring; } snprintf(sport->port_guid, sizeof(sport->port_guid), @@ -3231,24 +2937,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) static void srpt_close_session(struct se_session *se_sess) { DECLARE_COMPLETION_ONSTACK(release_done); - struct srpt_rdma_ch *ch; - struct srpt_device *sdev; - unsigned long res; - - ch = se_sess->fabric_sess_ptr; - WARN_ON(ch->sess != se_sess); + struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; + struct srpt_device *sdev = ch->sport->sdev; + bool wait; - pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); - sdev = ch->sport->sdev; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); BUG_ON(ch->release_done); ch->release_done = &release_done; - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + wait = !list_empty(&ch->list); + srpt_disconnect_ch(ch); + mutex_unlock(&sdev->mutex); - res = wait_for_completion_timeout(&release_done, 60 * HZ); - WARN_ON(res == 0); + if (!wait) + return; + + while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); } /** @@ -3456,6 +3164,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, { struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + struct srpt_device *sdev = sport->sdev; + struct srpt_rdma_ch *ch; unsigned long tmp; int ret; @@ -3469,11 +3179,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); return -EINVAL; } - if (tmp == 1) - sport->enabled = true; - else - sport->enabled = false; + if (sport->enabled == tmp) + goto out; + sport->enabled = tmp; + if (sport->enabled) + goto out; + + mutex_lock(&sdev->mutex); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->sport == sport) { + pr_debug("%s: ch %p %s-%d\n", __func__, ch, + ch->sess_name, ch->qp->qp_num); + srpt_disconnect_ch(ch); + srpt_close_ch(ch); + } + } + mutex_unlock(&sdev->mutex); +out: return count; } @@ -3565,7 +3288,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", - .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 09037f2b0b51..af9b8b527340 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -218,20 +218,20 @@ struct srpt_send_ioctx { /** * enum rdma_ch_state - SRP channel state. - * @CH_CONNECTING: QP is in RTR state; waiting for RTU. - * @CH_LIVE: QP is in RTS state. - * @CH_DISCONNECTING: DREQ has been received; waiting for DREP - * or DREQ has been send and waiting for DREP - * or . - * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. - * @CH_RELEASING: Last WQE event has been received; releasing resources. + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. + * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has + * been received. + * @CH_DRAINING: DREP has been received or waiting for DREP timed out + * and last work request has been queued. + * @CH_DISCONNECTED: Last completion has been received. */ enum rdma_ch_state { CH_CONNECTING, CH_LIVE, CH_DISCONNECTING, CH_DRAINING, - CH_RELEASING + CH_DISCONNECTED, }; /** @@ -267,6 +267,8 @@ struct srpt_rdma_ch { struct ib_cm_id *cm_id; struct ib_qp *qp; struct ib_cq *cq; + struct ib_cqe zw_cqe; + struct kref kref; int rq_size; u32 rsp_size; atomic_t sq_wr_avail; @@ -286,7 +288,6 @@ struct srpt_rdma_ch { u8 sess_name[36]; struct work_struct release_work; struct completion *release_done; - bool in_shutdown; }; /** @@ -343,7 +344,7 @@ struct srpt_port { * @ioctx_ring: Per-HCA SRQ. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @ch_releaseQ: Enables waiting for removal from rch_list. - * @spinlock: Protects rch_list and tpg. + * @mutex: Protects rch_list. * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. @@ -357,18 +358,10 @@ struct srpt_device { struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; wait_queue_head_t ch_releaseQ; - spinlock_t spinlock; + struct mutex mutex; struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; }; -/** - * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). - * @nacl: Target core node ACL information. - */ -struct srpt_node_acl { - struct se_node_acl nacl; -}; - #endif /* IB_SRPT_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index a072d341e205..1d2d1da40c80 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1021,6 +1021,8 @@ struct cpl_l2t_write_req { #define L2T_W_NOREPLY_V(x) ((x) << L2T_W_NOREPLY_S) #define L2T_W_NOREPLY_F L2T_W_NOREPLY_V(1U) +#define CPL_L2T_VLAN_NONE 0xfff + struct cpl_l2t_write_rpl { union opcode_tid ot; u8 status; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a32de30ea663..c8661c77b4e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -561,6 +561,7 @@ enum fw_flowc_mnem { FW_FLOWC_MNEM_SNDBUF, FW_FLOWC_MNEM_MSS, FW_FLOWC_MNEM_TXDATAPLEN_MAX, + FW_FLOWC_MNEM_SCHEDCLASS = 11, }; struct fw_flowc_mnemval { diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d66c690a8597..e97094598b2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", - [33] = "RoCEv2 support" + [33] = "RoCEv2 support", + [34] = "DMFS Sniffer support (UC & MC)" }; int i; @@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e054647..42d8de892bfe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, + [MLX4_FS_MIRROR_RX_PORT] = 0x4, + [MLX4_FS_MIRROR_SX_PORT] = 0x5, + [MLX4_FS_UC_SNIFFER] = 0x6, + [MLX4_FS_MC_SNIFFER] = 0x7, }; int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5b1753233c5d..81b2013ef968 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -516,7 +516,7 @@ struct mlx5e_priv { struct mlx5_uar cq_uar; u32 pdn; u32 tdn; - struct mlx5_core_mr mr; + struct mlx5_core_mkey mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 402994bf7e16..0c49951606b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -973,7 +973,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mr.key); + c->mkey_be = cpu_to_be32(priv->mkey.key); c->num_tc = priv->params.num_tc; mlx5e_build_channeltc_to_txq_map(priv, ix); @@ -2204,7 +2204,7 @@ static void mlx5e_build_netdev(struct net_device *netdev) } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mr *mr) + struct mlx5_core_mkey *mkey) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_create_mkey_mbox_in *in; @@ -2220,7 +2220,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, NULL); kvfree(in); @@ -2269,7 +2269,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_pd; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); if (err) { mlx5_core_err(mdev, "create mkey failed, %d\n", err); goto err_dealloc_transport_domain; @@ -2343,7 +2343,7 @@ err_destroy_tises: mlx5e_destroy_tises(priv); err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mr); + mlx5_core_destroy_mkey(mdev, &priv->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, priv->tdn); @@ -2377,7 +2377,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6f68dba8d7ed..bf3446794bd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -77,6 +77,9 @@ #define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 +#define ANCHOR_MAX_FT 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { size_t arr_sz; long *caps; @@ -92,7 +95,7 @@ static struct init_tree_node { int max_ft; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -108,6 +111,8 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), } }; @@ -196,8 +201,10 @@ static void tree_put_node(struct fs_node *node) static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) - return -EPERM; + if (atomic_read(&node->refcount) > 1) { + atomic_dec(&node->refcount); + return -EEXIST; + } tree_put_node(node); return 0; } @@ -360,6 +367,11 @@ static void del_rule(struct fs_node *node) memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); list_del(&rule->node.list); + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } fte->dests_size--; if (fte->dests_size) { err = mlx5_cmd_update_fte(dev, ft, @@ -465,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; ft->max_fte = max_fte; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); return ft; } @@ -601,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } +static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + lock_ref_node(&fte->node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + unlock_ref_node(&fte->node); + + return err; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. + */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + err = mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct mlx5_flow_table *next_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ @@ -612,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table err = connect_prev_fts(dev, ft, prio); if (err) return err; + + next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; } if (MLX5_CAP_FLOWTABLE(dev, @@ -762,6 +835,7 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) if (!rule) return NULL; + INIT_LIST_HEAD(&rule->next_ft); rule->node.type = FS_TYPE_FLOW_DEST; memcpy(&rule->dest_attr, dest, sizeof(*dest)); @@ -782,9 +856,14 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, return ERR_PTR(-ENOMEM); fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- added as first element after the head */ + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ tree_init_node(&rule->node, 1, del_rule); - list_add_tail(&rule->node.list, &fte->node.children); + if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); fte->dests_size++; if (fte->dests_size == 1) err = mlx5_cmd_create_fte(get_dev(&ft->node), @@ -903,6 +982,25 @@ out: return fg; } +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (rule->dest_attr.type == dest->type) { + if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dest->vport_num == rule->dest_attr.vport_num) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft == rule->dest_attr.ft) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + dest->tir_num == rule->dest_attr.tir_num)) + return rule; + } + } + return NULL; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -919,6 +1017,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { + rule = find_flow_rule(fte, dest); + if (rule) { + atomic_inc(&rule->node.refcount); + unlock_ref_node(&fte->node); + unlock_ref_node(&fg->node); + return rule; + } rule = add_rule_fte(fte, fg, dest); unlock_ref_node(&fte->node); if (IS_ERR(rule)) @@ -984,14 +1089,14 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, return rule; } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_rule * +_mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; @@ -1014,6 +1119,63 @@ unlock: unlock_ref_node(&ft->node); return rule; } + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_destination gen_dest; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_rule *rule = NULL; + u32 sw_action = action; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + if (dest) + return ERR_PTR(-EINVAL); + mutex_lock(&root->chain_lock); + next_ft = find_next_chained_ft(prio); + if (next_ft) { + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest.ft = next_ft; + dest = &gen_dest; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + mutex_unlock(&root->chain_lock); + return ERR_PTR(-EOPNOTSUPP); + } + } + + rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); + + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!IS_ERR_OR_NULL(rule) && + (list_empty(&rule->next_ft))) { + mutex_lock(&next_ft->lock); + list_add(&rule->next_ft, &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + mutex_unlock(&root->chain_lock); + } + return rule; +} EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) @@ -1077,6 +1239,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) return 0; next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + err = connect_prev_fts(dev, next_ft, prio); if (err) mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", @@ -1126,6 +1292,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: @@ -1351,6 +1518,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) } } +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +static int create_anchor_flow_table(struct mlx5_core_dev + *dev) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (!ns) + return -EINVAL; + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + static int init_root_ns(struct mlx5_core_dev *dev) { @@ -1363,6 +1549,9 @@ static int init_root_ns(struct mlx5_core_dev *dev) set_prio_attrs(dev->priv.root_ns); + if (create_anchor_flow_table(dev)) + goto cleanup; + return 0; cleanup: @@ -1392,6 +1581,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, root_ns = NULL; } +static void destroy_flow_tables(struct fs_prio *prio) +{ + struct mlx5_flow_table *iter; + struct mlx5_flow_table *tmp; + + fs_for_each_ft_safe(iter, tmp, prio) + mlx5_destroy_flow_table(iter); +} + static void cleanup_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; @@ -1420,6 +1618,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) list); fs_get_obj(obj_iter_prio2, iter_prio2); + destroy_flow_tables(obj_iter_prio2); if (tree_remove_node(iter_prio2)) { mlx5_core_warn(dev, "Priority %d wasn't destroyed, refcount > 1\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 00245fd7e4bc..f37a6248a27b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -68,6 +68,11 @@ struct fs_node { struct mlx5_flow_rule { struct fs_node node; struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. + */ + struct list_head next_ft; + u32 sw_action; }; /* Type of children is mlx5_flow_group */ @@ -82,6 +87,10 @@ struct mlx5_flow_table { unsigned int required_groups; unsigned int num_groups; } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; }; /* Type of children is mlx5_flow_rule */ @@ -142,6 +151,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_list_for_each_entry(pos, root) \ list_for_each_entry(pos, root, node.list) +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + #define fs_for_each_ns_or_ft_reverse(pos, prio) \ list_for_each_entry_reverse(pos, &(prio)->node.children, list) @@ -157,6 +169,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_ft(pos, prio) \ fs_list_for_each_entry(pos, &(prio)->node.children) +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1545a944c309..0916bbc69269 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1117,7 +1117,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); - mlx5_init_mr_table(dev); + mlx5_init_mkey_table(dev); err = mlx5_init_fs(dev); if (err) { @@ -1164,7 +1164,7 @@ err_sriov: err_reg_dev: mlx5_cleanup_fs(dev); err_fs: - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); @@ -1237,7 +1237,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #endif mlx5_cleanup_fs(dev); - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 6fa22b51e460..77a7293921d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,25 +36,26 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -void mlx5_init_mr_table(struct mlx5_core_dev *dev) +void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; memset(table, 0, sizeof(*table)); rwlock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -83,34 +84,35 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } - mr->iova = be64_to_cpu(in->seg.start_addr); - mr->size = be64_to_cpu(in->seg.len); - mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey->iova = be64_to_cpu(in->seg.start_addr); + mkey->size = be64_to_cpu(in->seg.len); + mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mr->key); + be32_to_cpu(lout.mkey), key, mkey->key); - /* connect to MR tree */ + /* connect to mkey tree */ write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); write_unlock_irq(&table->lock); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", - mlx5_base_mkey(mr->key), err); - mlx5_core_destroy_mkey(dev, mr); + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_base_mkey(mkey->key), err); + mlx5_core_destroy_mkey(dev, mkey); } return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; - struct mlx5_core_mr *deleted_mr; + struct mlx5_core_mkey *deleted_mkey; unsigned long flags; int err; @@ -118,16 +120,16 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) memset(&out, 0, sizeof(out)); write_lock_irqsave(&table->lock, flags); - deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); write_unlock_irqrestore(&table->lock, flags); - if (!deleted_mr) { - mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", - mlx5_base_mkey(mr->key)); + if (!deleted_mkey) { + mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_base_mkey(mkey->key)); return -ENOENT; } in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; @@ -139,7 +141,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen) { struct mlx5_query_mkey_mbox_in in; @@ -149,7 +151,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, memset(out, 0, outlen); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); if (err) return err; @@ -161,7 +163,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, } EXPORT_SYMBOL(mlx5_core_query_mkey); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { struct mlx5_query_special_ctxs_mbox_in in; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773e93f3..5635ce7ad693 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -324,6 +324,29 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz) +{ + u32 *in; + int err; + + in = mlx5_vzalloc(sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); + int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c7398b95aecd..90ab09e375b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -850,3 +850,43 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = mlx5_vzalloc(in_sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, 0); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto free; + err = mlx5_cmd_status_to_err_v2(out); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a0e8cc8dcc67..8541a913f6a3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -219,6 +219,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, + MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, }; enum { @@ -1160,6 +1161,8 @@ enum mlx4_net_trans_promisc_mode { MLX4_FS_REGULAR = 1, MLX4_FS_ALL_DEFAULT, MLX4_FS_MC_DEFAULT, + MLX4_FS_MIRROR_RX_PORT, + MLX4_FS_MIRROR_SX_PORT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, MLX4_FS_MODE_NUM, /* should be last */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764afa65c..9566b3b3b2c5 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -105,6 +105,29 @@ __mlx5_mask(typ, fld)) ___t; \ }) +/* Big endian getters */ +#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ + __mlx5_64_off(typ, fld))) + +#define MLX5_GET_BE(type_t, typ, p, fld) ({ \ + type_t tmp; \ + switch (sizeof(tmp)) { \ + case sizeof(u8): \ + tmp = (__force type_t)MLX5_GET(typ, p, fld); \ + break; \ + case sizeof(u16): \ + tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u32): \ + tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u64): \ + tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ + break; \ + } \ + tmp; \ + }) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, @@ -1284,7 +1307,8 @@ enum { MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, - MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 + MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) @@ -1294,6 +1318,11 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_BY_PASS_NUM_PRIOS 9 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 +#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 +#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ + MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ + MLX5_BY_PASS_NUM_MULTICAST_PRIOS) #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1e3006dcf35d..9108904a6a56 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -338,7 +338,7 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; -struct mlx5_core_mr { +struct mlx5_core_mkey { u64 iova; u64 size; u32 key; @@ -426,7 +426,7 @@ struct mlx5_srq_table { struct radix_tree_root tree; }; -struct mlx5_mr_table { +struct mlx5_mkey_table { /* protect radix tree */ rwlock_t lock; @@ -484,9 +484,9 @@ struct mlx5_priv { struct mlx5_cq_table cq_table; /* end: cq staff */ - /* start: mr staff */ - struct mlx5_mr_table mr_table; - /* end: mr staff */ + /* start: mkey staff */ + struct mlx5_mkey_table mkey_table; + /* end: mkey staff */ /* start: alloc staff */ /* protect buffer alocation according to numa node */ @@ -739,16 +739,18 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); -void mlx5_init_mr_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +void mlx5_init_mkey_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); @@ -847,6 +849,8 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz); static inline int fw_initializing(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8230caa3fb6e..8dec5508d93d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,10 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +enum { + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -52,6 +56,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, + MLX5_FLOW_NAMESPACE_ANCHOR, MLX5_FLOW_NAMESPACE_FDB, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 58eef02edc7e..9b8a02b7880f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -458,7 +458,8 @@ struct mlx5_ifc_ads_bits { }; struct mlx5_ifc_flow_table_nic_cap_bits { - u8 reserved_at_0[0x200]; + u8 nic_rx_multi_path_tirs[0x1]; + u8 reserved_at_1[0x1ff]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -736,7 +737,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0xe]; + u8 reserved_at_200[0x3]; + u8 ipoib_basic_offloads[0x1]; + u8 reserved_at_204[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -767,10 +770,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cd[0x1]; u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_at_22e[0x7]; + u8 reserved_at_22e[0x2]; + u8 imaicl[0x1]; + u8 reserved_at_231[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_at_237[0x4]; + u8 set_deth_sqpn[0x1]; + u8 reserved_at_239[0x3]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; @@ -1208,6 +1214,36 @@ struct mlx5_ifc_phys_layer_cntrs_bits { u8 reserved_at_640[0x180]; }; +struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { + u8 symbol_error_counter[0x10]; + + u8 link_error_recovery_counter[0x8]; + + u8 link_downed_counter[0x8]; + + u8 port_rcv_errors[0x10]; + + u8 port_rcv_remote_physical_errors[0x10]; + + u8 port_rcv_switch_relay_errors[0x10]; + + u8 port_xmit_discards[0x10]; + + u8 port_xmit_constraint_errors[0x8]; + + u8 port_rcv_constraint_errors[0x8]; + + u8 reserved_at_70[0x8]; + + u8 link_overrun_errors[0x8]; + + u8 reserved_at_80[0x10]; + + u8 vl_15_dropped[0x10]; + + u8 reserved_at_a0[0xa0]; +}; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { u8 transmit_queue_high[0x20]; @@ -1780,7 +1816,7 @@ struct mlx5_ifc_qpc_bits { u8 log_sq_size[0x4]; u8 reserved_at_55[0x6]; u8 rlky[0x1]; - u8 reserved_at_5c[0x4]; + u8 ulp_stateless_offload_mode[0x4]; u8 counter_set_id[0x8]; u8 uar_page[0x18]; @@ -2618,6 +2654,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -3126,7 +3163,8 @@ struct mlx5_ifc_query_vport_counter_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_at_41[0xf]; + u8 reserved_at_41[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_at_60[0x60]; @@ -6956,6 +6994,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_peir_reg_bits peir_reg; struct mlx5_ifc_pelc_reg_bits pelc_reg; struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_pifr_reg_bits pifr_reg; struct mlx5_ifc_pipg_reg_bits pipg_reg; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 5b8c89ffaa58..cf031a3f16c5 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -499,7 +499,8 @@ struct mlx5_qp_context { u8 reserved2[4]; __be32 next_send_psn; __be32 cqn_send; - u8 reserved3[8]; + __be32 deth_sqpn; + u8 reserved3[4]; __be32 last_acked_psn; __be32 ssn; __be32 params2; @@ -621,9 +622,9 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) { - return radix_tree_lookup(&dev->priv.mr_table.tree, key); + return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } struct mlx5_page_fault_resume_mbox_in { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 123771003e68..a9f2bcc98cab 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -92,5 +92,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0ff049bd9ad4..37dd534cbeab 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_snoop_handler - Callback handler for snooping sent MADs. * @mad_agent: MAD agent that snooped the MAD. - * @send_wr: Work request information on the sent MAD. + * @send_buf: send MAD data buffer. * @mad_send_wc: Work completion information on the sent MAD. Valid * only for snooping that occurs on a send completion. * - * Clients snooping MADs should not modify data referenced by the @send_wr + * Clients snooping MADs should not modify data referenced by the @send_buf * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..3a03c1d18afa 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -212,6 +212,7 @@ enum ib_device_cap_flags { IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_SG_GAPS_REG = (1ULL << 32), }; enum ib_signature_prot_cap { @@ -662,10 +663,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * @IB_MR_TYPE_SIGNATURE: memory region that is used for * signature operations (data-integrity * capable regions) + * @IB_MR_TYPE_SG_GAPS: memory region that is capable to + * register any arbitrary sg lists (without + * the normal mr constraints - see + * ib_map_mr_sg) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, + IB_MR_TYPE_SG_GAPS, }; /** @@ -1487,6 +1493,11 @@ enum ib_flow_domain { IB_FLOW_DOMAIN_NUM /* Must be last */ }; +enum ib_flow_flags { + IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ +}; + struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; @@ -1808,7 +1819,8 @@ struct ib_device { struct scatterlist *sg, int sg_nents); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, - enum ib_mw_type type); + enum ib_mw_type type, + struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, @@ -1846,6 +1858,8 @@ struct ib_device { int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); struct ib_dma_mapping_ops *dma_ops; @@ -3094,4 +3108,7 @@ int ib_sg_to_pages(struct ib_mr *mr, int sg_nents, int (*set_page)(struct ib_mr *, u64)); +void ib_drain_rq(struct ib_qp *qp); +void ib_drain_sq(struct ib_qp *qp); +void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */ diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 036bd2772662..6d0065c322b7 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -83,8 +83,10 @@ struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; - struct sockaddr_storage local_addr; + struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; + struct sockaddr_storage m_local_addr; /* nmapped local addr */ + struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ @@ -92,6 +94,7 @@ struct iw_cm_id { void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; + bool mapped; }; struct iw_cm_conn_param { @@ -123,6 +126,7 @@ struct iw_cm_verbs { int backlog); int (*destroy_listen)(struct iw_cm_id *cm_id); + char ifname[IFNAMSIZ]; }; /** diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index c19a5dc1531a..f7d7b6fec935 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -5,8 +5,8 @@ enum { RDMA_NL_RDMA_CM = 1, - RDMA_NL_NES, - RDMA_NL_C4IW, + RDMA_NL_IWCM, + RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ RDMA_NL_NUM_CLIENTS }; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 52b4a2f993f2..1852e383afd6 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -109,14 +109,13 @@ struct p9_trans_rdma { /** * p9_rdma_context - Keeps track of in-process WR * - * @wc_op: The original WR op for when the CQE completes in error. * @busa: Bus address to unmap when the WR completes * @req: Keeps track of requests (send) * @rc: Keepts track of replies (receive) */ struct p9_rdma_req; struct p9_rdma_context { - enum ib_wc_opcode wc_op; + struct ib_cqe cqe; dma_addr_t busa; union { struct p9_req_t *req; @@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) } static void -handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +recv_done(struct ib_cq *cq, struct ib_wc *wc) { + struct p9_client *client = cq->cq_context; + struct p9_trans_rdma *rdma = client->trans; + struct p9_rdma_context *c = + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); struct p9_req_t *req; int err = 0; int16_t tag; @@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, DMA_FROM_DEVICE); - if (status != IB_WC_SUCCESS) + if (wc->status != IB_WC_SUCCESS) goto err_out; err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); @@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, req->rc = c->rc; p9_client_cb(client, req, REQ_STATUS_RCVD); + out: + up(&rdma->rq_sem); + kfree(c); return; err_out: - p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", + req, err, wc->status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; + goto out; } static void -handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +send_done(struct ib_cq *cq, struct ib_wc *wc) { + struct p9_client *client = cq->cq_context; + struct p9_trans_rdma *rdma = client->trans; + struct p9_rdma_context *c = + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); + ib_dma_unmap_single(rdma->cm_id->device, c->busa, c->req->tc->size, DMA_TO_DEVICE); + up(&rdma->sq_sem); + kfree(c); } static void qp_event_handler(struct ib_event *event, void *context) @@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context) event->event, context); } -static void cq_comp_handler(struct ib_cq *cq, void *cq_context) -{ - struct p9_client *client = cq_context; - struct p9_trans_rdma *rdma = client->trans; - int ret; - struct ib_wc wc; - - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; - - switch (c->wc_op) { - case IB_WC_RECV: - handle_recv(client, rdma, c, wc.status, wc.byte_len); - up(&rdma->rq_sem); - break; - - case IB_WC_SEND: - handle_send(client, rdma, c, wc.status, wc.byte_len); - up(&rdma->sq_sem); - break; - - default: - pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", - c->wc_op, wc.opcode, wc.status); - break; - } - kfree(c); - } -} - -static void cq_event_handler(struct ib_event *e, void *v) -{ - p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); -} - static void rdma_destroy_trans(struct p9_trans_rdma *rdma) { if (!rdma) @@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma) ib_dealloc_pd(rdma->pd); if (rdma->cq && !IS_ERR(rdma->cq)) - ib_destroy_cq(rdma->cq); + ib_free_cq(rdma->cq); if (rdma->cm_id && !IS_ERR(rdma->cm_id)) rdma_destroy_id(rdma->cm_id); @@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) goto error; + c->cqe.done = recv_done; + sge.addr = c->busa; sge.length = client->msize; sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; - c->wc_op = IB_WC_RECV; - wr.wr_id = (unsigned long) c; + wr.wr_cqe = &c->cqe; wr.sg_list = &sge; wr.num_sge = 1; return ib_post_recv(rdma->qp, &wr, &bad_wr); @@ -499,13 +477,14 @@ dont_need_post_recv: goto send_error; } + c->cqe.done = send_done; + sge.addr = c->busa; sge.length = c->req->tc->size; sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; - c->wc_op = IB_WC_SEND; - wr.wr_id = (unsigned long) c; + wr.wr_cqe = &c->cqe; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; wr.sg_list = &sge; @@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; - struct ib_cq_init_attr cq_attr = {}; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); @@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Completion Queue */ - cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; - rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, - cq_event_handler, client, - &cq_attr); + rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, + opts.sq_depth + opts.rq_depth + 1, + 0, IB_POLL_SOFTIRQ); if (IS_ERR(rdma->cq)) goto error; - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); |