From 024fbf9c2eae6fa10844a9a3588a3d61c89683e8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Dec 2017 14:13:55 -0500 Subject: SUNRPC: Remove rpc_protocol() Since nfs4_create_referral_server was the only call site of rpc_protocol, rpc_protocol can now be removed. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e2a4184f3c5d..6e432ecd7f99 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1375,22 +1375,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize } EXPORT_SYMBOL_GPL(rpc_setbufsize); -/** - * rpc_protocol - Get transport protocol number for an RPC client - * @clnt: RPC client to query - * - */ -int rpc_protocol(struct rpc_clnt *clnt) -{ - int protocol; - - rcu_read_lock(); - protocol = rcu_dereference(clnt->cl_xprt)->prot; - rcu_read_unlock(); - return protocol; -} -EXPORT_SYMBOL_GPL(rpc_protocol); - /** * rpc_net_ns - Get the network namespace for this RPC client * @clnt: RPC client to query -- cgit v1.2.3-58-ga151 From 3d188805f83250409fc251bfb0a699810086885b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Jan 2018 15:47:06 -0500 Subject: SUNRPC: Chunk reading of replies from the server Read the TCP data in chunks of max 2MB so that we do not hog the socket lock. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6d0cc3b8f932..620be573d78b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -52,6 +52,8 @@ #include "sunrpc.h" +#define RPC_TCP_READ_CHUNK_SZ (3*512*1024) + static void xs_close(struct rpc_xprt *xprt); static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock); @@ -1479,6 +1481,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns .offset = offset, .count = len, }; + size_t ret; dprintk("RPC: xs_tcp_data_recv started\n"); do { @@ -1507,9 +1510,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /* Skip over any trailing bytes on short reads */ xs_tcp_read_discard(transport, &desc); } while (desc.count); + ret = len - desc.count; + if (ret < rd_desc->count) + rd_desc->count -= ret; + else + rd_desc->count = 0; trace_xs_tcp_data_recv(transport); dprintk("RPC: xs_tcp_data_recv done\n"); - return len - desc.count; + return ret; } static void xs_tcp_data_receive(struct sock_xprt *transport) @@ -1517,7 +1525,6 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) struct rpc_xprt *xprt = &transport->xprt; struct sock *sk; read_descriptor_t rd_desc = { - .count = 2*1024*1024, .arg.data = xprt, }; unsigned long total = 0; @@ -1531,16 +1538,16 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ for (loop = 0; loop < 64; loop++) { + rd_desc.count = RPC_TCP_READ_CHUNK_SZ; lock_sock(sk); read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); - if (read <= 0) { + if (rd_desc.count != 0 || read < 0) { clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); release_sock(sk); break; } release_sock(sk); total += read; - rd_desc.count = 65536; } if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) queue_work(xprtiod_workqueue, &transport->recv_worker); -- cgit v1.2.3-58-ga151 From 0af3442af7c4c5b06e72708d7dd937974d1b4114 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Jan 2018 15:28:29 -0500 Subject: SUNRPC: Add explicit rescheduling points in the receive path When reading the reply from the server, insert an explicit cond_resched() to avoid starving higher priority tasks. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 620be573d78b..18803021f242 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1005,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport) struct sock *sk; int err; +restart: mutex_lock(&transport->recv_mutex); sk = transport->inet; if (sk == NULL) @@ -1018,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport) } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) break; + if (need_resched()) { + mutex_unlock(&transport->recv_mutex); + cond_resched(); + goto restart; + } } out: mutex_unlock(&transport->recv_mutex); @@ -1096,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) struct sock *sk; int err; +restart: mutex_lock(&transport->recv_mutex); sk = transport->inet; if (sk == NULL) @@ -1109,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport) } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) break; + if (need_resched()) { + mutex_unlock(&transport->recv_mutex); + cond_resched(); + goto restart; + } } out: mutex_unlock(&transport->recv_mutex); @@ -1528,16 +1540,16 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) .arg.data = xprt, }; unsigned long total = 0; - int loop; int read = 0; +restart: mutex_lock(&transport->recv_mutex); sk = transport->inet; if (sk == NULL) goto out; /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ - for (loop = 0; loop < 64; loop++) { + for (;;) { rd_desc.count = RPC_TCP_READ_CHUNK_SZ; lock_sock(sk); read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); @@ -1548,6 +1560,11 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) } release_sock(sk); total += read; + if (need_resched()) { + mutex_unlock(&transport->recv_mutex); + cond_resched(); + goto restart; + } } if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) queue_work(xprtiod_workqueue, &transport->recv_worker); -- cgit v1.2.3-58-ga151 From 03ac1a76ce5e5a6052a421e1d6a5c97778e88a8c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:01 -0500 Subject: xprtrdma: Fix buffer leak after transport set up failure This leak has been around forever, and is exceptionally rare. EINVAL causes mount to fail with "an incorrect mount option was specified" although it's not likely that one of the mount options is incorrect. Instead, return ENODEV in this case, as this appears to be an issue with system or device configuration rather than a specific mount option. Some obsolete comments are also removed. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6ee1ad8978f3..7f9b62822807 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -414,20 +414,10 @@ xprt_setup_rdma(struct xprt_create *args) if (rc) goto out2; - /* - * Allocate pre-registered send and receive buffers for headers and - * any inline data. Also specify any padding which will be provided - * from a preregistered zero buffer. - */ rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out3; - /* - * Register a callback for connection events. This is necessary because - * connection loss notification is async. We also catch connection loss - * when reaping receives. - */ INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); @@ -448,8 +438,9 @@ xprt_setup_rdma(struct xprt_create *args) return xprt; out4: + rpcrdma_buffer_destroy(&new_xprt->rx_buf); xprt_rdma_free_addresses(xprt); - rc = -EINVAL; + rc = -ENODEV; out3: rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: -- cgit v1.2.3-58-ga151 From d698c4a02ee02053bbebe051322ff427a2dad56a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:09 -0500 Subject: xprtrdma: Fix backchannel allocation of extra rpcrdma_reps The backchannel code uses rpcrdma_recv_buffer_put to add new reps to the free rep list. This also decrements rb_recv_count, which spoofs the receive overrun logic in rpcrdma_buffer_get_rep. Commit 9b06688bc3b9 ("xprtrdma: Fix additional uses of spin_lock_irqsave(rb_lock)") replaced the original open-coded list_add with a call to rpcrdma_recv_buffer_put(), but then a year later, commit 05c974669ece ("xprtrdma: Fix receive buffer accounting") added rep accounting to rpcrdma_recv_buffer_put. It was an oversight to let the backchannel continue to use this function. The fix this, let's combine the "add to free list" logic with rpcrdma_create_rep. Also, do not allocate RPCRDMA_MAX_BC_REQUESTS rpcrdma_reps in rpcrdma_buffer_create and then allocate additional rpcrdma_reps in rpcrdma_bc_setup_reps. Allocating the extra reps during backchannel set-up is sufficient. Fixes: 05c974669ece ("xprtrdma: Fix receive buffer accounting") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 12 ++---------- net/sunrpc/xprtrdma/verbs.c | 32 +++++++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 3 files changed, 22 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 8b818bb3518a..256c67b433c1 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -74,21 +74,13 @@ out_fail: static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, unsigned int count) { - struct rpcrdma_rep *rep; int rc = 0; while (count--) { - rep = rpcrdma_create_rep(r_xprt); - if (IS_ERR(rep)) { - pr_err("RPC: %s: reply buffer alloc failed\n", - __func__); - rc = PTR_ERR(rep); + rc = rpcrdma_create_rep(r_xprt); + if (rc) break; - } - - rpcrdma_recv_buffer_put(rep); } - return rc; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8607c029c0dd..6eecd9714051 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1093,10 +1093,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) return req; } -struct rpcrdma_rep * +/** + * rpcrdma_create_rep - Allocate an rpcrdma_rep object + * @r_xprt: controlling transport + * + * Returns 0 on success or a negative errno on failure. + */ +int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; int rc; @@ -1121,12 +1128,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; - return rep; + + spin_lock(&buf->rb_lock); + list_add(&rep->rr_list, &buf->rb_recv_bufs); + spin_unlock(&buf->rb_lock); + return 0; out_free: kfree(rep); out: - return ERR_PTR(rc); + dprintk("RPC: %s: reply buffer %d alloc failed\n", + __func__, rc); + return rc; } int @@ -1167,17 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { - struct rpcrdma_rep *rep; - - rep = rpcrdma_create_rep(r_xprt); - if (IS_ERR(rep)) { - dprintk("RPC: %s: reply buffer %d alloc failed\n", - __func__, i); - rc = PTR_ERR(rep); + for (i = 0; i <= buf->rb_max_requests; i++) { + rc = rpcrdma_create_rep(r_xprt); + if (rc) goto out; - } - list_add(&rep->rr_list, &buf->rb_recv_bufs); } rc = rpcrdma_sendctxs_create(r_xprt); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 1342f743f1c4..3b63e61feae2 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -564,8 +564,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); * Buffer calls - xprtrdma/verbs.c */ struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); -struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); void rpcrdma_destroy_req(struct rpcrdma_req *); +int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); -- cgit v1.2.3-58-ga151 From 42b9f5c58aa8c59c91ead0254f0c193e3438b020 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:18 -0500 Subject: xprtrdma: Eliminate unnecessary lock cycle in xprt_rdma_send_request The rpcrdma_req is not shared yet, and its associated Send hasn't been posted, thus RMW should be safe. There's no need for the expense of a lock cycle here. Fixes: 0ba6f37012db ("xprtrdma: Refactor rpcrdma_deferred_completion") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7f9b62822807..7db063f8dd8a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -735,7 +735,7 @@ xprt_rdma_send_request(struct rpc_task *task) goto drop_connection; req->rl_connect_cookie = xprt->connect_cookie; - set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); + __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; -- cgit v1.2.3-58-ga151 From c34416182f041e4107e531c6083c3df9a8af96f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:26 -0500 Subject: xprtrdma: Per-mode handling for Remote Invalidation Refactoring change: Remote Invalidation is particular to the memory registration mode that is use. Use a callout instead of a generic function to handle Remote Invalidation. This gets rid of the 8-byte flags field in struct rpcrdma_mw, of which only a single bit flag has been allocated. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 24 +++++++++++++++++++++--- net/sunrpc/xprtrdma/rpc_rdma.c | 24 ++++-------------------- net/sunrpc/xprtrdma/verbs.c | 1 - net/sunrpc/xprtrdma/xprt_rdma.h | 8 ++------ 4 files changed, 27 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 773e66e10a15..e1f73037b554 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -450,6 +450,26 @@ out_senderr: return ERR_PTR(-ENOTCONN); } +/* Handle a remotely invalidated mw on the @mws list + */ +static void +frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws) +{ + struct rpcrdma_mw *mw; + + list_for_each_entry(mw, mws, mw_list) + if (mw->mw_handle == rep->rr_inv_rkey) { + struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + + list_del(&mw->mw_list); + mw->frmr.fr_state = FRMR_IS_INVALID; + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mw->mw_sg, mw->mw_nents, mw->mw_dir); + rpcrdma_put_mw(r_xprt, mw); + break; /* only one invalidated MR per RPC */ + } +} + /* Invalidate all memory regions that were registered for "req". * * Sleeps until it is safe for the host CPU to access the @@ -478,9 +498,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) list_for_each_entry(mw, mws, mw_list) { mw->frmr.fr_state = FRMR_IS_INVALID; - if (mw->mw_flags & RPCRDMA_MW_F_RI) - continue; - f = &mw->frmr; dprintk("RPC: %s: invalidating frmr %p\n", __func__, f); @@ -553,6 +570,7 @@ reset_mrs: const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_map = frwr_op_map, + .ro_reminv = frwr_op_reminv, .ro_unmap_sync = frwr_op_unmap_sync, .ro_recover_mr = frwr_op_recover_mr, .ro_open = frwr_op_open, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a3f2ab283aeb..d7463bce3df3 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -984,24 +984,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) return fixup_copy_count; } -/* Caller must guarantee @rep remains stable during this call. - */ -static void -rpcrdma_mark_remote_invalidation(struct list_head *mws, - struct rpcrdma_rep *rep) -{ - struct rpcrdma_mw *mw; - - if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)) - return; - - list_for_each_entry(mw, mws, mw_list) - if (mw->mw_handle == rep->rr_inv_rkey) { - mw->mw_flags = RPCRDMA_MW_F_RI; - break; /* only one invalidated MR per RPC */ - } -} - /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes * the RPC/RDMA header small and fixed in size, so it is @@ -1339,9 +1321,11 @@ void rpcrdma_deferred_completion(struct work_struct *work) struct rpcrdma_rep *rep = container_of(work, struct rpcrdma_rep, rr_work); struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; - rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); - rpcrdma_release_rqst(rep->rr_rxprt, req); + if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) + r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); + rpcrdma_release_rqst(r_xprt, req); rpcrdma_complete_rqst(rep); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 6eecd9714051..1cf1eb42786e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1307,7 +1307,6 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) if (!mw) goto out_nomws; - mw->mw_flags = 0; return mw; out_nomws: diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3b63e61feae2..e787dda3b0f5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -272,7 +272,6 @@ struct rpcrdma_mw { struct scatterlist *mw_sg; int mw_nents; enum dma_data_direction mw_dir; - unsigned long mw_flags; union { struct rpcrdma_fmr fmr; struct rpcrdma_frmr frmr; @@ -284,11 +283,6 @@ struct rpcrdma_mw { struct list_head mw_all; }; -/* mw_flags */ -enum { - RPCRDMA_MW_F_RI = 1, -}; - /* * struct rpcrdma_req -- structure central to the request/reply sequence. * @@ -485,6 +479,8 @@ struct rpcrdma_memreg_ops { (*ro_map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool, struct rpcrdma_mw **); + void (*ro_reminv)(struct rpcrdma_rep *rep, + struct list_head *mws); void (*ro_unmap_sync)(struct rpcrdma_xprt *, struct list_head *); void (*ro_recover_mr)(struct rpcrdma_mw *); -- cgit v1.2.3-58-ga151 From 3f0e3edd6ad209ee18a3ea5c2a6e2046d2a2e783 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:34 -0500 Subject: xprtrdma: Remove ri_reminv_expected Clean up. Commit b5f0afbea4f2 ("xprtrdma: Per-connection pad optimization") should have removed this. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 2 -- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1cf1eb42786e..4a9b6f811859 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -192,7 +192,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ - r_xprt->rx_ia.ri_reminv_expected = false; r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; @@ -200,7 +199,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, if (pmsg && pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { - r_xprt->rx_ia.ri_reminv_expected = true; r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e787dda3b0f5..80ea3db054d8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -77,7 +77,6 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; unsigned int ri_max_send_sges; - bool ri_reminv_expected; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; unsigned long ri_flags; -- cgit v1.2.3-58-ga151 From 104927042cde1f86e9f3959ba9c8b4dae1616d69 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:42 -0500 Subject: xprtrdma: Remove unused padding variables Clean up. Remove fields that should have been removed by commit b3221d6a53c4 ("xprtrdma: Remove logic that constructs RDMA_MSGP type calls"). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 9 +++------ net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7db063f8dd8a..dc9000d087a9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,6 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; -static unsigned int xprt_rdma_inline_write_padding; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; int xprt_rdma_pad_optimize; @@ -81,6 +80,7 @@ static unsigned int zero; static unsigned int max_padding = PAGE_SIZE; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int max_memreg = RPCRDMA_LAST - 1; +static unsigned int dummy; static struct ctl_table_header *sunrpc_table_header; @@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { }, { .procname = "rdma_inline_write_padding", - .data = &xprt_rdma_inline_write_padding, + .data = &dummy, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -387,8 +387,6 @@ xprt_setup_rdma(struct xprt_create *args) if (cdata.inline_rsize > cdata.rsize) cdata.inline_rsize = cdata.rsize; - cdata.padding = xprt_rdma_inline_write_padding; - /* * Create new transport instance, which includes initialized * o ia @@ -895,8 +893,7 @@ int xprt_rdma_init(void) "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", xprt_rdma_slot_table_entries, xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); - dprintk("\tPadding %d\n\tMemreg %d\n", - xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 80ea3db054d8..375df3d3ff59 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -437,7 +437,6 @@ struct rpcrdma_create_data_internal { unsigned int wsize; /* mount wsize - max write hdr+data */ unsigned int inline_rsize; /* max non-rdma read data payload */ unsigned int inline_wsize; /* max non-rdma write data payload */ - unsigned int padding; /* non-rdma write header padding */ }; /* -- cgit v1.2.3-58-ga151 From d461f1f2fb91b5629019b3b405528bc88c49f863 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:50 -0500 Subject: xprtrdma: Initialize the xprt address string array earlier This makes the address strings available for debugging messages in earlier stages of transport set up. The first benefit is to get rid of the single-use rep_remote_addr field, saving 128+ bytes in struct rpcrdma_ep. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 5 ++--- net/sunrpc/xprtrdma/verbs.c | 16 +++++++--------- net/sunrpc/xprtrdma/xprt_rdma.h | 13 ++++++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index dc9000d087a9..f6d171e15d7d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -373,6 +373,7 @@ xprt_setup_rdma(struct xprt_create *args) if (rpc_get_port(sap)) xprt_set_bound(xprt); + xprt_rdma_format_addresses(xprt, sap); cdata.max_requests = xprt->max_reqs; @@ -405,7 +406,6 @@ xprt_setup_rdma(struct xprt_create *args) */ new_xprt->rx_data = cdata; new_ep = &new_xprt->rx_ep; - new_ep->rep_remote_addr = cdata.addr; rc = rpcrdma_ep_create(&new_xprt->rx_ep, &new_xprt->rx_ia, &new_xprt->rx_data); @@ -419,7 +419,6 @@ xprt_setup_rdma(struct xprt_create *args) INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); - xprt_rdma_format_addresses(xprt, sap); xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); if (xprt->max_payload == 0) goto out4; @@ -437,13 +436,13 @@ xprt_setup_rdma(struct xprt_create *args) out4: rpcrdma_buffer_destroy(&new_xprt->rx_buf); - xprt_rdma_free_addresses(xprt); rc = -ENODEV; out3: rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: + xprt_rdma_free_addresses(xprt); xprt_free(xprt); return ERR_PTR(rc); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4a9b6f811859..0b4d6a3f6d02 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -219,9 +219,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; -#endif int connstate = 0; switch (event->event) { @@ -244,9 +241,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_DEVICE_REMOVAL: #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - pr_info("rpcrdma: removing device %s for %pIS:%u\n", + pr_info("rpcrdma: removing device %s for %s:%s\n", ia->ri_device->name, - sap, rpc_get_port(sap)); + rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); #endif set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); ep->rep_connected = -ENODEV; @@ -269,8 +266,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -ENETDOWN; goto connected; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", - sap, rpc_get_port(sap), + dprintk("rpcrdma: connection to %s:%s rejected: %s\n", + rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), rdma_reject_msg(id, event->status)); connstate = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) @@ -285,8 +282,9 @@ connected: wake_up_all(&ep->rep_connect_wait); /*FALLTHROUGH*/ default: - dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", - __func__, sap, rpc_get_port(sap), + dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n", + __func__, + rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), ia->ri_device->name, ia->ri_ops->ro_displayname, ep, rdma_event_msg(event->event)); break; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 375df3d3ff59..0b28026e1502 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -100,7 +100,6 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; - struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; }; @@ -519,6 +518,18 @@ struct rpcrdma_xprt { #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) +static inline const char * +rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) +{ + return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]; +} + +static inline const char * +rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) +{ + return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT]; +} + /* Setting this to 0 ensures interoperability with early servers. * Setting this to 1 enhances certain unaligned read/write performance. * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ -- cgit v1.2.3-58-ga151 From dd229cee4ed2617ccddc0937608728cd87c934c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:56:58 -0500 Subject: xprtrdma: Remove another sockaddr_storage field (cdata::addr) Save more space in struct rpcrdma_xprt by removing the redundant "addr" field from struct rpcrdma_create_data_internal. Wherever we have rpcrdma_xprt, we also have the rpc_xprt, which has a sockaddr_storage field with the same content. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 8 ++------ net/sunrpc/xprtrdma/verbs.c | 20 +++++++++----------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index f6d171e15d7d..8ba0aa8c566f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -361,9 +361,7 @@ xprt_setup_rdma(struct xprt_create *args) /* * Set up RDMA-specific connect data. */ - - sap = (struct sockaddr *)&cdata.addr; - memcpy(sap, args->dstaddr, args->addrlen); + sap = args->dstaddr; /* Ensure xprt->addr holds valid server TCP (not RDMA) * address, for any side protocols which peek at it */ @@ -397,7 +395,7 @@ xprt_setup_rdma(struct xprt_create *args) new_xprt = rpcx_to_rdmax(xprt); - rc = rpcrdma_ia_open(new_xprt, sap); + rc = rpcrdma_ia_open(new_xprt); if (rc) goto out1; @@ -483,8 +481,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) sap = (struct sockaddr_in *)&xprt->addr; sap->sin_port = htons(port); - sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; - sap->sin_port = htons(port); dprintk("RPC: %s: %u\n", __func__, port); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0b4d6a3f6d02..d6c737d4c36b 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -294,8 +294,7 @@ connected: } static struct rdma_cm_id * -rpcrdma_create_id(struct rpcrdma_xprt *xprt, - struct rpcrdma_ia *ia, struct sockaddr *addr) +rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) { unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; struct rdma_cm_id *id; @@ -314,7 +313,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, } ia->ri_async_rc = -ETIMEDOUT; - rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); + rc = rdma_resolve_addr(id, NULL, + (struct sockaddr *)&xprt->rx_xprt.addr, + RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", __func__, rc); @@ -361,19 +362,18 @@ out: /** * rpcrdma_ia_open - Open and initialize an Interface Adapter. - * @xprt: controlling transport - * @addr: IP address of remote peer + * @xprt: transport with IA to (re)initialize * * Returns 0 on success, negative errno if an appropriate * Interface Adapter could not be found and opened. */ int -rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) +rpcrdma_ia_open(struct rpcrdma_xprt *xprt) { struct rpcrdma_ia *ia = &xprt->rx_ia; int rc; - ia->ri_id = rpcrdma_create_id(xprt, ia, addr); + ia->ri_id = rpcrdma_create_id(xprt, ia); if (IS_ERR(ia->ri_id)) { rc = PTR_ERR(ia->ri_id); goto out_err; @@ -649,13 +649,12 @@ static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; int rc, err; pr_info("%s: r_xprt = %p\n", __func__, r_xprt); rc = -EHOSTUNREACH; - if (rpcrdma_ia_open(r_xprt, sap)) + if (rpcrdma_ia_open(r_xprt)) goto out1; rc = -ENOMEM; @@ -687,7 +686,6 @@ static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; struct rdma_cm_id *id, *old; int err, rc; @@ -696,7 +694,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, rpcrdma_ep_disconnect(ep, ia); rc = -EHOSTUNREACH; - id = rpcrdma_create_id(r_xprt, ia, sap); + id = rpcrdma_create_id(r_xprt, ia); if (IS_ERR(id)) goto out; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0b28026e1502..7c09e2ae2542 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -430,7 +430,6 @@ struct rpcrdma_buffer { * This data should be set with mount options */ struct rpcrdma_create_data_internal { - struct sockaddr_storage addr; /* RDMA server address */ unsigned int max_requests; /* max requests (slots) in flight */ unsigned int rsize; /* mount rsize - max read hdr+data */ unsigned int wsize; /* mount wsize - max write hdr+data */ @@ -543,7 +542,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Interface Adapter calls - xprtrdma/verbs.c */ -int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); +int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_close(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *); -- cgit v1.2.3-58-ga151 From 20035edf3c349638a679cbc8e861d81fbc104b53 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:06 -0500 Subject: xprtrdma: Support IPv6 in xprt_rdma_set_port Clean up a harmless oversight. xprtrdma's ->set_port method has never properly supported IPv6. This issue has never been a problem because NFS/RDMA mounts have always required "port=20049", thus so far, rpcbind is not invoked for these mounts. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8ba0aa8c566f..cebcd027d085 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -474,14 +474,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) rpcrdma_ep_disconnect(ep, ia); } +/** + * xprt_rdma_set_port - update server port with rpcbind result + * @xprt: controlling RPC transport + * @port: new port value + * + * Transport connect status is unchanged. + */ static void xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) { - struct sockaddr_in *sap; + struct sockaddr *sap = (struct sockaddr *)&xprt->addr; + char buf[8]; + + dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", + __func__, xprt, + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + port); + + rpc_set_port(sap, port); - sap = (struct sockaddr_in *)&xprt->addr; - sap->sin_port = htons(port); - dprintk("RPC: %s: %u\n", __func__, port); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + snprintf(buf, sizeof(buf), "%u", port); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); + + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + snprintf(buf, sizeof(buf), "%4hx", port); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } /** -- cgit v1.2.3-58-ga151 From a2b6470b1c51dee7be4faf4f6b64803a6fcf637f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:14 -0500 Subject: xprtrdma: Move unmap-safe logic to rpcrdma_marshal_req Clean up. This logic is related to marshaling the request, and I'd like to keep everything that touches req->rl_registered close together, for CPU cache efficiency. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 11 +++++++++++ net/sunrpc/xprtrdma/transport.c | 5 ----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d7463bce3df3..dd7c0aab7535 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -821,6 +821,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) rtype = rpcrdma_areadch; } + /* If this is a retransmit, discard previously registered + * chunks. Very likely the connection has been replaced, + * so these registrations are invalid and unusable. + */ + while (unlikely(!list_empty(&req->rl_registered))) { + struct rpcrdma_mw *mw; + + mw = rpcrdma_pop_mw(&req->rl_registered); + rpcrdma_defer_mr_recovery(mw); + } + /* This implementation supports the following combinations * of chunk lists in one RPC-over-RDMA Call message: * diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index cebcd027d085..d77dee5a2748 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -731,11 +731,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (!xprt_connected(xprt)) goto drop_connection; - /* On retransmit, remove any previously registered chunks */ - if (unlikely(!list_empty(&req->rl_registered))) - r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, - &req->rl_registered); - rc = rpcrdma_marshal_req(r_xprt, rqst); if (rc < 0) goto failed_marshal; -- cgit v1.2.3-58-ga151 From 6c537f2c7cc06da36f6701be4c9413d7b8b47bfb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:23 -0500 Subject: xprtrdma: buf_free not called for CB replies Since commit 5a6d1db45569 ("SUNRPC: Add a transport-specific private field in rpc_rqst"), the rpc_rqst's for RPC-over-RDMA backchannel operations leave rq_buffer set to NULL. xprt_release does not invoke ->op->buf_free when rq_buffer is NULL. The RPCRDMA_REQ_F_BACKCHANNEL check in xprt_rdma_free is therefore redundant because xprt_rdma_free is not invoked for backchannel requests. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 1 - net/sunrpc/xprtrdma/transport.c | 3 --- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 256c67b433c1..11fb38f4d70d 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, req = rpcrdma_create_req(r_xprt); if (IS_ERR(req)) return PTR_ERR(req); - __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags); rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, GFP_KERNEL); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d77dee5a2748..d0cd6d411b64 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -686,9 +686,6 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags)) - return; - dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 7c09e2ae2542..ed7e51304bc2 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -354,8 +354,7 @@ struct rpcrdma_req { /* rl_flags */ enum { - RPCRDMA_REQ_F_BACKCHANNEL = 0, - RPCRDMA_REQ_F_PENDING, + RPCRDMA_REQ_F_PENDING = 0, RPCRDMA_REQ_F_TX_RESOURCES, }; -- cgit v1.2.3-58-ga151 From cf73daf52750fca4b4af0ca812f542891c228066 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:31 -0500 Subject: xprtrdma: Split xprt_rdma_send_request Clean up. @rqst is set up differently for backchannel Replies. For example, rqst->rq_task and task->tk_client are both NULL. So it is easier to understand and maintain this code path if it is separated. Also, we can get rid of the confusing rl_connect_cookie hack in rpcrdma_bc_receive_call. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 48 +++++++++++++++++++++++++++++++-------- net/sunrpc/xprtrdma/rpc_rdma.c | 5 ---- net/sunrpc/xprtrdma/transport.c | 27 +++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 4 files changed, 50 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 11fb38f4d70d..6c66a4f85507 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -187,13 +187,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) return maxmsg - RPCRDMA_HDRLEN_MIN; } -/** - * rpcrdma_bc_marshal_reply - Send backwards direction reply - * @rqst: buffer containing RPC reply data - * - * Returns zero on success. - */ -int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) +static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); @@ -220,6 +214,43 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) return 0; } +/** + * xprt_rdma_bc_send_reply - marshal and send a backchannel reply + * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf + * + * Caller holds the transport's write lock. + * + * Returns: + * %0 if the RPC message has been sent + * %-ENOTCONN if the caller should reconnect and call again + * %-EIO if a permanent error occurred and the request was not + * sent. Do not try to send this message again. + */ +int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + int rc; + + if (!xprt_connected(rqst->rq_xprt)) + goto drop_connection; + + rc = rpcrdma_bc_marshal_reply(rqst); + if (rc < 0) + goto failed_marshal; + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + goto drop_connection; + return 0; + +failed_marshal: + if (rc != -ENOTCONN) + return rc; +drop_connection: + xprt_disconnect_done(rqst->rq_xprt); + return -ENOTCONN; +} + /** * xprt_rdma_bc_destroy - Release resources for handling backchannel requests * @xprt: transport associated with these backchannel resources @@ -330,9 +361,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, __func__, rep, req); req->rl_reply = rep; - /* Defeat the retransmit detection logic in send_request */ - req->rl_connect_cookie = 0; - /* Queue rqst for ULP's callback service */ bc_serv = xprt->bc_serv; spin_lock(&bc_serv->sv_cb_lock); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index dd7c0aab7535..9207aeacd2c3 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -754,11 +754,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) __be32 *p; int ret; -#if defined(CONFIG_SUNRPC_BACKCHANNEL) - if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) - return rpcrdma_bc_marshal_reply(rqst); -#endif - rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(xdr, &req->rl_hdrbuf, req->rl_rdmabuf->rg_base); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d0cd6d411b64..be8c4e62d3f2 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -699,22 +699,12 @@ xprt_rdma_free(struct rpc_task *task) * * Caller holds the transport's write lock. * - * Return values: - * 0: The request has been sent - * ENOTCONN: Caller needs to invoke connect logic then call again - * ENOBUFS: Call again later to send the request - * EIO: A permanent error occurred. The request was not sent, - * and don't try it again - * - * send_request invokes the meat of RPC RDMA. It must do the following: - * - * 1. Marshal the RPC request into an RPC RDMA request, which means - * putting a header in front of data, and creating IOVs for RDMA - * from those in the request. - * 2. In marshaling, detect opportunities for RDMA, and use them. - * 3. Post a recv message to set up asynch completion, then send - * the request (rpcrdma_ep_post). - * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + * Returns: + * %0 if the RPC message has been sent + * %-ENOTCONN if the caller should reconnect and call again + * %-ENOBUFS if the caller should call again later + * %-EIO if a permanent error occurred and the request was not + * sent. Do not try to send this message again. */ static int xprt_rdma_send_request(struct rpc_task *task) @@ -725,6 +715,11 @@ xprt_rdma_send_request(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (unlikely(!rqst->rq_buffer)) + return xprt_rdma_bc_send_reply(rqst); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + if (!xprt_connected(xprt)) goto drop_connection; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ed7e51304bc2..e084130d3d84 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -666,7 +666,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); -int rpcrdma_bc_marshal_reply(struct rpc_rqst *); +int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); void xprt_rdma_bc_free_rqst(struct rpc_rqst *); void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -- cgit v1.2.3-58-ga151 From 30b5416bf0fd3bfef55543343ad1e85d32e32de4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:39 -0500 Subject: xprtrdma: Don't clear RPC_BC_PA_IN_USE on pre-allocated rpc_rqst's No need for the overhead of atomically setting and clearing this bit flag for every use of a pre-allocated backchannel rpc_rqst. These are a distinct pool of rpc_rqsts that are used only for callback operations, so it is safe to simply leave the bit set. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 6c66a4f85507..3c7998a72191 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -120,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) rqst->rq_xprt = &r_xprt->rx_xprt; INIT_LIST_HEAD(&rqst->rq_list); INIT_LIST_HEAD(&rqst->rq_bc_list); + __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) goto out_free; @@ -284,11 +285,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) dprintk("RPC: %s: freeing rqst %p (req %p)\n", __func__, rqst, rpcr_to_rdmar(rqst)); - smp_mb__before_atomic(); - WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); - clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); - smp_mb__after_atomic(); - spin_lock_bh(&xprt->bc_pa_lock); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); spin_unlock_bh(&xprt->bc_pa_lock); @@ -343,7 +339,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, rqst->rq_xid = *p; rqst->rq_private_buf.len = size; - set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); buf = &rqst->rq_rcv_buf; memset(buf, 0, sizeof(*buf)); -- cgit v1.2.3-58-ga151 From ce5b3717828356ce2c61e5a2a830df970fc90fb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:47 -0500 Subject: xprtrdma: Replace all usage of "frmr" with "frwr" Clean up: Over time, the industry has adopted the term "frwr" instead of "frmr". The term "frwr" is now more widely recognized. For the past couple of years I've attempted to add new code using "frwr" , but there still remains plenty of older code that still uses "frmr". Replace all usage of "frmr" to avoid confusion. While we're churning code, rename variables unhelpfully called "f" to "frwr", to improve code clarity. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 2 +- net/sunrpc/xprtrdma/frwr_ops.c | 176 ++++++++++++++++++++-------------------- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 18 ++-- 5 files changed, 100 insertions(+), 100 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 221b7a2e5406..5859563e3c1f 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -64,7 +64,7 @@ enum rpcrdma_memreg { RPCRDMA_MEMWINDOWS, RPCRDMA_MEMWINDOWS_ASYNC, RPCRDMA_MTHCAFMR, - RPCRDMA_FRMR, + RPCRDMA_FRWR, RPCRDMA_ALLPHYSICAL, RPCRDMA_LAST }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index e1f73037b554..185eb69e5fb5 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2015 Oracle. All rights reserved. + * Copyright (c) 2015, 2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. */ /* Lightweight memory registration using Fast Registration Work - * Requests (FRWR). Also referred to sometimes as FRMR mode. + * Requests (FRWR). * * FRWR features ordered asynchronous registration and deregistration * of arbitrarily sized memory regions. This is the fastest and safest @@ -15,9 +15,9 @@ /* Normal operation * * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG - * Work Request (frmr_op_map). When the RDMA operation is finished, this + * Work Request (frwr_op_map). When the RDMA operation is finished, this * Memory Region is invalidated using a LOCAL_INV Work Request - * (frmr_op_unmap). + * (frwr_op_unmap). * * Typically these Work Requests are not signaled, and neither are RDMA * SEND Work Requests (with the exception of signaling occasionally to @@ -98,12 +98,12 @@ out_not_supported: static int frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) { - unsigned int depth = ia->ri_max_frmr_depth; - struct rpcrdma_frmr *f = &r->frmr; + unsigned int depth = ia->ri_max_frwr_depth; + struct rpcrdma_frwr *frwr = &r->frwr; int rc; - f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); - if (IS_ERR(f->fr_mr)) + frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); + if (IS_ERR(frwr->fr_mr)) goto out_mr_err; r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); @@ -111,11 +111,11 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) goto out_list_err; sg_init_table(r->mw_sg, depth); - init_completion(&f->fr_linv_done); + init_completion(&frwr->fr_linv_done); return 0; out_mr_err: - rc = PTR_ERR(f->fr_mr); + rc = PTR_ERR(frwr->fr_mr); dprintk("RPC: %s: ib_alloc_mr status %i\n", __func__, rc); return rc; @@ -124,7 +124,7 @@ out_list_err: rc = -ENOMEM; dprintk("RPC: %s: sg allocation failure\n", __func__); - ib_dereg_mr(f->fr_mr); + ib_dereg_mr(frwr->fr_mr); return rc; } @@ -137,7 +137,7 @@ frwr_op_release_mr(struct rpcrdma_mw *r) if (!list_empty(&r->mw_list)) list_del(&r->mw_list); - rc = ib_dereg_mr(r->frmr.fr_mr); + rc = ib_dereg_mr(r->frwr.fr_mr); if (rc) pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", r, rc); @@ -148,41 +148,41 @@ frwr_op_release_mr(struct rpcrdma_mw *r) static int __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) { - struct rpcrdma_frmr *f = &r->frmr; + struct rpcrdma_frwr *frwr = &r->frwr; int rc; - rc = ib_dereg_mr(f->fr_mr); + rc = ib_dereg_mr(frwr->fr_mr); if (rc) { pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", rc, r); return rc; } - f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, - ia->ri_max_frmr_depth); - if (IS_ERR(f->fr_mr)) { + frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, + ia->ri_max_frwr_depth); + if (IS_ERR(frwr->fr_mr)) { pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", - PTR_ERR(f->fr_mr), r); - return PTR_ERR(f->fr_mr); + PTR_ERR(frwr->fr_mr), r); + return PTR_ERR(frwr->fr_mr); } - dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); - f->fr_state = FRMR_IS_INVALID; + dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr); + frwr->fr_state = FRWR_IS_INVALID; return 0; } -/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. +/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. */ static void frwr_op_recover_mr(struct rpcrdma_mw *mw) { - enum rpcrdma_frmr_state state = mw->frmr.fr_state; + enum rpcrdma_frwr_state state = mw->frwr.fr_state; struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; rc = __frwr_reset_mr(ia, mw); - if (state != FRMR_FLUSHED_LI) + if (state != FRWR_FLUSHED_LI) ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); if (rc) @@ -193,7 +193,7 @@ frwr_op_recover_mr(struct rpcrdma_mw *mw) return; out_release: - pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); + pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mw); r_xprt->rx_stats.mrs_orphaned++; spin_lock(&r_xprt->rx_buf.rb_mwlock); @@ -214,31 +214,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; - ia->ri_max_frmr_depth = + ia->ri_max_frwr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, attrs->max_fast_reg_page_list_len); dprintk("RPC: %s: device's max FR page list len = %u\n", - __func__, ia->ri_max_frmr_depth); - - /* Add room for frmr register and invalidate WRs. - * 1. FRMR reg WR for head - * 2. FRMR invalidate WR for head - * 3. N FRMR reg WRs for pagelist - * 4. N FRMR invalidate WRs for pagelist - * 5. FRMR reg WR for tail - * 6. FRMR invalidate WR for tail + __func__, ia->ri_max_frwr_depth); + + /* Add room for frwr register and invalidate WRs. + * 1. FRWR reg WR for head + * 2. FRWR invalidate WR for head + * 3. N FRWR reg WRs for pagelist + * 4. N FRWR invalidate WRs for pagelist + * 5. FRWR reg WR for tail + * 6. FRWR invalidate WR for tail * 7. The RDMA_SEND WR */ depth = 7; - /* Calculate N if the device max FRMR depth is smaller than + /* Calculate N if the device max FRWR depth is smaller than * RPCRDMA_MAX_DATA_SEGS. */ - if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { - delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; + if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) { + delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth; do { - depth += 2; /* FRMR reg + invalidate */ - delta -= ia->ri_max_frmr_depth; + depth += 2; /* FRWR reg + invalidate */ + delta -= ia->ri_max_frwr_depth; } while (delta > 0); } @@ -252,7 +252,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, } ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / - ia->ri_max_frmr_depth); + ia->ri_max_frwr_depth); return 0; } @@ -265,7 +265,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); + RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); } static void @@ -286,14 +286,14 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frmr *frmr; + struct rpcrdma_frwr *frwr; struct ib_cqe *cqe; /* WARNING: Only wr_cqe and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; - frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - frmr->fr_state = FRMR_FLUSHED_FR; + frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); + frwr->fr_state = FRWR_FLUSHED_FR; __frwr_sendcompletion_flush(wc, "fastreg"); } } @@ -307,14 +307,14 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frmr *frmr; + struct rpcrdma_frwr *frwr; struct ib_cqe *cqe; /* WARNING: Only wr_cqe and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; - frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - frmr->fr_state = FRMR_FLUSHED_LI; + frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); + frwr->fr_state = FRWR_FLUSHED_LI; __frwr_sendcompletion_flush(wc, "localinv"); } } @@ -329,17 +329,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frmr *frmr; + struct rpcrdma_frwr *frwr; struct ib_cqe *cqe; /* WARNING: Only wr_cqe and status are reliable at this point */ cqe = wc->wr_cqe; - frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); + frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); if (wc->status != IB_WC_SUCCESS) { - frmr->fr_state = FRMR_FLUSHED_LI; + frwr->fr_state = FRWR_FLUSHED_LI; __frwr_sendcompletion_flush(wc, "localinv"); } - complete(&frmr->fr_linv_done); + complete(&frwr->fr_linv_done); } /* Post a REG_MR Work Request to register a memory region @@ -351,8 +351,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; + struct rpcrdma_frwr *frwr; struct rpcrdma_mw *mw; - struct rpcrdma_frmr *frmr; struct ib_mr *mr; struct ib_reg_wr *reg_wr; struct ib_send_wr *bad_wr; @@ -366,14 +366,13 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mw = rpcrdma_get_mw(r_xprt); if (!mw) return ERR_PTR(-ENOBUFS); - } while (mw->frmr.fr_state != FRMR_IS_INVALID); - frmr = &mw->frmr; - frmr->fr_state = FRMR_IS_VALID; - mr = frmr->fr_mr; - reg_wr = &frmr->fr_regwr; - - if (nsegs > ia->ri_max_frmr_depth) - nsegs = ia->ri_max_frmr_depth; + } while (mw->frwr.fr_state != FRWR_IS_INVALID); + frwr = &mw->frwr; + frwr->fr_state = FRWR_IS_VALID; + mr = frwr->fr_mr; + + if (nsegs > ia->ri_max_frwr_depth) + nsegs = ia->ri_max_frwr_depth; for (i = 0; i < nsegs;) { if (seg->mr_page) sg_set_page(&mw->mw_sg[i], @@ -402,16 +401,17 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (unlikely(n != mw->mw_nents)) goto out_mapmr_err; - dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", - __func__, frmr, mw->mw_nents, mr->length); + dprintk("RPC: %s: Using frwr %p to map %u segments (%llu bytes)\n", + __func__, frwr, mw->mw_nents, mr->length); key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); + reg_wr = &frwr->fr_regwr; reg_wr->wr.next = NULL; reg_wr->wr.opcode = IB_WR_REG_MR; - frmr->fr_cqe.done = frwr_wc_fastreg; - reg_wr->wr.wr_cqe = &frmr->fr_cqe; + frwr->fr_cqe.done = frwr_wc_fastreg; + reg_wr->wr.wr_cqe = &frwr->fr_cqe; reg_wr->wr.num_sge = 0; reg_wr->wr.send_flags = 0; reg_wr->mr = mr; @@ -434,18 +434,18 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, out_dmamap_err: pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", mw->mw_sg, i); - frmr->fr_state = FRMR_IS_INVALID; + frwr->fr_state = FRWR_IS_INVALID; rpcrdma_put_mw(r_xprt, mw); return ERR_PTR(-EIO); out_mapmr_err: pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", - frmr->fr_mr, n, mw->mw_nents); + frwr->fr_mr, n, mw->mw_nents); rpcrdma_defer_mr_recovery(mw); return ERR_PTR(-EIO); out_senderr: - pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); + pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); rpcrdma_defer_mr_recovery(mw); return ERR_PTR(-ENOTCONN); } @@ -462,7 +462,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws) struct rpcrdma_xprt *r_xprt = mw->mw_xprt; list_del(&mw->mw_list); - mw->frmr.fr_state = FRMR_IS_INVALID; + mw->frwr.fr_state = FRWR_IS_INVALID; ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); rpcrdma_put_mw(r_xprt, mw); @@ -483,7 +483,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) { struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_frmr *f; + struct rpcrdma_frwr *frwr; struct rpcrdma_mw *mw; int count, rc; @@ -492,20 +492,20 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) * Chain the LOCAL_INV Work Requests and post them with * a single ib_post_send() call. */ - f = NULL; + frwr = NULL; count = 0; prev = &first; list_for_each_entry(mw, mws, mw_list) { - mw->frmr.fr_state = FRMR_IS_INVALID; + mw->frwr.fr_state = FRWR_IS_INVALID; - f = &mw->frmr; - dprintk("RPC: %s: invalidating frmr %p\n", - __func__, f); + frwr = &mw->frwr; + dprintk("RPC: %s: invalidating frwr %p\n", + __func__, frwr); - f->fr_cqe.done = frwr_wc_localinv; - last = &f->fr_invwr; + frwr->fr_cqe.done = frwr_wc_localinv; + last = &frwr->fr_invwr; memset(last, 0, sizeof(*last)); - last->wr_cqe = &f->fr_cqe; + last->wr_cqe = &frwr->fr_cqe; last->opcode = IB_WR_LOCAL_INV; last->ex.invalidate_rkey = mw->mw_handle; count++; @@ -513,7 +513,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) *prev = last; prev = &last->next; } - if (!f) + if (!frwr) goto unmap; /* Strong send queue ordering guarantees that when the @@ -521,8 +521,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) * are complete. */ last->send_flags = IB_SEND_SIGNALED; - f->fr_cqe.done = frwr_wc_localinv_wake; - reinit_completion(&f->fr_linv_done); + frwr->fr_cqe.done = frwr_wc_localinv_wake; + reinit_completion(&frwr->fr_linv_done); /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us @@ -532,7 +532,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) bad_wr = NULL; rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); if (bad_wr != first) - wait_for_completion(&f->fr_linv_done); + wait_for_completion(&frwr->fr_linv_done); if (rc) goto reset_mrs; @@ -542,8 +542,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) unmap: while (!list_empty(mws)) { mw = rpcrdma_pop_mw(mws); - dprintk("RPC: %s: DMA unmapping frmr %p\n", - __func__, &mw->frmr); + dprintk("RPC: %s: DMA unmapping frwr %p\n", + __func__, &mw->frwr); ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); rpcrdma_put_mw(r_xprt, mw); @@ -551,15 +551,15 @@ unmap: return; reset_mrs: - pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); + pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); /* Find and reset the MRs in the LOCAL_INV WRs that did not * get posted. */ while (bad_wr) { - f = container_of(bad_wr, struct rpcrdma_frmr, - fr_invwr); - mw = container_of(f, struct rpcrdma_mw, frmr); + frwr = container_of(bad_wr, struct rpcrdma_frwr, + fr_invwr); + mw = container_of(frwr, struct rpcrdma_mw, frwr); __frwr_reset_mr(ia, mw); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index be8c4e62d3f2..ddf0d87812ef 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; -unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; +unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; int xprt_rdma_pad_optimize; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index d6c737d4c36b..840579919ad0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -388,7 +388,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) } switch (xprt_rdma_memreg_strategy) { - case RPCRDMA_FRMR: + case RPCRDMA_FRWR: if (frwr_is_supported(ia)) { ia->ri_ops = &rpcrdma_frwr_memreg_ops; break; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e084130d3d84..f52269afaa09 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -73,7 +73,7 @@ struct rpcrdma_ia { struct completion ri_remove_done; int ri_async_rc; unsigned int ri_max_segs; - unsigned int ri_max_frmr_depth; + unsigned int ri_max_frwr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; unsigned int ri_max_send_sges; @@ -242,17 +242,17 @@ enum { * rpcrdma_deregister_external() uses this metadata to unmap and * release these resources when an RPC is complete. */ -enum rpcrdma_frmr_state { - FRMR_IS_INVALID, /* ready to be used */ - FRMR_IS_VALID, /* in use */ - FRMR_FLUSHED_FR, /* flushed FASTREG WR */ - FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ +enum rpcrdma_frwr_state { + FRWR_IS_INVALID, /* ready to be used */ + FRWR_IS_VALID, /* in use */ + FRWR_FLUSHED_FR, /* flushed FASTREG WR */ + FRWR_FLUSHED_LI, /* flushed LOCALINV WR */ }; -struct rpcrdma_frmr { +struct rpcrdma_frwr { struct ib_mr *fr_mr; struct ib_cqe fr_cqe; - enum rpcrdma_frmr_state fr_state; + enum rpcrdma_frwr_state fr_state; struct completion fr_linv_done; union { struct ib_reg_wr fr_regwr; @@ -272,7 +272,7 @@ struct rpcrdma_mw { enum dma_data_direction mw_dir; union { struct rpcrdma_fmr fmr; - struct rpcrdma_frmr frmr; + struct rpcrdma_frwr frwr; }; struct rpcrdma_xprt *mw_xprt; u32 mw_handle; -- cgit v1.2.3-58-ga151 From 96ceddea3710f61bb5a5f2af25e684b7e1466171 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:57:55 -0500 Subject: xprtrdma: Remove usage of "mw" Clean up: struct rpcrdma_mw was named after Memory Windows, but xprtrdma no longer supports a Memory Window registration mode. Rename rpcrdma_mw and its fields to reduce confusion and make the code more sensible to read. Renaming "mw" was suggested by Tom Talpey, the author of the original xprtrdma implementation. It's a good idea, but I haven't done this until now because it's a huge diffstat for no benefit other than code readability. However, I'm about to introduce static trace points that expose a few of xprtrdma's internal data structures. They should make sense in the trace report, and it's reasonable to treat trace points as a kernel API contract which might be difficult to change later. While I'm churning things up, two additional changes: - rename variables unhelpfully called "r" to "mr", to improve code clarity, and - rename the MR-related helper functions using the form "rpcrdma_mr_", to be consistent with other areas of the code. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 148 ++++++++++++++++----------------- net/sunrpc/xprtrdma/frwr_ops.c | 177 ++++++++++++++++++++-------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 64 +++++++-------- net/sunrpc/xprtrdma/verbs.c | 119 +++++++++++++++------------ net/sunrpc/xprtrdma/xprt_rdma.h | 62 +++++++------- 5 files changed, 292 insertions(+), 278 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 29fc84c7ff98..8bd0399b3a1c 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2015 Oracle. All rights reserved. + * Copyright (c) 2015, 2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. */ @@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) } static int -fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) +fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { static struct ib_fmr_attr fmr_attr = { .max_pages = RPCRDMA_MAX_FMR_SGES, @@ -55,106 +55,106 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) .page_shift = PAGE_SHIFT }; - mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, + mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, sizeof(u64), GFP_KERNEL); - if (!mw->fmr.fm_physaddrs) + if (!mr->fmr.fm_physaddrs) goto out_free; - mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, - sizeof(*mw->mw_sg), GFP_KERNEL); - if (!mw->mw_sg) + mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, + sizeof(*mr->mr_sg), GFP_KERNEL); + if (!mr->mr_sg) goto out_free; - sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); + sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); - mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, + mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, &fmr_attr); - if (IS_ERR(mw->fmr.fm_mr)) + if (IS_ERR(mr->fmr.fm_mr)) goto out_fmr_err; return 0; out_fmr_err: dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, - PTR_ERR(mw->fmr.fm_mr)); + PTR_ERR(mr->fmr.fm_mr)); out_free: - kfree(mw->mw_sg); - kfree(mw->fmr.fm_physaddrs); + kfree(mr->mr_sg); + kfree(mr->fmr.fm_physaddrs); return -ENOMEM; } static int -__fmr_unmap(struct rpcrdma_mw *mw) +__fmr_unmap(struct rpcrdma_mr *mr) { LIST_HEAD(l); int rc; - list_add(&mw->fmr.fm_mr->list, &l); + list_add(&mr->fmr.fm_mr->list, &l); rc = ib_unmap_fmr(&l); - list_del(&mw->fmr.fm_mr->list); + list_del(&mr->fmr.fm_mr->list); return rc; } static void -fmr_op_release_mr(struct rpcrdma_mw *r) +fmr_op_release_mr(struct rpcrdma_mr *mr) { LIST_HEAD(unmap_list); int rc; /* Ensure MW is not on any rl_registered list */ - if (!list_empty(&r->mw_list)) - list_del(&r->mw_list); + if (!list_empty(&mr->mr_list)) + list_del(&mr->mr_list); - kfree(r->fmr.fm_physaddrs); - kfree(r->mw_sg); + kfree(mr->fmr.fm_physaddrs); + kfree(mr->mr_sg); /* In case this one was left mapped, try to unmap it * to prevent dealloc_fmr from failing with EBUSY */ - rc = __fmr_unmap(r); + rc = __fmr_unmap(mr); if (rc) pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", - r, rc); + mr, rc); - rc = ib_dealloc_fmr(r->fmr.fm_mr); + rc = ib_dealloc_fmr(mr->fmr.fm_mr); if (rc) pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", - r, rc); + mr, rc); - kfree(r); + kfree(mr); } /* Reset of a single FMR. */ static void -fmr_op_recover_mr(struct rpcrdma_mw *mw) +fmr_op_recover_mr(struct rpcrdma_mr *mr) { - struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; int rc; /* ORDER: invalidate first */ - rc = __fmr_unmap(mw); + rc = __fmr_unmap(mr); /* ORDER: then DMA unmap */ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); + mr->mr_sg, mr->mr_nents, mr->mr_dir); if (rc) goto out_release; - rpcrdma_put_mw(r_xprt, mw); + rpcrdma_mr_put(mr); r_xprt->rx_stats.mrs_recovered++; return; out_release: - pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); + pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; - spin_lock(&r_xprt->rx_buf.rb_mwlock); - list_del(&mw->mw_all); - spin_unlock(&r_xprt->rx_buf.rb_mwlock); + spin_lock(&r_xprt->rx_buf.rb_mrlock); + list_del(&mr->mr_all); + spin_unlock(&r_xprt->rx_buf.rb_mrlock); - fmr_op_release_mr(mw); + fmr_op_release_mr(mr); } static int @@ -180,15 +180,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) */ static struct rpcrdma_mr_seg * fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, struct rpcrdma_mw **out) + int nsegs, bool writing, struct rpcrdma_mr **out) { struct rpcrdma_mr_seg *seg1 = seg; int len, pageoff, i, rc; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; u64 *dma_pages; - mw = rpcrdma_get_mw(r_xprt); - if (!mw) + mr = rpcrdma_mr_get(r_xprt); + if (!mr) return ERR_PTR(-ENOBUFS); pageoff = offset_in_page(seg1->mr_offset); @@ -199,12 +199,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, nsegs = RPCRDMA_MAX_FMR_SGES; for (i = 0; i < nsegs;) { if (seg->mr_page) - sg_set_page(&mw->mw_sg[i], + sg_set_page(&mr->mr_sg[i], seg->mr_page, seg->mr_len, offset_in_page(seg->mr_offset)); else - sg_set_buf(&mw->mw_sg[i], seg->mr_offset, + sg_set_buf(&mr->mr_sg[i], seg->mr_offset, seg->mr_len); len += seg->mr_len; ++seg; @@ -214,40 +214,40 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - mw->mw_dir = rpcrdma_data_dir(writing); + mr->mr_dir = rpcrdma_data_dir(writing); - mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, - mw->mw_sg, i, mw->mw_dir); - if (!mw->mw_nents) + mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, i, mr->mr_dir); + if (!mr->mr_nents) goto out_dmamap_err; - for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) - dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); - rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, + for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) + dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); + rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents, dma_pages[0]); if (rc) goto out_maperr; - mw->mw_handle = mw->fmr.fm_mr->rkey; - mw->mw_length = len; - mw->mw_offset = dma_pages[0] + pageoff; + mr->mr_handle = mr->fmr.fm_mr->rkey; + mr->mr_length = len; + mr->mr_offset = dma_pages[0] + pageoff; - *out = mw; + *out = mr; return seg; out_dmamap_err: pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", - mw->mw_sg, i); - rpcrdma_put_mw(r_xprt, mw); + mr->mr_sg, i); + rpcrdma_mr_put(mr); return ERR_PTR(-EIO); out_maperr: pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", len, (unsigned long long)dma_pages[0], - pageoff, mw->mw_nents, rc); + pageoff, mr->mr_nents, rc); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); - rpcrdma_put_mw(r_xprt, mw); + mr->mr_sg, mr->mr_nents, mr->mr_dir); + rpcrdma_mr_put(mr); return ERR_PTR(-EIO); } @@ -256,13 +256,13 @@ out_maperr: * Sleeps until it is safe for the host CPU to access the * previously mapped memory regions. * - * Caller ensures that @mws is not empty before the call. This + * Caller ensures that @mrs is not empty before the call. This * function empties the list. */ static void -fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) +fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) { - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; LIST_HEAD(unmap_list); int rc; @@ -271,10 +271,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) * ib_unmap_fmr() is slow, so use a single call instead * of one call per mapped FMR. */ - list_for_each_entry(mw, mws, mw_list) { + list_for_each_entry(mr, mrs, mr_list) { dprintk("RPC: %s: unmapping fmr %p\n", - __func__, &mw->fmr); - list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); + __func__, &mr->fmr); + list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); } r_xprt->rx_stats.local_inv_needed++; rc = ib_unmap_fmr(&unmap_list); @@ -284,14 +284,14 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) /* ORDER: Now DMA unmap all of the req's MRs, and return * them to the free MW list. */ - while (!list_empty(mws)) { - mw = rpcrdma_pop_mw(mws); + while (!list_empty(mrs)) { + mr = rpcrdma_mr_pop(mrs); dprintk("RPC: %s: DMA unmapping fmr %p\n", - __func__, &mw->fmr); - list_del(&mw->fmr.fm_mr->list); + __func__, &mr->fmr); + list_del(&mr->fmr.fm_mr->list); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); - rpcrdma_put_mw(r_xprt, mw); + mr->mr_sg, mr->mr_nents, mr->mr_dir); + rpcrdma_mr_put(mr); } return; @@ -299,10 +299,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) out_reset: pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); - while (!list_empty(mws)) { - mw = rpcrdma_pop_mw(mws); - list_del(&mw->fmr.fm_mr->list); - fmr_op_recover_mr(mw); + while (!list_empty(mrs)) { + mr = rpcrdma_mr_pop(mrs); + list_del(&mr->fmr.fm_mr->list); + fmr_op_recover_mr(mr); } } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 185eb69e5fb5..8ba4b3388a98 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -17,7 +17,7 @@ * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG * Work Request (frwr_op_map). When the RDMA operation is finished, this * Memory Region is invalidated using a LOCAL_INV Work Request - * (frwr_op_unmap). + * (frwr_op_unmap_sync). * * Typically these Work Requests are not signaled, and neither are RDMA * SEND Work Requests (with the exception of signaling occasionally to @@ -26,7 +26,7 @@ * * As an optimization, frwr_op_unmap marks MRs INVALID before the * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on - * rb_mws immediately so that no work (like managing a linked list + * rb_mrs immediately so that no work (like managing a linked list * under a spinlock) is needed in the completion upcall. * * But this means that frwr_op_map() can occasionally encounter an MR @@ -60,7 +60,7 @@ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered * with ib_dereg_mr and then are re-initialized. Because MR recovery * allocates fresh resources, it is deferred to a workqueue, and the - * recovered MRs are placed back on the rb_mws list when recovery is + * recovered MRs are placed back on the rb_mrs list when recovery is * complete. frwr_op_map allocates another MR for the current RPC while * the broken MR is reset. * @@ -96,21 +96,21 @@ out_not_supported: } static int -frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) +frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { unsigned int depth = ia->ri_max_frwr_depth; - struct rpcrdma_frwr *frwr = &r->frwr; + struct rpcrdma_frwr *frwr = &mr->frwr; int rc; frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); if (IS_ERR(frwr->fr_mr)) goto out_mr_err; - r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); - if (!r->mw_sg) + mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); + if (!mr->mr_sg) goto out_list_err; - sg_init_table(r->mw_sg, depth); + sg_init_table(mr->mr_sg, depth); init_completion(&frwr->fr_linv_done); return 0; @@ -129,32 +129,32 @@ out_list_err: } static void -frwr_op_release_mr(struct rpcrdma_mw *r) +frwr_op_release_mr(struct rpcrdma_mr *mr) { int rc; - /* Ensure MW is not on any rl_registered list */ - if (!list_empty(&r->mw_list)) - list_del(&r->mw_list); + /* Ensure MR is not on any rl_registered list */ + if (!list_empty(&mr->mr_list)) + list_del(&mr->mr_list); - rc = ib_dereg_mr(r->frwr.fr_mr); + rc = ib_dereg_mr(mr->frwr.fr_mr); if (rc) pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", - r, rc); - kfree(r->mw_sg); - kfree(r); + mr, rc); + kfree(mr->mr_sg); + kfree(mr); } static int -__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) +__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { - struct rpcrdma_frwr *frwr = &r->frwr; + struct rpcrdma_frwr *frwr = &mr->frwr; int rc; rc = ib_dereg_mr(frwr->fr_mr); if (rc) { pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", - rc, r); + rc, mr); return rc; } @@ -162,7 +162,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) ia->ri_max_frwr_depth); if (IS_ERR(frwr->fr_mr)) { pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", - PTR_ERR(frwr->fr_mr), r); + PTR_ERR(frwr->fr_mr), mr); return PTR_ERR(frwr->fr_mr); } @@ -174,33 +174,33 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. */ static void -frwr_op_recover_mr(struct rpcrdma_mw *mw) +frwr_op_recover_mr(struct rpcrdma_mr *mr) { - enum rpcrdma_frwr_state state = mw->frwr.fr_state; - struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + enum rpcrdma_frwr_state state = mr->frwr.fr_state; + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; - rc = __frwr_reset_mr(ia, mw); + rc = __frwr_mr_reset(ia, mr); if (state != FRWR_FLUSHED_LI) ib_dma_unmap_sg(ia->ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); + mr->mr_sg, mr->mr_nents, mr->mr_dir); if (rc) goto out_release; - rpcrdma_put_mw(r_xprt, mw); + rpcrdma_mr_put(mr); r_xprt->rx_stats.mrs_recovered++; return; out_release: - pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mw); + pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; - spin_lock(&r_xprt->rx_buf.rb_mwlock); - list_del(&mw->mw_all); - spin_unlock(&r_xprt->rx_buf.rb_mwlock); + spin_lock(&r_xprt->rx_buf.rb_mrlock); + list_del(&mr->mr_all); + spin_unlock(&r_xprt->rx_buf.rb_mrlock); - frwr_op_release_mr(mw); + frwr_op_release_mr(mr); } static int @@ -347,40 +347,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) */ static struct rpcrdma_mr_seg * frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, struct rpcrdma_mw **out) + int nsegs, bool writing, struct rpcrdma_mr **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; struct rpcrdma_frwr *frwr; - struct rpcrdma_mw *mw; - struct ib_mr *mr; + struct rpcrdma_mr *mr; + struct ib_mr *ibmr; struct ib_reg_wr *reg_wr; struct ib_send_wr *bad_wr; int rc, i, n; u8 key; - mw = NULL; + mr = NULL; do { - if (mw) - rpcrdma_defer_mr_recovery(mw); - mw = rpcrdma_get_mw(r_xprt); - if (!mw) + if (mr) + rpcrdma_mr_defer_recovery(mr); + mr = rpcrdma_mr_get(r_xprt); + if (!mr) return ERR_PTR(-ENOBUFS); - } while (mw->frwr.fr_state != FRWR_IS_INVALID); - frwr = &mw->frwr; + } while (mr->frwr.fr_state != FRWR_IS_INVALID); + frwr = &mr->frwr; frwr->fr_state = FRWR_IS_VALID; - mr = frwr->fr_mr; if (nsegs > ia->ri_max_frwr_depth) nsegs = ia->ri_max_frwr_depth; for (i = 0; i < nsegs;) { if (seg->mr_page) - sg_set_page(&mw->mw_sg[i], + sg_set_page(&mr->mr_sg[i], seg->mr_page, seg->mr_len, offset_in_page(seg->mr_offset)); else - sg_set_buf(&mw->mw_sg[i], seg->mr_offset, + sg_set_buf(&mr->mr_sg[i], seg->mr_offset, seg->mr_len); ++seg; @@ -391,21 +390,22 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - mw->mw_dir = rpcrdma_data_dir(writing); + mr->mr_dir = rpcrdma_data_dir(writing); - mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); - if (!mw->mw_nents) + mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); + if (!mr->mr_nents) goto out_dmamap_err; - n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); - if (unlikely(n != mw->mw_nents)) + ibmr = frwr->fr_mr; + n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); + if (unlikely(n != mr->mr_nents)) goto out_mapmr_err; dprintk("RPC: %s: Using frwr %p to map %u segments (%llu bytes)\n", - __func__, frwr, mw->mw_nents, mr->length); + __func__, frwr, mr->mr_nents, ibmr->length); - key = (u8)(mr->rkey & 0x000000FF); - ib_update_fast_reg_key(mr, ++key); + key = (u8)(ibmr->rkey & 0x000000FF); + ib_update_fast_reg_key(ibmr, ++key); reg_wr = &frwr->fr_regwr; reg_wr->wr.next = NULL; @@ -414,8 +414,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, reg_wr->wr.wr_cqe = &frwr->fr_cqe; reg_wr->wr.num_sge = 0; reg_wr->wr.send_flags = 0; - reg_wr->mr = mr; - reg_wr->key = mr->rkey; + reg_wr->mr = ibmr; + reg_wr->key = ibmr->rkey; reg_wr->access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; @@ -424,48 +424,48 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (rc) goto out_senderr; - mw->mw_handle = mr->rkey; - mw->mw_length = mr->length; - mw->mw_offset = mr->iova; + mr->mr_handle = ibmr->rkey; + mr->mr_length = ibmr->length; + mr->mr_offset = ibmr->iova; - *out = mw; + *out = mr; return seg; out_dmamap_err: pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", - mw->mw_sg, i); + mr->mr_sg, i); frwr->fr_state = FRWR_IS_INVALID; - rpcrdma_put_mw(r_xprt, mw); + rpcrdma_mr_put(mr); return ERR_PTR(-EIO); out_mapmr_err: pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", - frwr->fr_mr, n, mw->mw_nents); - rpcrdma_defer_mr_recovery(mw); + frwr->fr_mr, n, mr->mr_nents); + rpcrdma_mr_defer_recovery(mr); return ERR_PTR(-EIO); out_senderr: pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); - rpcrdma_defer_mr_recovery(mw); + rpcrdma_mr_defer_recovery(mr); return ERR_PTR(-ENOTCONN); } -/* Handle a remotely invalidated mw on the @mws list +/* Handle a remotely invalidated mr on the @mrs list */ static void -frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws) +frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) { - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; - list_for_each_entry(mw, mws, mw_list) - if (mw->mw_handle == rep->rr_inv_rkey) { - struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + list_for_each_entry(mr, mrs, mr_list) + if (mr->mr_handle == rep->rr_inv_rkey) { + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - list_del(&mw->mw_list); - mw->frwr.fr_state = FRWR_IS_INVALID; + list_del(&mr->mr_list); + mr->frwr.fr_state = FRWR_IS_INVALID; ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); - rpcrdma_put_mw(r_xprt, mw); + mr->mr_sg, mr->mr_nents, mr->mr_dir); + rpcrdma_mr_put(mr); break; /* only one invalidated MR per RPC */ } } @@ -475,16 +475,16 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws) * Sleeps until it is safe for the host CPU to access the * previously mapped memory regions. * - * Caller ensures that @mws is not empty before the call. This + * Caller ensures that @mrs is not empty before the call. This * function empties the list. */ static void -frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) +frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) { struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_frwr *frwr; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int count, rc; /* ORDER: Invalidate all of the MRs first @@ -495,10 +495,11 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) frwr = NULL; count = 0; prev = &first; - list_for_each_entry(mw, mws, mw_list) { - mw->frwr.fr_state = FRWR_IS_INVALID; + list_for_each_entry(mr, mrs, mr_list) { + mr->frwr.fr_state = FRWR_IS_INVALID; + + frwr = &mr->frwr; - frwr = &mw->frwr; dprintk("RPC: %s: invalidating frwr %p\n", __func__, frwr); @@ -507,7 +508,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) memset(last, 0, sizeof(*last)); last->wr_cqe = &frwr->fr_cqe; last->opcode = IB_WR_LOCAL_INV; - last->ex.invalidate_rkey = mw->mw_handle; + last->ex.invalidate_rkey = mr->mr_handle; count++; *prev = last; @@ -537,16 +538,16 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) goto reset_mrs; /* ORDER: Now DMA unmap all of the MRs, and return - * them to the free MW list. + * them to the free MR list. */ unmap: - while (!list_empty(mws)) { - mw = rpcrdma_pop_mw(mws); + while (!list_empty(mrs)) { + mr = rpcrdma_mr_pop(mrs); dprintk("RPC: %s: DMA unmapping frwr %p\n", - __func__, &mw->frwr); + __func__, &mr->frwr); ib_dma_unmap_sg(ia->ri_device, - mw->mw_sg, mw->mw_nents, mw->mw_dir); - rpcrdma_put_mw(r_xprt, mw); + mr->mr_sg, mr->mr_nents, mr->mr_dir); + rpcrdma_mr_put(mr); } return; @@ -559,9 +560,9 @@ reset_mrs: while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); - mw = container_of(frwr, struct rpcrdma_mw, frwr); + mr = container_of(frwr, struct rpcrdma_mr, frwr); - __frwr_reset_mr(ia, mw); + __frwr_mr_reset(ia, mr); bad_wr = bad_wr->next; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 9207aeacd2c3..9601af01653f 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) } static void -xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) +xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) { - *iptr++ = cpu_to_be32(mw->mw_handle); - *iptr++ = cpu_to_be32(mw->mw_length); - xdr_encode_hyper(iptr, mw->mw_offset); + *iptr++ = cpu_to_be32(mr->mr_handle); + *iptr++ = cpu_to_be32(mr->mr_length); + xdr_encode_hyper(iptr, mr->mr_offset); } static int -encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) +encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) { __be32 *p; @@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) if (unlikely(!p)) return -EMSGSIZE; - xdr_encode_rdma_segment(p, mw); + xdr_encode_rdma_segment(p, mr); return 0; } static int -encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, +encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, u32 position) { __be32 *p; @@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, *p++ = xdr_one; /* Item present */ *p++ = cpu_to_be32(position); - xdr_encode_rdma_segment(p, mw); + xdr_encode_rdma_segment(p, mr); return 0; } @@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; unsigned int pos; int nsegs; @@ -363,21 +363,21 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - false, &mw); + false, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_read_segment(xdr, mw, pos) < 0) + if (encode_read_segment(xdr, mr, pos) < 0) return -EMSGSIZE; dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, pos, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); + mr->mr_length, (unsigned long long)mr->mr_offset, + mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.read_chunk_count++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); return 0; @@ -404,7 +404,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int nsegs, nchunks; __be32 *segcount; @@ -425,23 +425,23 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); + true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_rdma_segment(xdr, mw) < 0) + if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); + mr->mr_length, (unsigned long long)mr->mr_offset, + mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); /* Update count of segments in this Write chunk */ @@ -468,7 +468,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int nsegs, nchunks; __be32 *segcount; @@ -487,23 +487,23 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); + true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_rdma_segment(xdr, mw) < 0) + if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); + mr->mr_length, (unsigned long long)mr->mr_offset, + mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); /* Update count of segments in the Reply chunk */ @@ -821,10 +821,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) * so these registrations are invalid and unusable. */ while (unlikely(!list_empty(&req->rl_registered))) { - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; - mw = rpcrdma_pop_mw(&req->rl_registered); - rpcrdma_defer_mr_recovery(mw); + mr = rpcrdma_mr_pop(&req->rl_registered); + rpcrdma_mr_defer_recovery(mr); } /* This implementation supports the following combinations diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 840579919ad0..2582729f8c64 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -71,8 +71,8 @@ /* * internal functions */ -static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); -static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); +static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); struct workqueue_struct *rpcrdma_receive_wq __read_mostly; @@ -458,7 +458,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); } - rpcrdma_destroy_mrs(buf); + rpcrdma_mrs_destroy(buf); /* Allow waiters to continue */ complete(&ia->ri_remove_done); @@ -671,7 +671,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, goto out3; } - rpcrdma_create_mrs(r_xprt); + rpcrdma_mrs_create(r_xprt); return 0; out3: @@ -992,15 +992,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) { struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, rb_recovery_worker.work); - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; spin_lock(&buf->rb_recovery_lock); while (!list_empty(&buf->rb_stale_mrs)) { - mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); + mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); - dprintk("RPC: %s: recovering MR %p\n", __func__, mw); - mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); + dprintk("RPC: %s: recovering MR %p\n", __func__, mr); + mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); spin_lock(&buf->rb_recovery_lock); } @@ -1008,20 +1008,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) } void -rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) +rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) { - struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_recovery_lock); - rpcrdma_push_mw(mw, &buf->rb_stale_mrs); + rpcrdma_mr_push(mr, &buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); schedule_delayed_work(&buf->rb_recovery_worker, 0); } static void -rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) +rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; @@ -1030,30 +1030,30 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) LIST_HEAD(all); for (count = 0; count < 32; count++) { - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int rc; - mw = kzalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) break; - rc = ia->ri_ops->ro_init_mr(ia, mw); + rc = ia->ri_ops->ro_init_mr(ia, mr); if (rc) { - kfree(mw); + kfree(mr); break; } - mw->mw_xprt = r_xprt; + mr->mr_xprt = r_xprt; - list_add(&mw->mw_list, &free); - list_add(&mw->mw_all, &all); + list_add(&mr->mr_list, &free); + list_add(&mr->mr_all, &all); } - spin_lock(&buf->rb_mwlock); - list_splice(&free, &buf->rb_mws); + spin_lock(&buf->rb_mrlock); + list_splice(&free, &buf->rb_mrs); list_splice(&all, &buf->rb_all); r_xprt->rx_stats.mrs_allocated += count; - spin_unlock(&buf->rb_mwlock); + spin_unlock(&buf->rb_mrlock); dprintk("RPC: %s: created %u MRs\n", __func__, count); } @@ -1066,7 +1066,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); - rpcrdma_create_mrs(r_xprt); + rpcrdma_mrs_create(r_xprt); } struct rpcrdma_req * @@ -1144,10 +1144,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) buf->rb_max_requests = r_xprt->rx_data.max_requests; buf->rb_bc_srv_max_requests = 0; - spin_lock_init(&buf->rb_mwlock); + spin_lock_init(&buf->rb_mrlock); spin_lock_init(&buf->rb_lock); spin_lock_init(&buf->rb_recovery_lock); - INIT_LIST_HEAD(&buf->rb_mws); + INIT_LIST_HEAD(&buf->rb_mrs); INIT_LIST_HEAD(&buf->rb_all); INIT_LIST_HEAD(&buf->rb_stale_mrs); INIT_DELAYED_WORK(&buf->rb_refresh_worker, @@ -1155,7 +1155,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_DELAYED_WORK(&buf->rb_recovery_worker, rpcrdma_mr_recovery_worker); - rpcrdma_create_mrs(r_xprt); + rpcrdma_mrs_create(r_xprt); INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_allreqs); @@ -1229,26 +1229,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req) } static void -rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) +rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); struct rpcrdma_ia *ia = rdmab_to_ia(buf); - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; unsigned int count; count = 0; - spin_lock(&buf->rb_mwlock); + spin_lock(&buf->rb_mrlock); while (!list_empty(&buf->rb_all)) { - mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); - list_del(&mw->mw_all); + mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); + list_del(&mr->mr_all); - spin_unlock(&buf->rb_mwlock); - ia->ri_ops->ro_release_mr(mw); + spin_unlock(&buf->rb_mrlock); + ia->ri_ops->ro_release_mr(mr); count++; - spin_lock(&buf->rb_mwlock); + spin_lock(&buf->rb_mrlock); } - spin_unlock(&buf->rb_mwlock); + spin_unlock(&buf->rb_mrlock); r_xprt->rx_stats.mrs_allocated = 0; dprintk("RPC: %s: released %u MRs\n", __func__, count); @@ -1285,26 +1285,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_unlock(&buf->rb_reqslock); buf->rb_recv_count = 0; - rpcrdma_destroy_mrs(buf); + rpcrdma_mrs_destroy(buf); } -struct rpcrdma_mw * -rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) +/** + * rpcrdma_mr_get - Allocate an rpcrdma_mr object + * @r_xprt: controlling transport + * + * Returns an initialized rpcrdma_mr or NULL if no free + * rpcrdma_mr objects are available. + */ +struct rpcrdma_mr * +rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_mw *mw = NULL; + struct rpcrdma_mr *mr = NULL; - spin_lock(&buf->rb_mwlock); - if (!list_empty(&buf->rb_mws)) - mw = rpcrdma_pop_mw(&buf->rb_mws); - spin_unlock(&buf->rb_mwlock); + spin_lock(&buf->rb_mrlock); + if (!list_empty(&buf->rb_mrs)) + mr = rpcrdma_mr_pop(&buf->rb_mrs); + spin_unlock(&buf->rb_mrlock); - if (!mw) - goto out_nomws; - return mw; + if (!mr) + goto out_nomrs; + return mr; -out_nomws: - dprintk("RPC: %s: no MWs available\n", __func__); +out_nomrs: + dprintk("RPC: %s: no MRs available\n", __func__); if (r_xprt->rx_ep.rep_connected != -ENODEV) schedule_delayed_work(&buf->rb_refresh_worker, 0); @@ -1314,14 +1321,20 @@ out_nomws: return NULL; } +/** + * rpcrdma_mr_put - Release an rpcrdma_mr object + * @mr: object to release + * + */ void -rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) +rpcrdma_mr_put(struct rpcrdma_mr *mr) { + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - spin_lock(&buf->rb_mwlock); - rpcrdma_push_mw(mw, &buf->rb_mws); - spin_unlock(&buf->rb_mwlock); + spin_lock(&buf->rb_mrlock); + rpcrdma_mr_push(mr, &buf->rb_mrs); + spin_unlock(&buf->rb_mrlock); } static struct rpcrdma_rep * diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f52269afaa09..530ace6ed125 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -230,12 +230,12 @@ enum { }; /* - * struct rpcrdma_mw - external memory region metadata + * struct rpcrdma_mr - external memory region metadata * * An external memory region is any buffer or page that is registered * on the fly (ie, not pre-registered). * - * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During + * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During * call_allocate, rpcrdma_buffer_get() assigns one to each segment in * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep * track of registration metadata while each RPC is pending. @@ -265,20 +265,20 @@ struct rpcrdma_fmr { u64 *fm_physaddrs; }; -struct rpcrdma_mw { - struct list_head mw_list; - struct scatterlist *mw_sg; - int mw_nents; - enum dma_data_direction mw_dir; +struct rpcrdma_mr { + struct list_head mr_list; + struct scatterlist *mr_sg; + int mr_nents; + enum dma_data_direction mr_dir; union { struct rpcrdma_fmr fmr; struct rpcrdma_frwr frwr; }; - struct rpcrdma_xprt *mw_xprt; - u32 mw_handle; - u32 mw_length; - u64 mw_offset; - struct list_head mw_all; + struct rpcrdma_xprt *mr_xprt; + u32 mr_handle; + u32 mr_length; + u64 mr_offset; + struct list_head mr_all; }; /* @@ -371,19 +371,19 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) } static inline void -rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) +rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) { - list_add_tail(&mw->mw_list, list); + list_add_tail(&mr->mr_list, list); } -static inline struct rpcrdma_mw * -rpcrdma_pop_mw(struct list_head *list) +static inline struct rpcrdma_mr * +rpcrdma_mr_pop(struct list_head *list) { - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; - mw = list_first_entry(list, struct rpcrdma_mw, mw_list); - list_del(&mw->mw_list); - return mw; + mr = list_first_entry(list, struct rpcrdma_mr, mr_list); + list_del(&mr->mr_list); + return mr; } /* @@ -393,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list) * One of these is associated with a transport instance */ struct rpcrdma_buffer { - spinlock_t rb_mwlock; /* protect rb_mws list */ - struct list_head rb_mws; + spinlock_t rb_mrlock; /* protect rb_mrs list */ + struct list_head rb_mrs; struct list_head rb_all; unsigned long rb_sc_head; @@ -473,19 +473,19 @@ struct rpcrdma_memreg_ops { struct rpcrdma_mr_seg * (*ro_map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool, - struct rpcrdma_mw **); + struct rpcrdma_mr **); void (*ro_reminv)(struct rpcrdma_rep *rep, - struct list_head *mws); + struct list_head *mrs); void (*ro_unmap_sync)(struct rpcrdma_xprt *, struct list_head *); - void (*ro_recover_mr)(struct rpcrdma_mw *); + void (*ro_recover_mr)(struct rpcrdma_mr *mr); int (*ro_open)(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_create_data_internal *); size_t (*ro_maxpages)(struct rpcrdma_xprt *); int (*ro_init_mr)(struct rpcrdma_ia *, - struct rpcrdma_mw *); - void (*ro_release_mr)(struct rpcrdma_mw *); + struct rpcrdma_mr *); + void (*ro_release_mr)(struct rpcrdma_mr *mr); const char *ro_displayname; const int ro_send_w_inv_ok; }; @@ -574,15 +574,15 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); -struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); -void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); +struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); +void rpcrdma_mr_put(struct rpcrdma_mr *mr); +void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); + struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); -void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); - struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, gfp_t); bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); -- cgit v1.2.3-58-ga151 From ec12e479e30653bf973ca1185bbb09158e9af0b7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 14 Dec 2017 20:58:04 -0500 Subject: xprtrdma: Introduce rpcrdma_mw_unmap_and_put Clean up: Code review suggested that a common bit of code can be placed into a helper function, and this gives us fewer places to stick an "I DMA unmapped something" trace point. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 19 ++++++++----------- net/sunrpc/xprtrdma/frwr_ops.c | 10 ++-------- net/sunrpc/xprtrdma/verbs.c | 26 ++++++++++++++++++++++---- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 4 files changed, 33 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 8bd0399b3a1c..7f2f2b774076 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -135,14 +135,12 @@ fmr_op_recover_mr(struct rpcrdma_mr *mr) /* ORDER: invalidate first */ rc = __fmr_unmap(mr); - - /* ORDER: then DMA unmap */ - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); if (rc) goto out_release; - rpcrdma_mr_put(mr); + /* ORDER: then DMA unmap */ + rpcrdma_mr_unmap_and_put(mr); + r_xprt->rx_stats.mrs_recovered++; return; @@ -150,6 +148,9 @@ out_release: pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, mr->mr_nents, mr->mr_dir); + spin_lock(&r_xprt->rx_buf.rb_mrlock); list_del(&mr->mr_all); spin_unlock(&r_xprt->rx_buf.rb_mrlock); @@ -245,9 +246,7 @@ out_maperr: pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", len, (unsigned long long)dma_pages[0], pageoff, mr->mr_nents, rc); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - rpcrdma_mr_put(mr); + rpcrdma_mr_unmap_and_put(mr); return ERR_PTR(-EIO); } @@ -289,9 +288,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) dprintk("RPC: %s: DMA unmapping fmr %p\n", __func__, &mr->fmr); list_del(&mr->fmr.fm_mr->list); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - rpcrdma_mr_put(mr); + rpcrdma_mr_unmap_and_put(mr); } return; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 8ba4b3388a98..35e3a54344cc 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -459,13 +459,9 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - list_del(&mr->mr_list); mr->frwr.fr_state = FRWR_IS_INVALID; - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - rpcrdma_mr_put(mr); + rpcrdma_mr_unmap_and_put(mr); break; /* only one invalidated MR per RPC */ } } @@ -545,9 +541,7 @@ unmap: mr = rpcrdma_mr_pop(mrs); dprintk("RPC: %s: DMA unmapping frwr %p\n", __func__, &mr->frwr); - ib_dma_unmap_sg(ia->ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - rpcrdma_mr_put(mr); + rpcrdma_mr_unmap_and_put(mr); } return; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2582729f8c64..9cc8abc09e14 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1321,6 +1321,14 @@ out_nomrs: return NULL; } +static void +__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) +{ + spin_lock(&buf->rb_mrlock); + rpcrdma_mr_push(mr, &buf->rb_mrs); + spin_unlock(&buf->rb_mrlock); +} + /** * rpcrdma_mr_put - Release an rpcrdma_mr object * @mr: object to release @@ -1328,13 +1336,23 @@ out_nomrs: */ void rpcrdma_mr_put(struct rpcrdma_mr *mr) +{ + __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); +} + +/** + * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it + * @mr: object to release + * + */ +void +rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) { struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - spin_lock(&buf->rb_mrlock); - rpcrdma_mr_push(mr, &buf->rb_mrs); - spin_unlock(&buf->rb_mrlock); + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, mr->mr_nents, mr->mr_dir); + __rpcrdma_mr_put(&r_xprt->rx_buf, mr); } static struct rpcrdma_rep * diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 530ace6ed125..28ae1fb1e2f3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -576,6 +576,7 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); void rpcrdma_mr_put(struct rpcrdma_mr *mr); +void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); -- cgit v1.2.3-58-ga151 From e48f083e19c9082352a07d0aef6a4ea0dc72dde0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 20 Jan 2018 11:16:34 -0500 Subject: rpcrdma: infrastructure for static trace points in rpcrdma.ko Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 16 ++++++++++++++++ net/sunrpc/xprtrdma/module.c | 12 +++++++----- net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 include/trace/events/rpcrdma.h (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h new file mode 100644 index 000000000000..8536d90bd286 --- /dev/null +++ b/include/trace/events/rpcrdma.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017 Oracle. All rights reserved. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rpcrdma + +#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RPCRDMA_H + +#include +#include + +#endif /* _TRACE_RPCRDMA_H */ + +#include diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index 560712bd9fa2..a762d192372b 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c @@ -1,18 +1,20 @@ /* - * Copyright (c) 2015 Oracle. All rights reserved. + * Copyright (c) 2015, 2017 Oracle. All rights reserved. */ /* rpcrdma.ko module initialization */ +#include +#include #include #include #include -#include "xprt_rdma.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif +#include + +#define CREATE_TRACE_POINTS +#include "xprt_rdma.h" MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); MODULE_DESCRIPTION("RPC/RDMA Transport"); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 28ae1fb1e2f3..a1ca1b638123 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -675,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); extern struct xprt_class xprt_rdma_bc; #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ + +#include -- cgit v1.2.3-58-ga151 From ab03eff58eb5b4914ec96e989e4c30e320d20ad8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:30:40 -0500 Subject: xprtrdma: Add trace points in RPC Call transmit paths Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 124 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 8 +-- net/sunrpc/xprtrdma/verbs.c | 12 ++-- 3 files changed, 129 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 8536d90bd286..ffe660365193 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -11,6 +11,130 @@ #include #include +/** + ** Call events + **/ + +TRACE_DEFINE_ENUM(rpcrdma_noch); +TRACE_DEFINE_ENUM(rpcrdma_readch); +TRACE_DEFINE_ENUM(rpcrdma_areadch); +TRACE_DEFINE_ENUM(rpcrdma_writech); +TRACE_DEFINE_ENUM(rpcrdma_replych); + +#define xprtrdma_show_chunktype(x) \ + __print_symbolic(x, \ + { rpcrdma_noch, "inline" }, \ + { rpcrdma_readch, "read list" }, \ + { rpcrdma_areadch, "*read list" }, \ + { rpcrdma_writech, "write list" }, \ + { rpcrdma_replych, "reply chunk" }) + +TRACE_EVENT(xprtrdma_marshal, + TP_PROTO( + const struct rpc_rqst *rqst, + unsigned int hdrlen, + unsigned int rtype, + unsigned int wtype + ), + + TP_ARGS(rqst, hdrlen, rtype, wtype), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(unsigned int, hdrlen) + __field(unsigned int, headlen) + __field(unsigned int, pagelen) + __field(unsigned int, taillen) + __field(unsigned int, rtype) + __field(unsigned int, wtype) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->hdrlen = hdrlen; + __entry->headlen = rqst->rq_snd_buf.head[0].iov_len; + __entry->pagelen = rqst->rq_snd_buf.page_len; + __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len; + __entry->rtype = rtype; + __entry->wtype = wtype; + ), + + TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->hdrlen, + __entry->headlen, __entry->pagelen, __entry->taillen, + xprtrdma_show_chunktype(__entry->rtype), + xprtrdma_show_chunktype(__entry->wtype) + ) +); + +TRACE_EVENT(xprtrdma_post_send, + TP_PROTO( + const struct rpcrdma_req *req, + int status + ), + + TP_ARGS(req, status), + + TP_STRUCT__entry( + __field(const void *, req) + __field(int, num_sge) + __field(bool, signaled) + __field(int, status) + ), + + TP_fast_assign( + __entry->req = req; + __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; + __entry->signaled = req->rl_sendctx->sc_wr.send_flags & + IB_SEND_SIGNALED; + __entry->status = status; + ), + + TP_printk("req=%p, %d SGEs%s, status=%d", + __entry->req, __entry->num_sge, + (__entry->signaled ? ", signaled" : ""), + __entry->status + ) +); + +/** + ** Completion events + **/ + +TRACE_EVENT(xprtrdma_wc_send, + TP_PROTO( + const struct rpcrdma_sendctx *sc, + const struct ib_wc *wc + ), + + TP_ARGS(sc, wc), + + TP_STRUCT__entry( + __field(const void *, req) + __field(unsigned int, unmap_count) + __field(unsigned int, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->req = sc->sc_req; + __entry->unmap_count = sc->sc_unmap_count; + __entry->status = wc->status; + __entry->vendor_err = __entry->status ? wc->vendor_err : 0; + ), + + TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)", + __entry->req, __entry->unmap_count, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 9601af01653f..162c0fd82673 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -524,9 +524,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) struct ib_sge *sge; unsigned int count; - dprintk("RPC: %s: unmapping %u sges for sc=%p\n", - __func__, sc->sc_unmap_count, sc); - /* The first two SGEs contain the transport header and * the inline buffer. These are always left mapped so * they can be cheaply re-used. @@ -874,10 +871,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) if (ret) goto out_err; - dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", - rqst->rq_task->tk_pid, __func__, - transfertypes[rtype], transfertypes[wtype], - xdr_stream_pos(xdr)); + trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype); ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), &rqst->rq_snd_buf, rtype); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 9cc8abc09e14..06d86b8c7f71 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -133,6 +133,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) container_of(cqe, struct rpcrdma_sendctx, sc_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ + trace_xprtrdma_wc_send(sc, wc); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: Send: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), @@ -1549,9 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, req->rl_reply = NULL; } - dprintk("RPC: %s: posting %d s/g entries\n", - __func__, send_wr->num_sge); - if (!ep->rep_send_count || test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { send_wr->send_flags |= IB_SEND_SIGNALED; @@ -1560,14 +1558,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, send_wr->send_flags &= ~IB_SEND_SIGNALED; --ep->rep_send_count; } + rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); + trace_xprtrdma_post_send(req, rc); if (rc) - goto out_postsend_err; + return -ENOTCONN; return 0; - -out_postsend_err: - pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc); - return -ENOTCONN; } int -- cgit v1.2.3-58-ga151 From b4a7f91c1d8e14596a1eb37075d9f20f213481a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:30:48 -0500 Subject: xprtrdma: Add trace points in the RPC Reply handler paths Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 162 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 17 ++--- net/sunrpc/xprtrdma/verbs.c | 11 +-- 3 files changed, 171 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ffe660365193..fe8ba0b25df8 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -11,6 +11,46 @@ #include #include +/** + ** Event classes + **/ + +DECLARE_EVENT_CLASS(xprtrdma_reply_event, + TP_PROTO( + const struct rpcrdma_rep *rep + ), + + TP_ARGS(rep), + + TP_STRUCT__entry( + __field(const void *, rep) + __field(const void *, r_xprt) + __field(u32, xid) + __field(u32, version) + __field(u32, proc) + ), + + TP_fast_assign( + __entry->rep = rep; + __entry->r_xprt = rep->rr_rxprt; + __entry->xid = be32_to_cpu(rep->rr_xid); + __entry->version = be32_to_cpu(rep->rr_vers); + __entry->proc = be32_to_cpu(rep->rr_proc); + ), + + TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u", + __entry->r_xprt, __entry->xid, __entry->rep, + __entry->version, __entry->proc + ) +); + +#define DEFINE_REPLY_EVENT(name) \ + DEFINE_EVENT(xprtrdma_reply_event, name, \ + TP_PROTO( \ + const struct rpcrdma_rep *rep \ + ), \ + TP_ARGS(rep)) + /** ** Call events **/ @@ -102,6 +142,29 @@ TRACE_EVENT(xprtrdma_post_send, ) ); +TRACE_EVENT(xprtrdma_post_recv, + TP_PROTO( + const struct rpcrdma_rep *rep, + int status + ), + + TP_ARGS(rep, status), + + TP_STRUCT__entry( + __field(const void *, rep) + __field(int, status) + ), + + TP_fast_assign( + __entry->rep = rep; + __entry->status = status; + ), + + TP_printk("rep=%p status=%d", + __entry->rep, __entry->status + ) +); + /** ** Completion events **/ @@ -135,6 +198,105 @@ TRACE_EVENT(xprtrdma_wc_send, ) ); +TRACE_EVENT(xprtrdma_wc_receive, + TP_PROTO( + const struct rpcrdma_rep *rep, + const struct ib_wc *wc + ), + + TP_ARGS(rep, wc), + + TP_STRUCT__entry( + __field(const void *, rep) + __field(unsigned int, byte_len) + __field(unsigned int, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->rep = rep; + __entry->byte_len = wc->byte_len; + __entry->status = wc->status; + __entry->vendor_err = __entry->status ? wc->vendor_err : 0; + ), + + TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", + __entry->rep, __entry->byte_len, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +/** + ** Reply events + **/ + +TRACE_EVENT(xprtrdma_reply, + TP_PROTO( + const struct rpc_task *task, + const struct rpcrdma_rep *rep, + const struct rpcrdma_req *req, + unsigned int credits + ), + + TP_ARGS(task, rep, req, credits), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, rep) + __field(const void *, req) + __field(u32, xid) + __field(unsigned int, credits) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->rep = rep; + __entry->req = req; + __entry->xid = be32_to_cpu(rep->rr_xid); + __entry->credits = credits; + ), + + TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->credits, __entry->rep, __entry->req + ) +); + +TRACE_EVENT(xprtrdma_defer_cmp, + TP_PROTO( + const struct rpcrdma_rep *rep + ), + + TP_ARGS(rep), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, rep) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->task_id = rep->rr_rqst->rq_task->tk_pid; + __entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid; + __entry->rep = rep; + __entry->xid = be32_to_cpu(rep->rr_xid); + ), + + TP_printk("task:%u@%u xid=0x%08x rep=%p", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->rep + ) +); + +DEFINE_REPLY_EVENT(xprtrdma_reply_vers); +DEFINE_REPLY_EVENT(xprtrdma_reply_rqst); +DEFINE_REPLY_EVENT(xprtrdma_reply_short); +DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 162c0fd82673..6f774f1e4516 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1278,8 +1278,7 @@ out: * being marshaled. */ out_badheader: - dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", - rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc)); + trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; status = -EIO; goto out; @@ -1323,6 +1322,7 @@ void rpcrdma_deferred_completion(struct work_struct *work) struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + trace_xprtrdma_defer_cmp(rep); if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); rpcrdma_release_rqst(r_xprt, req); @@ -1344,8 +1344,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) u32 credits; __be32 *p; - dprintk("RPC: %s: incoming rep %p\n", __func__, rep); - if (rep->rr_hdrbuf.head[0].iov_len == 0) goto out_badstatus; @@ -1389,8 +1387,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) rep->rr_rqst = rqst; clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); - dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", - __func__, rep, req, be32_to_cpu(rep->rr_xid)); + trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); return; @@ -1404,8 +1401,7 @@ out_badstatus: return; out_badversion: - dprintk("RPC: %s: invalid version %d\n", - __func__, be32_to_cpu(rep->rr_vers)); + trace_xprtrdma_reply_vers(rep); goto repost; /* The RPC transaction has already been terminated, or the header @@ -1413,12 +1409,11 @@ out_badversion: */ out_norqst: spin_unlock(&xprt->recv_lock); - dprintk("RPC: %s: no match for incoming xid 0x%08x\n", - __func__, be32_to_cpu(rep->rr_xid)); + trace_xprtrdma_reply_rqst(rep); goto repost; out_shortreply: - dprintk("RPC: %s: short/invalid reply\n", __func__); + trace_xprtrdma_reply_short(rep); /* If no pending RPC transaction was matched, post a replacement * receive buffer before returning. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 06d86b8c7f71..cfa3c0328878 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -156,13 +156,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) rr_cqe); /* WARNING: Only wr_id and status are reliable at this point */ + trace_xprtrdma_wc_receive(rep, wc); if (wc->status != IB_WC_SUCCESS) goto out_fail; /* status == SUCCESS means all fields in wc are trustworthy */ - dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", - __func__, rep, wc->byte_len); - rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); rep->rr_wc_flags = wc->wc_flags; rep->rr_inv_rkey = wc->ex.invalidate_rkey; @@ -1576,17 +1574,14 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) goto out_map; rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); + trace_xprtrdma_post_recv(rep, rc); if (rc) - goto out_postrecv; + return -ENOTCONN; return 0; out_map: pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); return -EIO; - -out_postrecv: - pr_err("rpcrdma: ib_post_recv returned %i\n", rc); - return -ENOTCONN; } /** -- cgit v1.2.3-58-ga151 From 58f10ad40dd8456c0c6b1d90e8237c67db6e0801 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:30:56 -0500 Subject: xprtrdma: Add trace points to instrument memory registration Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 156 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/frwr_ops.c | 11 ++- net/sunrpc/xprtrdma/rpc_rdma.c | 18 +---- 3 files changed, 163 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fe8ba0b25df8..17eb3209cd6e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -51,10 +51,164 @@ DECLARE_EVENT_CLASS(xprtrdma_reply_event, ), \ TP_ARGS(rep)) +DECLARE_EVENT_CLASS(xprtrdma_rdch_event, + TP_PROTO( + const struct rpc_task *task, + unsigned int pos, + struct rpcrdma_mr *mr, + int nsegs + ), + + TP_ARGS(task, pos, mr, nsegs), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, mr) + __field(unsigned int, pos) + __field(int, nents) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + __field(int, nsegs) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->mr = mr; + __entry->pos = pos; + __entry->nents = mr->mr_nents; + __entry->handle = mr->mr_handle; + __entry->length = mr->mr_length; + __entry->offset = mr->mr_offset; + __entry->nsegs = nsegs; + ), + + TP_printk("task:%u@%u mr=%p pos=%u %u@0x%016llx:0x%08x (%s)", + __entry->task_id, __entry->client_id, __entry->mr, + __entry->pos, __entry->length, + (unsigned long long)__entry->offset, __entry->handle, + __entry->nents < __entry->nsegs ? "more" : "last" + ) +); + +#define DEFINE_RDCH_EVENT(name) \ + DEFINE_EVENT(xprtrdma_rdch_event, name, \ + TP_PROTO( \ + const struct rpc_task *task, \ + unsigned int pos, \ + struct rpcrdma_mr *mr, \ + int nsegs \ + ), \ + TP_ARGS(task, pos, mr, nsegs)) + +DECLARE_EVENT_CLASS(xprtrdma_wrch_event, + TP_PROTO( + const struct rpc_task *task, + struct rpcrdma_mr *mr, + int nsegs + ), + + TP_ARGS(task, mr, nsegs), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, mr) + __field(int, nents) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + __field(int, nsegs) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->mr = mr; + __entry->nents = mr->mr_nents; + __entry->handle = mr->mr_handle; + __entry->length = mr->mr_length; + __entry->offset = mr->mr_offset; + __entry->nsegs = nsegs; + ), + + TP_printk("task:%u@%u mr=%p %u@0x%016llx:0x%08x (%s)", + __entry->task_id, __entry->client_id, __entry->mr, + __entry->length, (unsigned long long)__entry->offset, + __entry->handle, + __entry->nents < __entry->nsegs ? "more" : "last" + ) +); + +#define DEFINE_WRCH_EVENT(name) \ + DEFINE_EVENT(xprtrdma_wrch_event, name, \ + TP_PROTO( \ + const struct rpc_task *task, \ + struct rpcrdma_mr *mr, \ + int nsegs \ + ), \ + TP_ARGS(task, mr, nsegs)) + +TRACE_DEFINE_ENUM(FRWR_IS_INVALID); +TRACE_DEFINE_ENUM(FRWR_IS_VALID); +TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR); +TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI); + +#define xprtrdma_show_frwr_state(x) \ + __print_symbolic(x, \ + { FRWR_IS_INVALID, "INVALID" }, \ + { FRWR_IS_VALID, "VALID" }, \ + { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \ + { FRWR_FLUSHED_LI, "FLUSHED_LI" }) + +DECLARE_EVENT_CLASS(xprtrdma_frwr_done, + TP_PROTO( + const struct ib_wc *wc, + const struct rpcrdma_frwr *frwr + ), + + TP_ARGS(wc, frwr), + + TP_STRUCT__entry( + __field(const void *, mr) + __field(unsigned int, state) + __field(unsigned int, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr); + __entry->state = frwr->fr_state; + __entry->status = wc->status; + __entry->vendor_err = __entry->status ? wc->vendor_err : 0; + ), + + TP_printk( + "mr=%p state=%s: %s (%u/0x%x)", + __entry->mr, xprtrdma_show_frwr_state(__entry->state), + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_FRWR_DONE_EVENT(name) \ + DEFINE_EVENT(xprtrdma_frwr_done, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpcrdma_frwr *frwr \ + ), \ + TP_ARGS(wc, frwr)) + /** ** Call events **/ +DEFINE_RDCH_EVENT(xprtrdma_read_chunk); +DEFINE_WRCH_EVENT(xprtrdma_write_chunk); +DEFINE_WRCH_EVENT(xprtrdma_reply_chunk); + TRACE_DEFINE_ENUM(rpcrdma_noch); TRACE_DEFINE_ENUM(rpcrdma_readch); TRACE_DEFINE_ENUM(rpcrdma_areadch); @@ -227,6 +381,8 @@ TRACE_EVENT(xprtrdma_wc_receive, ) ); +DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); + /** ** Reply events **/ diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 35e3a54344cc..afbeb9b442bf 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -286,16 +286,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frwr *frwr; - struct ib_cqe *cqe; + struct ib_cqe *cqe = wc->wr_cqe; + struct rpcrdma_frwr *frwr = + container_of(cqe, struct rpcrdma_frwr, fr_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) { - cqe = wc->wr_cqe; - frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); frwr->fr_state = FRWR_FLUSHED_FR; __frwr_sendcompletion_flush(wc, "fastreg"); } + trace_xprtrdma_wc_fastreg(wc, frwr); } /** @@ -401,9 +401,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (unlikely(n != mr->mr_nents)) goto out_mapmr_err; - dprintk("RPC: %s: Using frwr %p to map %u segments (%llu bytes)\n", - __func__, frwr, mr->mr_nents, ibmr->length); - key = (u8)(ibmr->rkey & 0x000000FF); ib_update_fast_reg_key(ibmr, ++key); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 6f774f1e4516..634496ca2e28 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -371,11 +371,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_read_segment(xdr, mr, pos) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, pos, - mr->mr_length, (unsigned long long)mr->mr_offset, - mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); r_xprt->rx_stats.read_chunk_count++; nsegs -= mr->mr_nents; } while (nsegs); @@ -433,11 +429,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, - mr->mr_length, (unsigned long long)mr->mr_offset, - mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; @@ -495,11 +487,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, - mr->mr_length, (unsigned long long)mr->mr_offset, - mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; -- cgit v1.2.3-58-ga151 From e11b7c9655d13f26c227ca70c399aac2b596033d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:04 -0500 Subject: xprtrdma: Add trace points in reply decoder path This includes decoding Write and Reply chunks, and fixing up inline payloads. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 93 ++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 29 ++++--------- 2 files changed, 102 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 17eb3209cd6e..7336cdc173ed 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -453,6 +453,99 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_rqst); DEFINE_REPLY_EVENT(xprtrdma_reply_short); DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); +TRACE_EVENT(xprtrdma_fixup, + TP_PROTO( + const struct rpc_rqst *rqst, + int len, + int hdrlen + ), + + TP_ARGS(rqst, len, hdrlen), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, base) + __field(int, len) + __field(int, hdrlen) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->base = rqst->rq_rcv_buf.head[0].iov_base; + __entry->len = len; + __entry->hdrlen = hdrlen; + ), + + TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", + __entry->task_id, __entry->client_id, + __entry->base, __entry->len, __entry->hdrlen + ) +); + +TRACE_EVENT(xprtrdma_fixup_pg, + TP_PROTO( + const struct rpc_rqst *rqst, + int pageno, + const void *pos, + int len, + int curlen + ), + + TP_ARGS(rqst, pageno, pos, len, curlen), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, pos) + __field(int, pageno) + __field(int, len) + __field(int, curlen) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->pos = pos; + __entry->pageno = pageno; + __entry->len = len; + __entry->curlen = curlen; + ), + + TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", + __entry->task_id, __entry->client_id, + __entry->pageno, __entry->pos, __entry->len, __entry->curlen + ) +); + +TRACE_EVENT(xprtrdma_decode_seg, + TP_PROTO( + u32 handle, + u32 length, + u64 offset + ), + + TP_ARGS(handle, length, offset), + + TP_STRUCT__entry( + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->handle = handle; + __entry->length = length; + __entry->offset = offset; + ), + + TP_printk("%u@0x%016llx:0x%08x", + __entry->length, (unsigned long long)__entry->offset, + __entry->handle + ) +); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 634496ca2e28..1ae9b41b75a1 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -914,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) curlen = copy_len; - dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", - __func__, srcp, copy_len, curlen); + trace_xprtrdma_fixup(rqst, copy_len, curlen); srcp += curlen; copy_len -= curlen; @@ -935,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > pagelist_len) curlen = pagelist_len; - dprintk("RPC: %s: page %d" - " srcp 0x%p len %d curlen %d\n", - __func__, i, srcp, copy_len, curlen); + trace_xprtrdma_fixup_pg(rqst, i, srcp, + copy_len, curlen); destp = kmap_atomic(ppages[i]); memcpy(destp + page_base, srcp, curlen); flush_dcache_page(ppages[i]); @@ -1028,26 +1026,19 @@ out_short: static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) { + u32 handle; + u64 offset; __be32 *p; p = xdr_inline_decode(xdr, 4 * sizeof(*p)); if (unlikely(!p)) return -EIO; - ifdebug(FACILITY) { - u64 offset; - u32 handle; - - handle = be32_to_cpup(p++); - *length = be32_to_cpup(p++); - xdr_decode_hyper(p, &offset); - dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n", - __func__, *length, (unsigned long long)offset, - handle); - } else { - *length = be32_to_cpup(p + 1); - } + handle = be32_to_cpup(p++); + *length = be32_to_cpup(p++); + xdr_decode_hyper(p, &offset); + trace_xprtrdma_decode_seg(handle, *length, offset); return 0; } @@ -1068,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) *length += seglength; } - dprintk("RPC: %s: segcount=%u, %u bytes\n", - __func__, be32_to_cpup(p), *length); return 0; } -- cgit v1.2.3-58-ga151 From 2937fede11b1081dbbbe6228637423639165eec7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:12 -0500 Subject: xprtrdma: Add trace points to instrument memory invalidation Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 41 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/fmr_ops.c | 4 ++-- net/sunrpc/xprtrdma/frwr_ops.c | 27 +++++++++++++-------------- net/sunrpc/xprtrdma/verbs.c | 1 + 4 files changed, 57 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 7336cdc173ed..fadb4b687595 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -201,6 +201,41 @@ DECLARE_EVENT_CLASS(xprtrdma_frwr_done, ), \ TP_ARGS(wc, frwr)) +DECLARE_EVENT_CLASS(xprtrdma_mr, + TP_PROTO( + const struct rpcrdma_mr *mr + ), + + TP_ARGS(mr), + + TP_STRUCT__entry( + __field(const void *, mr) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->mr = mr; + __entry->handle = mr->mr_handle; + __entry->length = mr->mr_length; + __entry->offset = mr->mr_offset; + ), + + TP_printk("mr=%p %u@0x%016llx:0x%08x", + __entry->mr, __entry->length, + (unsigned long long)__entry->offset, + __entry->handle + ) +); + +#define DEFINE_MR_EVENT(name) \ + DEFINE_EVENT(xprtrdma_mr, name, \ + TP_PROTO( \ + const struct rpcrdma_mr *mr \ + ), \ + TP_ARGS(mr)) + /** ** Call events **/ @@ -382,6 +417,12 @@ TRACE_EVENT(xprtrdma_wc_receive, ); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); +DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); +DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); + +DEFINE_MR_EVENT(xprtrdma_localinv); +DEFINE_MR_EVENT(xprtrdma_dma_unmap); +DEFINE_MR_EVENT(xprtrdma_remoteinv); /** ** Reply events diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 7f2f2b774076..d5f95bb39300 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -148,6 +148,7 @@ out_release: pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; + trace_xprtrdma_dma_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); @@ -273,6 +274,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) { dprintk("RPC: %s: unmapping fmr %p\n", __func__, &mr->fmr); + trace_xprtrdma_localinv(mr); list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); } r_xprt->rx_stats.local_inv_needed++; @@ -285,8 +287,6 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) */ while (!list_empty(mrs)) { mr = rpcrdma_mr_pop(mrs); - dprintk("RPC: %s: DMA unmapping fmr %p\n", - __func__, &mr->fmr); list_del(&mr->fmr.fm_mr->list); rpcrdma_mr_unmap_and_put(mr); } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index afbeb9b442bf..90f688f19783 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -182,9 +182,11 @@ frwr_op_recover_mr(struct rpcrdma_mr *mr) int rc; rc = __frwr_mr_reset(ia, mr); - if (state != FRWR_FLUSHED_LI) + if (state != FRWR_FLUSHED_LI) { + trace_xprtrdma_dma_unmap(mr); ib_dma_unmap_sg(ia->ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); + } if (rc) goto out_release; @@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frwr *frwr; - struct ib_cqe *cqe; + struct ib_cqe *cqe = wc->wr_cqe; + struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, + fr_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) { - cqe = wc->wr_cqe; - frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); frwr->fr_state = FRWR_FLUSHED_LI; __frwr_sendcompletion_flush(wc, "localinv"); } + trace_xprtrdma_wc_li(wc, frwr); } /** @@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) { - struct rpcrdma_frwr *frwr; - struct ib_cqe *cqe; + struct ib_cqe *cqe = wc->wr_cqe; + struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, + fr_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ - cqe = wc->wr_cqe; - frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe); if (wc->status != IB_WC_SUCCESS) { frwr->fr_state = FRWR_FLUSHED_LI; __frwr_sendcompletion_flush(wc, "localinv"); } complete(&frwr->fr_linv_done); + trace_xprtrdma_wc_li_wake(wc, frwr); } /* Post a REG_MR Work Request to register a memory region @@ -457,6 +459,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { list_del(&mr->mr_list); + trace_xprtrdma_remoteinv(mr); mr->frwr.fr_state = FRWR_IS_INVALID; rpcrdma_mr_unmap_and_put(mr); break; /* only one invalidated MR per RPC */ @@ -492,9 +495,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) mr->frwr.fr_state = FRWR_IS_INVALID; frwr = &mr->frwr; - - dprintk("RPC: %s: invalidating frwr %p\n", - __func__, frwr); + trace_xprtrdma_localinv(mr); frwr->fr_cqe.done = frwr_wc_localinv; last = &frwr->fr_invwr; @@ -536,8 +537,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) unmap: while (!list_empty(mrs)) { mr = rpcrdma_mr_pop(mrs); - dprintk("RPC: %s: DMA unmapping frwr %p\n", - __func__, &mr->frwr); rpcrdma_mr_unmap_and_put(mr); } return; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cfa3c0328878..0e89e5529d6c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1349,6 +1349,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) { struct rpcrdma_xprt *r_xprt = mr->mr_xprt; + trace_xprtrdma_dma_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); __rpcrdma_mr_put(&r_xprt->rx_buf, mr); -- cgit v1.2.3-58-ga151 From 1c443effa319a02a5b8808bef7840500c29f24b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:21 -0500 Subject: xprtrdma: Add trace points to instrument MR allocation and recovery Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 57 ++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/verbs.c | 6 ++--- 2 files changed, 60 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fadb4b687595..d296d5b084c2 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -51,6 +51,37 @@ DECLARE_EVENT_CLASS(xprtrdma_reply_event, ), \ TP_ARGS(rep)) +DECLARE_EVENT_CLASS(xprtrdma_rxprt, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt + ), + + TP_ARGS(r_xprt), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p", + __get_str(addr), __get_str(port), __entry->r_xprt + ) +); + +#define DEFINE_RXPRT_EVENT(name) \ + DEFINE_EVENT(xprtrdma_rxprt, name, \ + TP_PROTO( \ + const struct rpcrdma_xprt *r_xprt \ + ), \ + TP_ARGS(r_xprt)) + DECLARE_EVENT_CLASS(xprtrdma_rdch_event, TP_PROTO( const struct rpc_task *task, @@ -240,6 +271,31 @@ DECLARE_EVENT_CLASS(xprtrdma_mr, ** Call events **/ +TRACE_EVENT(xprtrdma_createmrs, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned int count + ), + + TP_ARGS(r_xprt, count), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, count) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->count = count; + ), + + TP_printk("r_xprt=%p: created %u MRs", + __entry->r_xprt, __entry->count + ) +); + +DEFINE_RXPRT_EVENT(xprtrdma_nomrs); + DEFINE_RDCH_EVENT(xprtrdma_read_chunk); DEFINE_WRCH_EVENT(xprtrdma_write_chunk); DEFINE_WRCH_EVENT(xprtrdma_reply_chunk); @@ -423,6 +479,7 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); DEFINE_MR_EVENT(xprtrdma_localinv); DEFINE_MR_EVENT(xprtrdma_dma_unmap); DEFINE_MR_EVENT(xprtrdma_remoteinv); +DEFINE_MR_EVENT(xprtrdma_recover_mr); /** ** Reply events diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0e89e5529d6c..1addcc9477ba 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -998,7 +998,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); - dprintk("RPC: %s: recovering MR %p\n", __func__, mr); + trace_xprtrdma_recover_mr(mr); mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); spin_lock(&buf->rb_recovery_lock); @@ -1054,7 +1054,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) r_xprt->rx_stats.mrs_allocated += count; spin_unlock(&buf->rb_mrlock); - dprintk("RPC: %s: created %u MRs\n", __func__, count); + trace_xprtrdma_createmrs(r_xprt, count); } static void @@ -1310,7 +1310,7 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) return mr; out_nomrs: - dprintk("RPC: %s: no MRs available\n", __func__); + trace_xprtrdma_nomrs(r_xprt); if (r_xprt->rx_ep.rep_connected != -ENODEV) schedule_delayed_work(&buf->rb_refresh_worker, 0); -- cgit v1.2.3-58-ga151 From b4744e00a39e6213d84a83a86e6d304886316f5f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:29 -0500 Subject: xprtrdma: Add trace points for connect events Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 75 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 12 +++---- net/sunrpc/xprtrdma/verbs.c | 33 +++++++----------- 3 files changed, 92 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index d296d5b084c2..023443d5a0bf 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -267,6 +267,81 @@ DECLARE_EVENT_CLASS(xprtrdma_mr, ), \ TP_ARGS(mr)) +/** + ** Connection events + **/ + +TRACE_EVENT(xprtrdma_conn_upcall, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + struct rdma_cm_event *event + ), + + TP_ARGS(r_xprt, event), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, event) + __field(int, status) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->event = event->event; + __entry->status = event->status; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)", + __get_str(addr), __get_str(port), + __entry->r_xprt, rdma_show_cm_event(__entry->event), + __entry->event, __entry->status + ) +); + +TRACE_EVENT(xprtrdma_disconnect, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + int status + ), + + TP_ARGS(r_xprt, status), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(int, status) + __field(int, connected) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->status = status; + __entry->connected = r_xprt->rx_ep.rep_connected; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", + __get_str(addr), __get_str(port), + __entry->r_xprt, __entry->status, + __entry->connected == 1 ? "still " : "dis" + ) +); + +DEFINE_RXPRT_EVENT(xprtrdma_conn_start); +DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); +DEFINE_RXPRT_EVENT(xprtrdma_create); +DEFINE_RXPRT_EVENT(xprtrdma_destroy); +DEFINE_RXPRT_EVENT(xprtrdma_remove); +DEFINE_RXPRT_EVENT(xprtrdma_reinsert); +DEFINE_RXPRT_EVENT(xprtrdma_reconnect); +DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc); + /** ** Call events **/ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ddf0d87812ef..25d1160dc085 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connected(xprt); - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); if (rc) xprt_wake_pending_tasks(xprt, rc); - dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); } @@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, rx_xprt); - pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + trace_xprtrdma_inject_dsc(r_xprt); rdma_disconnect(r_xprt->rx_ia.ri_id); } @@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - dprintk("RPC: %s: called\n", __func__); + trace_xprtrdma_destroy(r_xprt); cancel_delayed_work_sync(&r_xprt->rx_connect_worker); @@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); - xprt_free(xprt); - dprintk("RPC: %s: returning\n", __func__); - module_put(THIS_MODULE); } @@ -430,6 +424,7 @@ xprt_setup_rdma(struct xprt_create *args) dprintk("RPC: %s: %s:%s\n", __func__, xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_PORT]); + trace_xprtrdma_create(new_xprt); return xprt; out4: @@ -440,6 +435,7 @@ out3: out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: + trace_xprtrdma_destroy(new_xprt); xprt_rdma_free_addresses(xprt); xprt_free(xprt); return ERR_PTR(rc); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1addcc9477ba..7f9e9025c42f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -220,6 +220,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_ep *ep = &xprt->rx_ep; int connstate = 0; + trace_xprtrdma_conn_upcall(xprt, event); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: @@ -228,14 +229,10 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ADDR_ERROR: ia->ri_async_rc = -EHOSTUNREACH; - dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", - __func__, ep); complete(&ia->ri_done); break; case RDMA_CM_EVENT_ROUTE_ERROR: ia->ri_async_rc = -ENETUNREACH; - dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", - __func__, ep); complete(&ia->ri_done); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -299,6 +296,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) struct rdma_cm_id *id; int rc; + trace_xprtrdma_conn_start(xprt); + init_completion(&ia->ri_done); init_completion(&ia->ri_remove_done); @@ -322,8 +321,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { - dprintk("RPC: %s: wait() exited: %i\n", - __func__, rc); + trace_xprtrdma_conn_tout(xprt); goto out; } @@ -340,8 +338,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { - dprintk("RPC: %s: wait() exited: %i\n", - __func__, rc); + trace_xprtrdma_conn_tout(xprt); goto out; } rc = ia->ri_async_rc; @@ -461,6 +458,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) /* Allow waiters to continue */ complete(&ia->ri_remove_done); + + trace_xprtrdma_remove(r_xprt); } /** @@ -471,7 +470,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) void rpcrdma_ia_close(struct rpcrdma_ia *ia) { - dprintk("RPC: %s: entering\n", __func__); if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id->qp) rdma_destroy_qp(ia->ri_id); @@ -625,9 +623,6 @@ out1: void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - dprintk("RPC: %s: entering, connected is %d\n", - __func__, ep->rep_connected); - cancel_delayed_work_sync(&ep->rep_connect_worker); if (ia->ri_id->qp) { @@ -650,7 +645,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, { int rc, err; - pr_info("%s: r_xprt = %p\n", __func__, r_xprt); + trace_xprtrdma_reinsert(r_xprt); rc = -EHOSTUNREACH; if (rpcrdma_ia_open(r_xprt)) @@ -688,7 +683,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, struct rdma_cm_id *id, *old; int err, rc; - dprintk("RPC: %s: reconnecting...\n", __func__); + trace_xprtrdma_reconnect(r_xprt); rpcrdma_ep_disconnect(ep, ia); @@ -810,16 +805,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) int rc; rc = rdma_disconnect(ia->ri_id); - if (!rc) { + if (!rc) /* returns without wait if not connected */ wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 1); - dprintk("RPC: %s: after wait, %sconnected\n", __func__, - (ep->rep_connected == 1) ? "still " : "dis"); - } else { - dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); + else ep->rep_connected = rc; - } + trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, + rx_ep), rc); ib_drain_qp(ia->ri_id->qp); } -- cgit v1.2.3-58-ga151 From fc1eb8076fb0eb0641566b24007a40a7d4ae0116 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:37 -0500 Subject: xprtrdma: Add trace points in the client-side backchannel code paths Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 68 +++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/backchannel.c | 8 ++--- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 3 files changed, 73 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 023443d5a0bf..95c2c0bffb65 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -267,6 +267,39 @@ DECLARE_EVENT_CLASS(xprtrdma_mr, ), \ TP_ARGS(mr)) +DECLARE_EVENT_CLASS(xprtrdma_cb_event, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(const void *, rqst) + __field(const void *, rep) + __field(const void *, req) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->rqst = rqst; + __entry->req = rpcr_to_rdmar(rqst); + __entry->rep = rpcr_to_rdmar(rqst)->rl_reply; + __entry->xid = be32_to_cpu(rqst->rq_xid); + ), + + TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p", + __entry->xid, __entry->rqst, __entry->req, __entry->rep + ) +); + +#define DEFINE_CB_EVENT(name) \ + DEFINE_EVENT(xprtrdma_cb_event, name, \ + TP_PROTO( \ + const struct rpc_rqst *rqst \ + ), \ + TP_ARGS(rqst)) + /** ** Connection events **/ @@ -719,6 +752,41 @@ TRACE_EVENT(xprtrdma_decode_seg, ) ); +/** + ** Callback events + **/ + +TRACE_EVENT(xprtrdma_cb_setup, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned int reqs + ), + + TP_ARGS(r_xprt, reqs), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, reqs) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->reqs = reqs; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs", + __get_str(addr), __get_str(port), + __entry->r_xprt, __entry->reqs + ) +); + +DEFINE_CB_EVENT(xprtrdma_cb_call); +DEFINE_CB_EVENT(xprtrdma_cb_reply); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 3c7998a72191..ca0c5d3d493e 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -140,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) buffer->rb_bc_srv_max_requests = reqs; request_module("svcrdma"); - + trace_xprtrdma_cb_setup(r_xprt, reqs); return 0; out_free: @@ -212,6 +212,8 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, &rqst->rq_snd_buf, rpcrdma_noch)) return -EIO; + + trace_xprtrdma_cb_reply(rqst); return 0; } @@ -331,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, struct rpc_rqst, rq_bc_pa_list); list_del(&rqst->rq_bc_pa_list); spin_unlock(&xprt->bc_pa_lock); - dprintk("RPC: %s: using rqst %p\n", __func__, rqst); /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; @@ -352,9 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, * the Upper Layer is done decoding it. */ req = rpcr_to_rdmar(rqst); - dprintk("RPC: %s: attaching rep %p to req %p\n", - __func__, rep, req); req->rl_reply = rep; + trace_xprtrdma_cb_call(rqst); /* Queue rqst for ULP's callback service */ bc_serv = xprt->bc_serv; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index a1ca1b638123..69883a960a3f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -365,7 +365,7 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) } static inline struct rpcrdma_req * -rpcr_to_rdmar(struct rpc_rqst *rqst) +rpcr_to_rdmar(const struct rpc_rqst *rqst) { return rqst->rq_xprtdata; } -- cgit v1.2.3-58-ga151 From 643cf3237db83e1443fa61de896449858393cb72 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:45 -0500 Subject: xprtrdma: Add trace points to instrument QP and CQ access upcalls Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 31 +++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/verbs.c | 3 +++ 2 files changed, 34 insertions(+) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 95c2c0bffb65..1e5ae57a4f0d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -375,6 +375,37 @@ DEFINE_RXPRT_EVENT(xprtrdma_reinsert); DEFINE_RXPRT_EVENT(xprtrdma_reconnect); DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc); +TRACE_EVENT(xprtrdma_qp_error, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + const struct ib_event *event + ), + + TP_ARGS(r_xprt, event), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, event) + __string(name, event->device->name) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->event = event->event; + __assign_str(name, event->device->name); + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)", + __get_str(addr), __get_str(port), __entry->r_xprt, + __get_str(name), rdma_show_ib_event(__entry->event), + __entry->event + ) +); + /** ** Call events **/ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7f9e9025c42f..fb81d3a4b0b3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -108,7 +108,10 @@ static void rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; + struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, + rx_ep); + trace_xprtrdma_qp_error(r_xprt, event); pr_err("rpcrdma: %s on device %s ep %p\n", ib_event_msg(event->event), event->device->name, context); -- cgit v1.2.3-58-ga151 From ae7246762530af00109c3fb8a30031da054c0aa0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Dec 2017 16:31:53 -0500 Subject: xprtrdma: Instrument allocation/release of rpcrdma_req/rep objects Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 67 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 12 +++----- net/sunrpc/xprtrdma/verbs.c | 4 +-- 3 files changed, 74 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 1e5ae57a4f0d..50ed3f8bf534 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -783,6 +783,73 @@ TRACE_EVENT(xprtrdma_decode_seg, ) ); +/** + ** Allocation/release of rpcrdma_reqs and rpcrdma_reps + **/ + +TRACE_EVENT(xprtrdma_allocate, + TP_PROTO( + const struct rpc_task *task, + const struct rpcrdma_req *req + ), + + TP_ARGS(task, req), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, req) + __field(const void *, rep) + __field(size_t, callsize) + __field(size_t, rcvsize) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->req = req; + __entry->rep = req ? req->rl_reply : NULL; + __entry->callsize = task->tk_rqstp->rq_callsize; + __entry->rcvsize = task->tk_rqstp->rq_rcvsize; + ), + + TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", + __entry->task_id, __entry->client_id, + __entry->req, __entry->rep, + __entry->callsize, __entry->rcvsize + ) +); + +TRACE_EVENT(xprtrdma_rpc_done, + TP_PROTO( + const struct rpc_task *task, + const struct rpcrdma_req *req + ), + + TP_ARGS(task, req), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, req) + __field(const void *, rep) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->req = req; + __entry->rep = req->rl_reply; + ), + + TP_printk("task:%u@%u req=%p rep=%p", + __entry->task_id, __entry->client_id, + __entry->req, __entry->rep + ) +); + +DEFINE_RXPRT_EVENT(xprtrdma_noreps); + /** ** Callback events **/ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 25d1160dc085..b90179af88bf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -640,7 +640,7 @@ xprt_rdma_allocate(struct rpc_task *task) req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) - return -ENOMEM; + goto out_get; flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) @@ -653,19 +653,18 @@ xprt_rdma_allocate(struct rpc_task *task) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", - task->tk_pid, __func__, rqst->rq_callsize, - rqst->rq_rcvsize, req); - req->rl_cpu = smp_processor_id(); req->rl_connect_cookie = 0; /* our reserved value */ rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; + trace_xprtrdma_allocate(task, req); return 0; out_fail: rpcrdma_buffer_put(req); +out_get: + trace_xprtrdma_allocate(task, NULL); return -ENOMEM; } @@ -682,10 +681,9 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) rpcrdma_release_rqst(r_xprt, req); + trace_xprtrdma_rpc_done(task, req); rpcrdma_buffer_put(req); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fb81d3a4b0b3..57e1139d85bc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1385,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) req = rpcrdma_buffer_get_req_locked(buffers); req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); + return req; out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; } @@ -1612,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("%s: no extra receive buffers\n", __func__); + trace_xprtrdma_noreps(r_xprt); return -ENOMEM; out_rc: -- cgit v1.2.3-58-ga151 From aae2349c49198e5dc7376519201cb4647a902e0f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:38:09 -0500 Subject: xprtrdma: Fix "bytes registered" accounting The contents of seg->mr_len changed when ->ro_map stopped returning the full chunk length in the first segment. Count the full length of each Write chunk, not the length of the first segment (which now can only be as large as a page). Fixes: 9d6b04097882 ("xprtrdma: Place registered MWs on a ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 1ae9b41b75a1..162e5dd82466 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -431,7 +431,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.write_chunk_count++; - r_xprt->rx_stats.total_rdma_request += seg->mr_len; + r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; nsegs -= mr->mr_nents; } while (nsegs); @@ -489,7 +489,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.reply_chunk_count++; - r_xprt->rx_stats.total_rdma_request += seg->mr_len; + r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; nsegs -= mr->mr_nents; } while (nsegs); -- cgit v1.2.3-58-ga151 From 9ab6d89e74785378cf69285375a2a6e0c63c51b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:38:17 -0500 Subject: xprtrdma: Correct some documenting comments Fix kernel-doc warnings in net/sunrpc/xprtrdma/ . net/sunrpc/xprtrdma/verbs.c:1575: warning: No description found for parameter 'count' net/sunrpc/xprtrdma/verbs.c:1575: warning: Excess function parameter 'min_reqs' description in 'rpcrdma_ep_post_extra_recv' net/sunrpc/xprtrdma/backchannel.c:288: warning: No description found for parameter 'r_xprt' net/sunrpc/xprtrdma/backchannel.c:288: warning: Excess function parameter 'xprt' description in 'rpcrdma_bc_receive_call' Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ca0c5d3d493e..ed1a4a3065ee 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -294,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) /** * rpcrdma_bc_receive_call - Handle a backward direction call - * @xprt: transport receiving the call + * @r_xprt: transport receiving the call * @rep: receive buffer containing the call * * Operational assumptions: diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 57e1139d85bc..f4eb63e8e689 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1584,7 +1584,7 @@ out_map: /** * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests * @r_xprt: transport associated with these backchannel resources - * @min_reqs: minimum number of incoming requests expected + * @count: minimum number of incoming requests expected * * Returns zero if all requested buffers were posted, or a negative errno. */ -- cgit v1.2.3-58-ga151 From 82476d9f955be0dfefadad22a87be6151cd70777 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:38:25 -0500 Subject: SUNRPC: Trace xprt_timer events Track RPC timeouts: report the XID and the server address to match the content of network capture. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 4 ++++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/transport.c | 2 -- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 8c153f68509e..7804d857fd24 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, __entry->status) ); +DEFINE_EVENT(rpc_xprt_event, xprt_timer, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), TP_ARGS(xprt, xid, status)); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 33b74fd84051..2436fd1125fc 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task) if (task->tk_status != -ETIMEDOUT) return; - dprintk("RPC: %5u xprt_timer\n", task->tk_pid); + trace_xprt_timer(xprt, req->rq_xid, task->tk_status); if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(xprt, task); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b90179af88bf..4b1ecfe979cf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) static void xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) { - dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt); - xprt_force_disconnect(xprt); } -- cgit v1.2.3-58-ga151 From cf08d6f2e6e1b9f177cafbe57e7ad33a76d32c38 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:38:41 -0500 Subject: SUNRPC: task_run_action should display tk_callback This shows up in every RPC: kworker/4:1-19772 [004] 3467.373443: rpc_task_run_action: task:4711@2 flags=0e81 state=0005 status=0 action=call_status kworker/4:1-19772 [004] 3467.373444: rpc_task_run_action: task:4711@2 flags=0e81 state=0005 status=0 action=call_status What's actually going on is that the first iteration of the RPC scheduler is invoking the function in tk_callback (in this case, xprt_timer), then invoking call_status on the next iteration. Feeding do_action, rather than tk_action, to the "task_run_action" trace point will now always display the correct FSM step. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b1b49edd7c4d..c292a5e1a70c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -770,7 +770,7 @@ static void __rpc_execute(struct rpc_task *task) if (do_action == NULL) break; } - trace_rpc_task_run_action(task->tk_client, task, task->tk_action); + trace_rpc_task_run_action(task->tk_client, task, do_action); do_action(task); /* -- cgit v1.2.3-58-ga151 From 21ead9ff3dc72604d89499a1da5a18cc193ec4ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:38:49 -0500 Subject: SUNRPC: Micro-optimize __rpc_execute The common case: There are 13 to 14 actions per RPC, and tk_callback is non-NULL in only one of them. There's no need to store a NULL in the tk_callback field during each FSM step. This slightly improves throughput results in dbench and other multi- threaded benchmarks on my two-socket client on 56Gb InfiniBand, but will probably be inconsequential on slower systems. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c292a5e1a70c..896691afbb1a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -755,21 +755,19 @@ static void __rpc_execute(struct rpc_task *task) void (*do_action)(struct rpc_task *); /* - * Execute any pending callback first. + * Perform the next FSM step or a pending callback. + * + * tk_action may be NULL if the task has been killed. + * In particular, note that rpc_killall_tasks may + * do this at any time, so beware when dereferencing. */ - do_action = task->tk_callback; - task->tk_callback = NULL; - if (do_action == NULL) { - /* - * Perform the next FSM step. - * tk_action may be NULL if the task has been killed. - * In particular, note that rpc_killall_tasks may - * do this at any time, so beware when dereferencing. - */ - do_action = task->tk_action; - if (do_action == NULL) - break; + do_action = task->tk_action; + if (task->tk_callback) { + do_action = task->tk_callback; + task->tk_callback = NULL; } + if (!do_action) + break; trace_rpc_task_run_action(task->tk_client, task, do_action); do_action(task); -- cgit v1.2.3-58-ga151