From 632dda833e28fe43049a01b4bc53e409176e7843 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:04:50 -0400 Subject: SUNRPC: Clean up bc_send() Clean up: Merge bc_send() into bc_svc_process(). Note: even thought this touches svc.c, it is a client-side change. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use -- cgit v1.2.3-58-ga151 From 0f41979164a52a1ca0f0a601f90000fc18e3a396 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 22:59:08 -0400 Subject: SUNRPC: Remove unused argument 'tk_ops' in rpc_run_bc_task Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- net/sunrpc/clnt.c | 6 ++---- net/sunrpc/svc.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..2aa68976a482 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fa70f663eb92..f6717170480e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1031,15 +1031,13 @@ EXPORT_SYMBOL_GPL(rpc_call_async); * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it * @req: RPC request - * @tk_ops: RPC call ops */ -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { - .callback_ops = tk_ops, + .callback_ops = &rpc_default_ops, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e144902d382e..51c8cad5e765 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1350,7 +1350,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - static const struct rpc_call_ops reply_ops = { }; struct rpc_task *task; int error; @@ -1391,7 +1390,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* Finally, send the reply synchronously */ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); - task = rpc_run_bc_task(req, &reply_ops); + task = rpc_run_bc_task(req); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; -- cgit v1.2.3-58-ga151 From 0d2a970d0ae55086520e1e58e572a7acd519429c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 15:37:10 -0400 Subject: SUNRPC: Fix a backchannel race We need to allow the server to send a new request immediately after we've replied to the previous one. Right now, there is a window between the send and the release of the old request in rpc_put_task(), where the server could send us a new backchannel RPC call, and we have no request to service it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/backchannel_rqst.c | 37 ++++++++++++++++++++++++------------- net/sunrpc/svc.c | 6 +++++- 3 files changed, 31 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..bc9c39d6d30d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -212,7 +212,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 1baa41099469..9825ff0f91d6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count > 0; + return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); } static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_add(n, &xprt->bc_free_slots); xprt->bc_alloc_count += n; } static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_sub(n, &xprt->bc_free_slots); return xprt->bc_alloc_count -= n; } @@ -232,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (list_empty(&xprt->bc_pa_list)) + if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; - + if (list_empty(&xprt->bc_pa_list)) { + req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); + if (!req) + goto not_found; + /* Note: this 'free' request adds it to xprt->bc_pa_list */ + xprt_free_bc_request(req); + } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; @@ -260,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req) req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_atomic(); - WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); smp_mb__after_atomic(); - if (!xprt_need_to_requeue(xprt)) { + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + if (xprt_need_to_requeue(xprt)) { + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + if (req != NULL) { /* * The last remaining session was destroyed while this * entry was in use. Free the entry and don't attempt @@ -275,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req) xprt_free_allocation(req); return; } - - /* - * Return it to the list of preallocations so that it - * may be reused by a new callback request. - */ - spin_lock_bh(&xprt->bc_pa_lock); - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); } /* @@ -326,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 51c8cad5e765..b47f02f60b9c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1351,6 +1351,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; struct rpc_task *task; + int proc_error; int error; dprintk("svc: %s(%p)\n", __func__, req); @@ -1382,7 +1383,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, svc_getnl(argv); /* CALLDIR */ /* Parse and execute the bc call */ - if (!svc_process_common(rqstp, argv, resv)) { + proc_error = svc_process_common(rqstp, argv, resv); + + atomic_inc(&req->rq_xprt->bc_free_slots); + if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); return 0; -- cgit v1.2.3-58-ga151 From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++-------- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 16 +++++++++++ net/sunrpc/clnt.c | 67 ++++++++++++++++++++++++++++++++++++++------ net/sunrpc/xprtsock.c | 3 +- 5 files changed, 78 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2aa68976a482..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -268,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f6717170480e..0ba65156a62b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (sk_memalloc_socks()) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - if (xprt->swapper) - task->tk_flags |= RPC_TASK_SWAPPER; - rcu_read_unlock(); - } + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -2479,3 +2472,59 @@ void rpc_show_tasks(struct net *net) spin_unlock(&sn->rpc_client_lock); } #endif + +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + int ret = 0; + struct rpc_xprt *xprt; + + if (atomic_inc_return(&clnt->cl_swapper) == 1) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + ret = xs_swapper(xprt, 1); + xprt_put(xprt); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); + +void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + xs_swapper(xprt, 0); + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); +#endif /* CONFIG_SUNRPC_SWAP */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b142feef0cf4..f344c0f8974c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1955,7 +1955,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) msleep_interruptible(15000); } -#ifdef CONFIG_SUNRPC_SWAP +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, @@ -1987,7 +1987,6 @@ int xs_swapper(struct rpc_xprt *xprt, int enable) return err; } -EXPORT_SYMBOL_GPL(xs_swapper); #else static void xs_set_memalloc(struct rpc_xprt *xprt) { -- cgit v1.2.3-58-ga151 From 8e2281330f9930bccf77cf04027ec60b6cc0fb34 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:26 -0400 Subject: sunrpc: make xprt->swapper an atomic_t Split xs_swapper into enable/disable functions and eliminate the "enable" flag. Currently, it's racy if you have multiple swapon/swapoff operations running in parallel over the same xprt. Also fix it so that we only set it to a memalloc socket on a 0->1 transition and only clear it on a 1->0 transition. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 +++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtsock.c | 38 +++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc9c39d6d30d..9a3308703c15 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -180,7 +180,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -346,7 +346,8 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); +int xs_swapper_enable(struct rpc_xprt *xprt); +void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0ba65156a62b..801044aeeedd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper(xprt, 1); + ret = xs_swapper_enable(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper(xprt, 0); + xs_swapper_disable(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f344c0f8974c..6538e6fbb7ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1961,31 +1961,43 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt->swapper) + if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } /** - * xs_swapper - Tag this transport as being used for swap. + * xs_swapper_enable - Tag this transport as being used for swap. * @xprt: transport to tag - * @enable: enable/disable * + * Take a reference to this transport on behalf of the rpc_clnt, and + * optionally mark it for swapping if it wasn't already. */ -int xs_swapper(struct rpc_xprt *xprt, int enable) +int +xs_swapper_enable(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int err = 0; - if (enable) { - xprt->swapper++; - xs_set_memalloc(xprt); - } else if (xprt->swapper) { - xprt->swapper--; - sk_clear_memalloc(transport->inet); - } + if (atomic_inc_return(&xprt->swapper) == 1) + sk_set_memalloc(transport->inet); + return 0; +} - return err; +/** + * xs_swapper_disable - Untag this transport as being used for swap. + * @xprt: transport to tag + * + * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the + * swapper refcount goes to 0, untag the socket as a memalloc socket. + */ +void +xs_swapper_disable(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (atomic_dec_and_test(&xprt->swapper)) + sk_clear_memalloc(transport->inet); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) -- cgit v1.2.3-58-ga151 From d67fa4d85a2143b46052b2e9ccc6749a4c97b2de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:29 -0400 Subject: sunrpc: turn swapper_enable/disable functions into rpc_xprt_ops RDMA xprts don't have a sock_xprt, but an rdma_xprt, so the xs_swapper_enable/disable functions will likely oops when fed an RDMA xprt. Turn these functions into rpc_xprt_ops so that that doesn't occur. For now the RDMA versions are no-ops that just return -EINVAL on an attempt to swapon. Cc: Chuck Lever Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 15 ++++++++++++++- net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++------ 4 files changed, 55 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9a3308703c15..b6096592b1d4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,8 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); }; /* @@ -328,6 +330,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -346,8 +360,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper_enable(struct rpc_xprt *xprt); -void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 801044aeeedd..576d6ae39f25 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper_enable(xprt); + ret = xprt_enable_swap(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper_disable(xprt); + xprt_disable_swap(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 54f23b1be986..ebf6fe759f0e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -682,6 +682,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count); } +static int +xprt_rdma_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xprt_rdma_disable_swap(struct rpc_xprt *xprt) +{ +} + /* * Plumbing for rpc transport switch and kernel module */ @@ -700,7 +711,9 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats + .print_stats = xprt_rdma_print_stats, + .enable_swap = xprt_rdma_enable_swap, + .disable_swap = xprt_rdma_disable_swap, }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e0efd8514886..bf20726d4ab5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1980,14 +1980,14 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) } /** - * xs_swapper_enable - Tag this transport as being used for swap. + * xs_enable_swap - Tag this transport as being used for swap. * @xprt: transport to tag * * Take a reference to this transport on behalf of the rpc_clnt, and * optionally mark it for swapping if it wasn't already. */ -int -xs_swapper_enable(struct rpc_xprt *xprt) +static int +xs_enable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2002,14 +2002,14 @@ xs_swapper_enable(struct rpc_xprt *xprt) } /** - * xs_swapper_disable - Untag this transport as being used for swap. + * xs_disable_swap - Untag this transport as being used for swap. * @xprt: transport to tag * * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the * swapper refcount goes to 0, untag the socket as a memalloc socket. */ -void -xs_swapper_disable(struct rpc_xprt *xprt) +static void +xs_disable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2025,6 +2025,17 @@ xs_swapper_disable(struct rpc_xprt *xprt) static void xs_set_memalloc(struct rpc_xprt *xprt) { } + +static int +xs_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ +} #endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2488,6 +2499,8 @@ static struct rpc_xprt_ops xs_local_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_udp_ops = { @@ -2507,6 +2520,8 @@ static struct rpc_xprt_ops xs_udp_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_udp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2523,6 +2538,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; /* @@ -2540,6 +2557,8 @@ static struct rpc_xprt_ops bc_tcp_ops = { .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) -- cgit v1.2.3-58-ga151 From 11598b8ff2b97cf034d0c025cf125c92b574bafc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 10 Jun 2015 16:54:28 -0400 Subject: NFS: Remove unused nfs_rw_ops->rw_release() function This was only ever set to nfs_writeback_release_common(), a function which is completely empty. Let's just drop this function pointer and simplify the code a bit. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 -- fs/nfs/write.c | 6 ------ include/linux/nfs_page.h | 1 - 3 files changed, 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..e9953ab1ac40 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -690,8 +690,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - if (hdr->rw_ops->rw_release) - hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a1..d0f1f210342e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1347,11 +1347,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) -{ - /* do nothing! */ -} - /* * Special version of should_remove_suid() that ignores capabilities. */ @@ -2012,7 +2007,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, - .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, .rw_initiate = nfs_initiate_write, diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); -- cgit v1.2.3-58-ga151 From 4a06825839889cc1756d0dd8a52d6b1071ee0263 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:02:25 -0400 Subject: SUNRPC: Transport fault injection It has been exceptionally useful to exercise the logic that handles local immediate errors and RDMA connection loss. To enable developers to test this regularly and repeatably, add logic to simulate connection loss every so often. Fault injection is disabled by default. It is enabled with $ sudo echo xxx > /sys/kernel/debug/sunrpc/inject_fault/disconnect where "xxx" is a large positive number of transport method calls before a disconnect. A value of several thousand is usually a good number that allows reasonable forward progress while still causing a lot of connection drops. These hooks are disabled when SUNRPC_DEBUG is turned off. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 19 ++++++++++ net/sunrpc/clnt.c | 1 + net/sunrpc/debugfs.c | 77 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 2 ++ net/sunrpc/xprtrdma/transport.c | 11 ++++++ net/sunrpc/xprtsock.c | 10 ++++++ 6 files changed, 120 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b6096592b1d4..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -135,6 +135,7 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -244,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -445,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 576d6ae39f25..f41ed882ed3c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1605,6 +1605,7 @@ call_allocate(struct rpc_task *task) req->rq_callsize + req->rq_rcvsize); if (req->rq_buffer != NULL) return; + xprt_inject_disconnect(xprt); dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..7cc1b8a6ef6d 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -10,9 +10,12 @@ #include "netns.h" static struct dentry *topdir; +static struct dentry *rpc_fault_dir; static struct dentry *rpc_clnt_dir; static struct dentry *rpc_xprt_dir; +unsigned int rpc_inject_disconnect; + struct rpc_clnt_iter { struct rpc_clnt *clnt; loff_t pos; @@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; } + + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); } void @@ -266,11 +271,78 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) xprt->debugfs = NULL; } +static int +fault_open(struct inode *inode, struct file *filp) +{ + filp->private_data = kmalloc(128, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int +fault_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static ssize_t +fault_disconnect_read(struct file *filp, char __user *user_buf, + size_t len, loff_t *offset) +{ + char *buffer = (char *)filp->private_data; + size_t size; + + size = sprintf(buffer, "%u\n", rpc_inject_disconnect); + return simple_read_from_buffer(user_buf, len, offset, buffer, size); +} + +static ssize_t +fault_disconnect_write(struct file *filp, const char __user *user_buf, + size_t len, loff_t *offset) +{ + char buffer[16]; + + len = min(len, sizeof(buffer) - 1); + if (copy_from_user(buffer, user_buf, len)) + return -EFAULT; + buffer[len] = '\0'; + if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) + return -EINVAL; + return len; +} + +static const struct file_operations fault_disconnect_fops = { + .owner = THIS_MODULE, + .open = fault_open, + .read = fault_disconnect_read, + .write = fault_disconnect_write, + .release = fault_release, +}; + +static struct dentry * +inject_fault_dir(struct dentry *topdir) +{ + struct dentry *faultdir; + + faultdir = debugfs_create_dir("inject_fault", topdir); + if (!faultdir) + return NULL; + + if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + NULL, &fault_disconnect_fops)) + return NULL; + + return faultdir; +} + void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; rpc_xprt_dir = NULL; } @@ -282,6 +354,10 @@ sunrpc_debugfs_init(void) if (!topdir) return; + rpc_fault_dir = inject_fault_dir(topdir); + if (!rpc_fault_dir) + goto out_remove; + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) goto out_remove; @@ -294,5 +370,6 @@ sunrpc_debugfs_init(void) out_remove: debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..e1fb538e10e0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -967,6 +967,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; @@ -1285,6 +1286,7 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ebf6fe759f0e..b8aac23b1b0c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -246,6 +246,16 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connecting(xprt); } +static void +xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, + rx_xprt); + + pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + rdma_disconnect(r_xprt->rx_ia.ri_id); +} + /* * xprt_rdma_destroy * @@ -714,6 +724,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, + .inject_disconnect = xprt_rdma_inject_disconnect }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf20726d4ab5..fda8ec8c74c0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -866,6 +866,13 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_inject_disconnect(struct rpc_xprt *xprt) +{ + dprintk("RPC: injecting transport disconnect on xprt=%p\n", + xprt); + xprt_disconnect_done(xprt); +} + static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -2522,6 +2529,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .print_stats = xs_udp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2540,6 +2548,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; /* @@ -2559,6 +2568,7 @@ static struct rpc_xprt_ops bc_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) -- cgit v1.2.3-58-ga151 From 7e53df111beea8db2543424d07bdee2a630698c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:04 -0400 Subject: xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Clean up: This field is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 3 ++- net/sunrpc/xprtrdma/verbs.c | 3 --- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index db9303a6a145..cc1a52609974 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -665,9 +665,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) dprintk("RPC: %s: memory registration strategy is '%s'\n", __func__, ia->ri_ops->ro_displayname); - /* Else will do memory reg/dereg for each chunk */ - ia->ri_memreg_strategy = memreg; - rwlock_init(&ia->ri_qplock); return 0; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f19376d35750..3ecee38bf1a0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,7 +70,6 @@ struct rpcrdma_ia { int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; - enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; struct ib_device_attr ri_devattr; struct ib_qp_attr ri_qp_attr; -- cgit v1.2.3-58-ga151 From 764ad8ba8cd4c6f836fca9378f8c5121aece0842 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:56 -0400 Subject: nfs: increase size of EXCHANGE_ID name string buffer The current buffer is much too small if you have a relatively long hostname. Bring it up to the size of the one that SETCLIENTID has. Cc: Reported-by: Michael Skralivetsky Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..e9e9a8dcfb47 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; -- cgit v1.2.3-58-ga151 From 3a6bb738792500e8b4534c0350c13a132bac0492 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:57 -0400 Subject: nfs: convert setclientid and exchange_id encoders to use clp->cl_owner_id ...instead of buffers that are part of their arg structs. We already hold a reference to the client, so we might as well use the allocated buffer. In the event that we can't allocate the clp->cl_owner_id, then just return -ENOMEM. Note too that we switch from a GFP_KERNEL allocation here to GFP_NOFS. It's possible we could end up trying to do a SETCLIENTID or EXCHANGE_ID in order to reclaim some memory, and the GFP_KERNEL allocations in the existing code could cause recursion back into NFS reclaim. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 ++++++++++++++++----- fs/nfs/nfs4xdr.c | 8 +++++--- include/linux/nfs_xdr.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d689ea37be84..b2afe9556147 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5046,7 +5046,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -5076,6 +5076,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, nfs4_init_nonuniform_client_string(clp, setclientid.sc_name, sizeof(setclientid.sc_name)); + + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + /* cb_client4 */ setclientid.sc_netid_len = nfs4_init_callback_netid(clp, @@ -5085,9 +5091,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); + clp->cl_owner_id); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { status = PTR_ERR(task); @@ -6850,9 +6856,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, nfs4_init_boot_verifier(clp, &verifier); args.id_len = nfs4_init_uniform_client_string(clp, args.id, sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + + dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + clp->cl_owner_id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..f58e8a979397 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1667,13 +1667,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); - encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), + setclientid->sc_clnt->cl_owner_id); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_prog); encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); - *p = cpu_to_be32(setclientid->sc_cb_ident); + *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1747,7 +1748,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); - encode_string(xdr, args->id_len, args->id); + encode_string(xdr, strlen(args->client->cl_owner_id), + args->client->cl_owner_id); encode_uint32(xdr, args->flags); encode_uint32(xdr, args->state_protect.how); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9e9a8dcfb47..6c0b423d781b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -994,7 +994,7 @@ struct nfs4_setclientid { char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; -- cgit v1.2.3-58-ga151 From 873e385116b2cc5c7daca8f51881371fadb90970 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:44:00 -0400 Subject: nfs: make nfs4_init_uniform_client_string use a dynamically allocated buffer Change the uniform client string generator to dynamically allocate the NFSv4 client name string buffer. With this patch, we can eliminate the buffers that are embedded within the "args" structs and simply use the name string that is hanging off the client. This uniform string case is a little simpler than the nonuniform since we don't need to deal with RCU, but we do have two different cases, depending on whether there is a uniquifier or not. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 112 +++++++++++++++++++++++++++++++++--------------- include/linux/nfs_xdr.h | 6 --- 2 files changed, 77 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c20322636a68..d181090124db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5007,28 +5007,78 @@ retry: return 0; } -static unsigned int -nfs4_init_uniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_uniquifier_client_string(struct nfs_client *clp) { - const char *nodename = clp->cl_rpcclient->cl_nodename; - unsigned int result; + int result; + size_t len; + char *str; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(nfs4_client_id_uniquifier) + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) +{ + int result; + size_t len; + char *str; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; if (nfs4_client_id_uniquifier[0] != '\0') - result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, - clp->cl_minorversion, - nfs4_client_id_uniquifier, - nodename); - else - result = scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + return nfs4_init_uniquifier_client_string(clp); + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; } /* @@ -5095,20 +5145,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) { - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); - if (!clp->cl_owner_id) { - status = -ENOMEM; - goto out; - } - } else { + + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + status = nfs4_init_uniform_client_string(clp); + else status = nfs4_init_nonuniform_client_string(clp); - if (status) - goto out; - } + + if (status) + goto out; /* cb_client4 */ setclientid.sc_netid_len = @@ -6882,12 +6926,10 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - if (!clp->cl_owner_id) { - status = -ENOMEM; + + status = nfs4_init_uniform_client_string(clp); + if (status) goto out; - } dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6c0b423d781b..c959e7eb5bd4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -984,11 +984,8 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; @@ -1142,12 +1139,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3-58-ga151 From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 9 +++- fs/nfs/nfs42proc.c | 27 +++++++++++ fs/nfs/nfs42xdr.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 4 +- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 43 +++++++++++++++++ 8 files changed, 205 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..ac929685350b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -165,3 +165,30 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static const struct rpc_call_ops nfs42_layoutstat_ops = { +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..9aae0205ef11 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,28 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_LAYOUTSTATS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + + res->rpc_status = be32_to_cpup(p++); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* * Decode ALLOCATE request */ @@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9cd45f1d60f..643ce3a91b22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -476,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a07555d6968c..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c959e7eb5bd4..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3-58-ga151 From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 +++ fs/nfs/pnfs.c | 7 +++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ee0248340a42..06c74cd93875 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -204,6 +204,9 @@ nfs42_layoutstat_release(void *calldata) nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); nfs_iput_and_deactive(data->inode); kfree(data->args.devinfo); kfree(data); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6279cff7df74..5b5224341bcd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2257,6 +2257,7 @@ pnfs_report_layoutstat(struct inode *inode) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); struct nfs42_layoutstat_data *data; struct pnfs_layout_hdr *hdr; int status = 0; @@ -2264,6 +2265,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + spin_lock(&inode->i_lock); if (!NFS_I(inode)->layout) { spin_unlock(&inode->i_lock); @@ -2296,6 +2300,9 @@ out_free: kfree(data); out_put: pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3-58-ga151 From 6c5a0d891543873aefc3aaf846c1e7afe0982ff9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:45:46 -0400 Subject: NFSv4.2: LAYOUTSTATS is optional to implement Make it so, by checking the return value for NFS4ERR_MOTSUPP and caching the information as a server capability. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 10 ++++++++-- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/pnfs.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 06c74cd93875..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -189,9 +189,15 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) if (!nfs4_sequence_done(task, &data->res.seq_res)) return; - /* well, we don't care about errors at all! */ - if (task->tk_status) + switch (task->tk_status) { + case 0: + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; + default: dprintk("%s server returns %d\n", __func__, task->tk_status); + } } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 643ce3a91b22..8eab42407d39 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8635,7 +8635,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE - | NFS_CAP_SEEK, + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 40bacebb5b97..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2266,6 +2266,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) + goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) goto out; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif -- cgit v1.2.3-58-ga151