From d31ae2548142b7cd12404929ef3a13ae27c9d961 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 12:00:39 -0400 Subject: sunrpc: Const-ify all instances of struct rpc_xprt_ops After transport instance creation, these function pointers never change. Mark them as constant to prevent their use as an attack vector for code injections. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eab1c749e192..f60c55ca2e2e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -174,7 +174,7 @@ enum xprt_transports { struct rpc_xprt { struct kref kref; /* Reference count */ - struct rpc_xprt_ops * ops; /* transport methods */ + const struct rpc_xprt_ops *ops; /* transport methods */ const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ -- cgit v1.2.3-58-ga151 From 41c8f70f5a3db7e06179186b6525fd9ee1d7d314 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Aug 2017 14:30:11 -0400 Subject: xprtrdma: Harden backchannel call decoding Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 +++++++++ net/sunrpc/xprtrdma/backchannel.c | 40 ++++++--------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 58 +++++++++++++++++++++++++-------------- 3 files changed, 59 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 261b48a2701d..86b59e3525a5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -239,6 +239,19 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +/** + * xdr_stream_remaining - Return the number of bytes remaining in the stream + * @xdr: pointer to struct xdr_stream + * + * Return value: + * Number of bytes remaining in @xdr before xdr->end + */ +static inline size_t +xdr_stream_remaining(const struct xdr_stream *xdr) +{ + return xdr->nwords << 2; +} + ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); /** diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 03f6b5840764..183a103e08a8 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -271,9 +271,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) * @xprt: transport receiving the call * @rep: receive buffer containing the call * - * Called in the RPC reply handler, which runs in a tasklet. - * Be quick about it. - * * Operational assumptions: * o Backchannel credits are ignored, just as the NFS server * forechannel currently does @@ -284,7 +281,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_msg *headerp; struct svc_serv *bc_serv; struct rpcrdma_req *req; struct rpc_rqst *rqst; @@ -292,24 +288,15 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, size_t size; __be32 *p; - headerp = rdmab_to_msg(rep->rr_rdmabuf); + p = xdr_inline_decode(&rep->rr_stream, 0); + size = xdr_stream_remaining(&rep->rr_stream); + #ifdef RPCRDMA_BACKCHANNEL_DEBUG pr_info("RPC: %s: callback XID %08x, length=%u\n", - __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); - pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); + __func__, be32_to_cpup(p), size); + pr_info("RPC: %s: %*ph\n", __func__, size, p); #endif - /* Sanity check: - * Need at least enough bytes for RPC/RDMA header, as code - * here references the header fields by array offset. Also, - * backward calls are always inline, so ensure there - * are some bytes beyond the RPC/RDMA header. - */ - if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) - goto out_short; - p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); - size = rep->rr_len - RPCRDMA_HDRLEN_MIN; - /* Grab a free bc rqst */ spin_lock(&xprt->bc_pa_lock); if (list_empty(&xprt->bc_pa_list)) { @@ -325,7 +312,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; rqst->rq_bytes_sent = 0; - rqst->rq_xid = headerp->rm_xid; + rqst->rq_xid = *p; rqst->rq_private_buf.len = size; set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); @@ -337,9 +324,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, buf->len = size; /* The receive buffer has to be hooked to the rpcrdma_req - * so that it can be reposted after the server is done - * parsing it but just before sending the backward - * direction reply. + * so that it is not released while the req is pointing + * to its buffer, and so that it can be reposted after + * the Upper Layer is done decoding it. */ req = rpcr_to_rdmar(rqst); dprintk("RPC: %s: attaching rep %p to req %p\n", @@ -367,13 +354,4 @@ out_overflow: * when the connection is re-established. */ return; - -out_short: - pr_warn("RPC/RDMA short backward direction call\n"); - - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - xprt_disconnect_done(xprt); - else - pr_warn("RPC: %s: reposting rep %p\n", - __func__, rep); } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 24f58c7b3106..9b5ab598ab7b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -949,35 +949,59 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws, } } -#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes * the RPC/RDMA header small and fixed in size, so it is * straightforward to check the RPC header's direction field. */ static bool -rpcrdma_is_bcall(struct rpcrdma_msg *headerp) +rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, + __be32 xid, __be32 proc) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) { - __be32 *p = (__be32 *)headerp; + struct xdr_stream *xdr = &rep->rr_stream; + __be32 *p; - if (headerp->rm_type != rdma_msg) + if (proc != rdma_msg) return false; - if (headerp->rm_body.rm_chunks[0] != xdr_zero) + + /* Peek at stream contents without advancing. */ + p = xdr_inline_decode(xdr, 0); + + /* Chunk lists */ + if (*p++ != xdr_zero) return false; - if (headerp->rm_body.rm_chunks[1] != xdr_zero) + if (*p++ != xdr_zero) return false; - if (headerp->rm_body.rm_chunks[2] != xdr_zero) + if (*p++ != xdr_zero) return false; - /* sanity */ - if (p[7] != headerp->rm_xid) + /* RPC header */ + if (*p++ != xid) return false; - /* call direction */ - if (p[8] != cpu_to_be32(RPC_CALL)) + if (*p != cpu_to_be32(RPC_CALL)) return false; + /* Now that we are sure this is a backchannel call, + * advance to the RPC header. + */ + p = xdr_inline_decode(xdr, 3 * sizeof(*p)); + if (unlikely(!p)) + goto out_short; + + rpcrdma_bc_receive_call(r_xprt, rep); + return true; + +out_short: + pr_warn("RPC/RDMA short backward direction call\n"); + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) + xprt_disconnect_done(&r_xprt->rx_xprt); return true; } +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +{ + return false; +} #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* Process received RPC/RDMA messages. @@ -1020,10 +1044,8 @@ rpcrdma_reply_handler(struct work_struct *work) proc = *p++; headerp = rdmab_to_msg(rep->rr_rdmabuf); -#if defined(CONFIG_SUNRPC_BACKCHANNEL) - if (rpcrdma_is_bcall(headerp)) - goto out_bcall; -#endif + if (rpcrdma_is_bcall(r_xprt, rep, xid, proc)) + return; /* Match incoming rpcrdma_rep to an rpcrdma_req to * get context for handling any incoming chunks. @@ -1159,12 +1181,6 @@ out_badstatus: } return; -#if defined(CONFIG_SUNRPC_BACKCHANNEL) -out_bcall: - rpcrdma_bc_receive_call(r_xprt, rep); - return; -#endif - /* If the incoming reply terminated a pending RPC, the next * RPC call will post a replacement receive buffer as it is * being marshaled. -- cgit v1.2.3-58-ga151 From dee83046e73cb7ebbbae955c1ef0f4f55a0f44f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:51:02 -0400 Subject: NFS: Remove unuse function nfs_page_group_lock_wait() Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 21 --------------------- include/linux/nfs_page.h | 1 - 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a6f2bbd709ba..ced7974622dd 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -165,27 +165,6 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) return -EAGAIN; } -/* - * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it - * @req - a request in the group - * - * This is a blocking call to wait for the group lock to be cleared. - */ -void -nfs_page_group_lock_wait(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - - if (!test_bit(PG_HEADLOCK, &head->wb_flags)) - return; - set_bit(PG_CONTENDED1, &head->wb_flags); - smp_mb__after_atomic(); - wait_on_bit(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); -} - /* * nfs_page_group_unlock - unlock the head of the page group * @req - request in group that is to be unlocked diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d67b67ae6c8b..de1d24cedaa2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -140,7 +140,6 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool); -extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3-58-ga151 From 1344b7ea172b4911a8ee8a6ff26c5bc6b5abb302 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:54:14 -0400 Subject: NFS: Remove unused parameter from nfs_page_group_lock() nfs_page_group_lock() is now always called with the 'nonblock' parameter set to 'false'. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 31 +++++++++++-------------------- fs/nfs/write.c | 6 +++--- include/linux/nfs_page.h | 2 +- 3 files changed, 15 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ced7974622dd..af6731dd4324 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -134,19 +134,14 @@ EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked - * @nonblock - if true don't block waiting for lock * - * this lock must be held if modifying the page group list + * this lock must be held when traversing or modifying the page + * group list * - * return 0 on success, < 0 on error: -EDELAY if nonblocking or the - * result from wait_on_bit_lock - * - * NOTE: calling with nonblock=false should always have set the - * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock - * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. + * return 0 on success, < 0 on error */ int -nfs_page_group_lock(struct nfs_page *req, bool nonblock) +nfs_page_group_lock(struct nfs_page *req) { struct nfs_page *head = req->wb_head; @@ -155,14 +150,10 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) return 0; - if (!nonblock) { - set_bit(PG_CONTENDED1, &head->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); - } - - return -EAGAIN; } /* @@ -225,7 +216,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -1016,7 +1007,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); subreq = req; bytes_left = subreq->wb_bytes; @@ -1038,7 +1029,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); continue; } @@ -1135,7 +1126,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); /* find the last request */ for (lastreq = req->wb_head; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 20d44ea328b6..0f418d825185 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -271,7 +271,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -480,7 +480,7 @@ try_again: } spin_unlock(&inode->i_lock); - ret = nfs_page_group_lock(head, false); + ret = nfs_page_group_lock(head); if (ret < 0) { nfs_unlock_and_release_request(head); return ERR_PTR(ret); @@ -501,7 +501,7 @@ try_again: nfs_page_group_unlock(head); ret = nfs_wait_on_request(subreq); if (!ret) - ret = nfs_page_group_lock(head, false); + ret = nfs_page_group_lock(head); if (ret < 0) { nfs_unroll_locks(inode, head, subreq); nfs_release_request(subreq); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index de1d24cedaa2..2f4fdafb6746 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,7 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock(struct nfs_page *, bool); +extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3-58-ga151 From e824f99adaaf1ed0e03eac8574599af6d992163d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 11:53:49 -0400 Subject: NFSv4: Use a mutex to protect the per-inode commit lists The commit lists can get very large, so using the inode->i_lock can end up affecting general metadata performance. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- fs/nfs/inode.c | 1 + fs/nfs/pnfs_nfs.c | 15 +++++++-------- fs/nfs/write.c | 24 ++++++++++++------------ include/linux/nfs_fs.h | 1 + 5 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6fb9fad2d1e6..d2972d537469 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -616,13 +616,13 @@ nfs_direct_write_scan_commit_list(struct inode *inode, struct list_head *list, struct nfs_commit_info *cinfo) { - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); #ifdef CONFIG_NFS_V4_1 if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); #endif nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 109279d6d91b..34d9ebbc0dfd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2016,6 +2016,7 @@ static void init_once(void *foo) nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); init_rwsem(&nfsi->rmdir_sem); + mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 25f28fa64c57..2cdee8ce2094 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -98,14 +98,13 @@ pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, if (!nfs_lock_request(req)) continue; kref_get(&req->wb_kref); - if (cond_resched_lock(&cinfo->inode->i_lock)) - list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; if ((ret == max) && !cinfo->dreq) break; + cond_resched(); } return ret; } @@ -119,7 +118,7 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct list_head *dst = &bucket->committing; int ret; - lockdep_assert_held(&cinfo->inode->i_lock); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); if (ret) { cinfo->ds->nwritten -= ret; @@ -142,7 +141,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, { int i, rv = 0, cnt; - lockdep_assert_held(&cinfo->inode->i_lock); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], cinfo, max); @@ -162,7 +161,7 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, int nwritten; int i; - lockdep_assert_held(&cinfo->inode->i_lock); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { nwritten = pnfs_generic_transfer_commit_list(&b->written, @@ -953,12 +952,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, struct list_head *list; struct pnfs_commit_bucket *buckets; - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { if (!pnfs_is_valid_lseg(lseg)) { - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); cinfo->completion_ops->resched_write(cinfo, req); return; } @@ -975,7 +974,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, cinfo->ds->nwritten++; nfs_request_add_commit_list_locked(req, list, cinfo); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); nfs_mark_page_unstable(req->wb_page, cinfo); } EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8d8fa6d4cfcc..5ab5ca24b48a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -195,7 +195,7 @@ nfs_page_find_swap_request(struct page *page) struct nfs_page *req = NULL; if (!PageSwapCache(page)) return NULL; - spin_lock(&inode->i_lock); + mutex_lock(&nfsi->commit_mutex); if (PageSwapCache(page)) { req = nfs_page_search_commits_for_head_request_locked(nfsi, page); @@ -204,7 +204,7 @@ nfs_page_find_swap_request(struct page *page) kref_get(&req->wb_kref); } } - spin_unlock(&inode->i_lock); + mutex_unlock(&nfsi->commit_mutex); return req; } @@ -856,7 +856,8 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, * number of outstanding requests requiring a commit as well as * the MM page stats. * - * The caller must hold cinfo->inode->i_lock, and the nfs_page lock. + * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the + * nfs_page lock. */ void nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, @@ -884,9 +885,9 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); void nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) { - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); if (req->wb_page) nfs_mark_page_unstable(req->wb_page, cinfo); } @@ -964,11 +965,11 @@ nfs_clear_request_commit(struct nfs_page *req) struct nfs_commit_info cinfo; nfs_init_cinfo_from_inode(&cinfo, inode); - spin_lock(&inode->i_lock); + mutex_lock(&NFS_I(inode)->commit_mutex); if (!pnfs_clear_request_commit(req, &cinfo)) { nfs_request_remove_commit_list(req, &cinfo); } - spin_unlock(&inode->i_lock); + mutex_unlock(&NFS_I(inode)->commit_mutex); nfs_clear_page_commit(req->wb_page); } } @@ -1027,7 +1028,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo) return cinfo->mds->ncommit; } -/* cinfo->inode->i_lock held by caller */ +/* NFS_I(cinfo->inode)->commit_mutex held by caller */ int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) @@ -1039,13 +1040,12 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, if (!nfs_lock_request(req)) continue; kref_get(&req->wb_kref); - if (cond_resched_lock(&cinfo->inode->i_lock)) - list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; if ((ret == max) && !cinfo->dreq) break; + cond_resched(); } return ret; } @@ -1065,7 +1065,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, { int ret = 0; - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); if (cinfo->mds->ncommit > 0) { const int max = INT_MAX; @@ -1073,7 +1073,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, cinfo, max); ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); } - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); return ret; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5cc91d6381a3..121a702888b4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,6 +163,7 @@ struct nfs_inode { /* Readers: in-flight sillydelete RPC calls */ /* Writers: rmdir */ struct rw_semaphore rmdir_sem; + struct mutex commit_mutex; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; -- cgit v1.2.3-58-ga151 From a6b6d5b85abf4914bbceade5dddd54c345c64136 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 15:39:46 -0400 Subject: NFS: Use an atomic_long_t to count the number of requests Rather than forcing us to take the inode->i_lock just in order to bump the number. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/delegation.c | 2 +- fs/nfs/inode.c | 7 +++---- fs/nfs/pagelist.c | 4 +--- fs/nfs/write.c | 18 +++++------------- include/linux/nfs_fs.h | 4 ++-- 6 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 5427cdf04c5a..14358de173fb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -51,7 +51,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->nrequests != 0) + if (nfs_have_writebacks(inode)) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7df5e67b0c1..606dd3871f66 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1089,7 +1089,7 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) delegation = rcu_dereference(nfsi->delegation); if (delegation == NULL || !(delegation->type & FMODE_WRITE)) goto out; - if (nfsi->nrequests < delegation->pagemod_limit) + if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit) ret = false; out: rcu_read_unlock(); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 34d9ebbc0dfd..0480eb02299a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1285,7 +1285,6 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) @@ -1315,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->nrequests == 0) { + && !nfs_have_writebacks(inode)) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1823,7 +1822,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if (nfsi->nrequests == 0 || new_isize > cur_isize) { + if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; @@ -2012,7 +2011,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->nrequests = 0; + atomic_long_set(&nfsi->nrequests, 0); nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); init_rwsem(&nfsi->rmdir_sem); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index af6731dd4324..ec97c301899b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,9 +258,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests++; - spin_unlock(&inode->i_lock); + atomic_long_inc(&NFS_I(inode)->nrequests); } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5ab5ca24b48a..08093552f115 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -434,9 +434,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { nfs_release_request(subreq); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests--; - spin_unlock(&inode->i_lock); + atomic_long_dec(&NFS_I(inode)->nrequests); } /* subreq is now totally disconnected from page group or any @@ -567,9 +565,7 @@ try_again: if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { set_bit(PG_INODE_REF, &head->wb_flags); kref_get(&head->wb_kref); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests++; - spin_unlock(&inode->i_lock); + atomic_long_inc(&NFS_I(inode)->nrequests); } nfs_page_group_unlock(head); @@ -755,7 +751,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->nrequests && + if (!nfs_have_writebacks(inode) && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* @@ -767,7 +763,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->nrequests++; + atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an * extra reference so sub groups can follow suit. * This flag also informs pgio layer when to bump nrequests when @@ -786,6 +782,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; + atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -795,11 +792,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) ClearPagePrivate(head->wb_page); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->nrequests--; - spin_unlock(&inode->i_lock); - } else { - spin_lock(&inode->i_lock); - nfsi->nrequests--; spin_unlock(&inode->i_lock); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 121a702888b4..238fdc4c46df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -154,7 +154,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long nrequests; + atomic_long_t nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -511,7 +511,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->nrequests != 0; + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; } /* -- cgit v1.2.3-58-ga151 From 5cb953d4b1e70a09084f71594c45d47458346bc2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 17:04:12 -0400 Subject: NFS: Use an atomic_long_t to count the number of commits Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/write.c | 12 +++++++----- include/linux/nfs_xdr.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0480eb02299a..134d9f560240 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2012,7 +2012,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->nrequests, 0); - nfsi->commit_info.ncommit = 0; + atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); init_rwsem(&nfsi->rmdir_sem); mutex_init(&nfsi->commit_mutex); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 08093552f115..12479c25028e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -857,7 +857,7 @@ nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, { set_bit(PG_CLEAN, &req->wb_flags); nfs_list_add_request(req, dst); - cinfo->mds->ncommit++; + atomic_long_inc(&cinfo->mds->ncommit); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); @@ -903,7 +903,7 @@ nfs_request_remove_commit_list(struct nfs_page *req, if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) return; nfs_list_remove_request(req); - cinfo->mds->ncommit--; + atomic_long_dec(&cinfo->mds->ncommit); } EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); @@ -1017,7 +1017,7 @@ out: unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { - return cinfo->mds->ncommit; + return atomic_long_read(&cinfo->mds->ncommit); } /* NFS_I(cinfo->inode)->commit_mutex held by caller */ @@ -1057,8 +1057,10 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, { int ret = 0; + if (!atomic_long_read(&cinfo->mds->ncommit)) + return 0; mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (cinfo->mds->ncommit > 0) { + if (atomic_long_read(&cinfo->mds->ncommit) > 0) { const int max = INT_MAX; ret = nfs_scan_commit_list(&cinfo->mds->list, dst, @@ -1890,7 +1892,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; /* no commits means nothing needs to be done */ - if (!nfsi->commit_info.ncommit) + if (!atomic_long_read(&nfsi->commit_info.ncommit)) return ret; if (wbc->sync_mode == WB_SYNC_NONE) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 62cbcb842f99..164d5359d4ab 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1476,7 +1476,7 @@ struct nfs_pgio_header { struct nfs_mds_commit_info { atomic_t rpcs_out; - unsigned long ncommit; + atomic_long_t ncommit; struct list_head list; }; -- cgit v1.2.3-58-ga151 From 729749bb8da186e68d97d1b0439f0b1e0059c41d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Aug 2017 10:03:59 -0400 Subject: SUNRPC: Don't hold the transport lock across socket copy operations Instead add a mechanism to ensure that the request doesn't disappear from underneath us while copying from the socket. We do this by preventing xprt_release() from freeing the XDR buffers until the flag RPC_TASK_MSG_RECV has been cleared from the request. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- include/linux/sunrpc/sched.h | 2 ++ include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 23 ++++++++++++++++++----- 4 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 50a99a117da7..c1768f9d993b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -139,6 +139,8 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_ACTIVE 2 +#define RPC_TASK_MSG_RECV 3 +#define RPC_TASK_MSG_RECV_WAIT 4 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eab1c749e192..65b9e0224753 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -372,6 +372,8 @@ void xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_pin_rqst(struct rpc_rqst *req); +void xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4654a9934269..3eb9ec16eec4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -844,6 +844,48 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) } EXPORT_SYMBOL_GPL(xprt_lookup_rqst); +/** + * xprt_pin_rqst - Pin a request on the transport receive list + * @req: Request to pin + * + * Caller must ensure this is atomic with the call to xprt_lookup_rqst() + * so should be holding the xprt transport lock. + */ +void xprt_pin_rqst(struct rpc_rqst *req) +{ + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate); +} + +/** + * xprt_unpin_rqst - Unpin a request on the transport receive list + * @req: Request to pin + * + * Caller should be holding the xprt transport lock. + */ +void xprt_unpin_rqst(struct rpc_rqst *req) +{ + struct rpc_task *task = req->rq_task; + + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate); + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate)) + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV); +} + +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) +__must_hold(&req->rq_xprt->transport_lock) +{ + struct rpc_task *task = req->rq_task; + + if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { + spin_unlock_bh(&req->rq_xprt->transport_lock); + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, + TASK_UNINTERRUPTIBLE); + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); + spin_lock_bh(&req->rq_xprt->transport_lock); + } +} + static void xprt_update_rtt(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; @@ -1295,6 +1337,7 @@ void xprt_release(struct rpc_task *task) list_del(&req->rq_list); xprt->last_used = jiffies; xprt_schedule_autodisconnect(xprt); + xprt_wait_on_pinned_rqst(req); spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(task); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4f154d388748..04dbc7027712 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -973,6 +973,8 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; + xprt_pin_rqst(rovr); + spin_unlock_bh(&xprt->transport_lock); task = rovr->rq_task; copied = rovr->rq_private_buf.buflen; @@ -981,11 +983,14 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { dprintk("RPC: sk_buff copy failed\n"); - goto out_unlock; + spin_lock_bh(&xprt->transport_lock); + goto out_unpin; } + spin_lock_bh(&xprt->transport_lock); xprt_complete_rqst(task, copied); - +out_unpin: + xprt_unpin_rqst(rovr); out_unlock: spin_unlock_bh(&xprt->transport_lock); } @@ -1054,6 +1059,8 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; + xprt_pin_rqst(rovr); + spin_unlock_bh(&xprt->transport_lock); task = rovr->rq_task; if ((copied = rovr->rq_private_buf.buflen) > repsize) @@ -1062,14 +1069,17 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. */ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); - goto out_unlock; + spin_lock_bh(&xprt->transport_lock); + goto out_unpin; } __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); + spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); xprt_complete_rqst(task, copied); - +out_unpin: + xprt_unpin_rqst(rovr); out_unlock: spin_unlock_bh(&xprt->transport_lock); } @@ -1351,12 +1361,15 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, spin_unlock_bh(&xprt->transport_lock); return -1; } + xprt_pin_rqst(req); + spin_unlock_bh(&xprt->transport_lock); xs_tcp_read_common(xprt, desc, req); + spin_lock_bh(&xprt->transport_lock); if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); - + xprt_unpin_rqst(req); spin_unlock_bh(&xprt->transport_lock); return 0; } -- cgit v1.2.3-58-ga151 From ce7c252a8c741aba7c38f817b86e34361f561e42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Aug 2017 15:30:35 -0400 Subject: SUNRPC: Add a separate spinlock to protect the RPC request receive list This further reduces contention with the transport_lock, and allows us to convert to using a non-bh-safe spinlock, since the list is now never accessed from a bh context. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/svcsock.c | 6 +++--- net/sunrpc/xprt.c | 20 ++++++++++++-------- net/sunrpc/xprtrdma/rpc_rdma.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 7 +++++-- net/sunrpc/xprtsock.c | 30 ++++++++++++++++-------------- 6 files changed, 41 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 65b9e0224753..a97e6de5f9f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -232,6 +232,7 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ + spinlock_t recv_lock; /* lock receive list */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..272063ca81e8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1001,7 +1001,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (!bc_xprt) return -EAGAIN; - spin_lock_bh(&bc_xprt->transport_lock); + spin_lock(&bc_xprt->recv_lock); req = xprt_lookup_rqst(bc_xprt, xid); if (!req) goto unlock_notfound; @@ -1019,7 +1019,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; - spin_unlock_bh(&bc_xprt->transport_lock); + spin_unlock(&bc_xprt->recv_lock); return 0; unlock_notfound: printk(KERN_NOTICE @@ -1028,7 +1028,7 @@ unlock_notfound: __func__, ntohl(calldir), bc_xprt, ntohl(xid)); unlock_eagain: - spin_unlock_bh(&bc_xprt->transport_lock); + spin_unlock(&bc_xprt->recv_lock); return -EAGAIN; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3eb9ec16eec4..2af189c5ac3e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -872,17 +872,17 @@ void xprt_unpin_rqst(struct rpc_rqst *req) } static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) -__must_hold(&req->rq_xprt->transport_lock) +__must_hold(&req->rq_xprt->recv_lock) { struct rpc_task *task = req->rq_task; if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { - spin_unlock_bh(&req->rq_xprt->transport_lock); + spin_unlock(&req->rq_xprt->recv_lock); set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, TASK_UNINTERRUPTIBLE); clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); - spin_lock_bh(&req->rq_xprt->transport_lock); + spin_lock(&req->rq_xprt->recv_lock); } } @@ -1008,13 +1008,13 @@ void xprt_transmit(struct rpc_task *task) /* * Add to the list only if we're expecting a reply */ - spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); /* Add request to the receive list */ + spin_lock(&xprt->recv_lock); list_add_tail(&req->rq_list, &xprt->recv); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); @@ -1329,15 +1329,18 @@ void xprt_release(struct rpc_task *task) task->tk_ops->rpc_count_stats(task, task->tk_calldata); else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); + spin_lock(&xprt->recv_lock); + if (!list_empty(&req->rq_list)) { + list_del(&req->rq_list); + xprt_wait_on_pinned_rqst(req); + } + spin_unlock(&xprt->recv_lock); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) xprt->ops->release_request(task); - if (!list_empty(&req->rq_list)) - list_del(&req->rq_list); xprt->last_used = jiffies; xprt_schedule_autodisconnect(xprt); - xprt_wait_on_pinned_rqst(req); spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(task); @@ -1361,6 +1364,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); + spin_lock_init(&xprt->recv_lock); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ca4d6e4528f3..dfa748a0c8de 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1051,7 +1051,7 @@ rpcrdma_reply_handler(struct work_struct *work) * RPC completion while holding the transport lock to ensure * the rep, rqst, and rq_task pointers remain stable. */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); if (!rqst) goto out_norqst; @@ -1136,7 +1136,7 @@ out: xprt_release_rqst_cong(rqst->rq_task); xprt_complete_rqst(rqst->rq_task, status); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", __func__, xprt, rqst, status); return; @@ -1187,12 +1187,12 @@ out_rdmaerr: r_xprt->rx_stats.bad_reply_count++; goto out; -/* The req was still available, but by the time the transport_lock +/* The req was still available, but by the time the recv_lock * was acquired, the rqst and task had been released. Thus the RPC * has already been terminated. */ out_norqst: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); rpcrdma_buffer_put(req); dprintk("RPC: %s: race, no rqst left for req %p\n", __func__, req); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index c676ed0efb5a..0d574cda242d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -52,7 +52,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, if (src->iov_len < 24) goto out_shortreply; - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); req = xprt_lookup_rqst(xprt, xid); if (!req) goto out_notfound; @@ -69,17 +69,20 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, else if (credits > r_xprt->rx_buf.rb_bc_max_requests) credits = r_xprt->rx_buf.rb_bc_max_requests; + spin_lock_bh(&xprt->transport_lock); cwnd = xprt->cwnd; xprt->cwnd = credits << RPC_CWNDSHIFT; if (xprt->cwnd > cwnd) xprt_release_rqst_cong(req->rq_task); + spin_unlock_bh(&xprt->transport_lock); + ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); rcvbuf->len = 0; out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); out: return ret; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a344bea15fc7..2b918137aaa0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -969,12 +969,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; xprt_pin_rqst(rovr); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); task = rovr->rq_task; copied = rovr->rq_private_buf.buflen; @@ -983,16 +983,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { dprintk("RPC: sk_buff copy failed\n"); - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); goto out_unpin; } - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); xprt_complete_rqst(task, copied); out_unpin: xprt_unpin_rqst(rovr); out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); } static void xs_local_data_receive(struct sock_xprt *transport) @@ -1055,12 +1055,12 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; xprt_pin_rqst(rovr); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); task = rovr->rq_task; if ((copied = rovr->rq_private_buf.buflen) > repsize) @@ -1069,7 +1069,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. */ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); goto out_unpin; } @@ -1077,11 +1077,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); + spin_unlock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); xprt_complete_rqst(task, copied); out_unpin: xprt_unpin_rqst(rovr); out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); } static void xs_udp_data_receive(struct sock_xprt *transport) @@ -1344,24 +1346,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); /* Find and lock the request corresponding to this xid */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->tcp_xid)); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); return -1; } xprt_pin_rqst(req); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); xs_tcp_read_common(xprt, desc, req); - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); xprt_unpin_rqst(req); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); return 0; } -- cgit v1.2.3-58-ga151 From 3bde7afdabe9f37974af806abe646c2ca43c67c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Aug 2017 11:33:25 -0400 Subject: NFS: Remove unused parameter gfp_flags from nfs_pageio_init() Now that the mirror allocation has been moved, the parameter can go. Also remove the redundant symbol export. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 +--- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_page.h | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b7d193e2a243..ee66d8c3336c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -711,8 +711,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int io_flags, - gfp_t gfp_flags) + int io_flags) { desc->pg_moreio = 0; desc->pg_inode = inode; @@ -733,7 +732,6 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_mirrors = desc->pg_mirrors_static; nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); } -EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a8421d9dab6a..0d42573d423d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -68,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_read_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, - server->rsize, 0, GFP_KERNEL); + server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b1af5dee5e0a..ae78ac0a7a8a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1452,7 +1452,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, - server->wsize, ioflags, GFP_NOIO); + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d67b67ae6c8b..8b1a35aad0c3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -125,8 +125,7 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int how, - gfp_t gfp_flags); + int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, -- cgit v1.2.3-58-ga151 From 03c6f7d64ac9c0a37cca91392ac4be8993a8f53d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Aug 2017 12:47:27 +1000 Subject: NFS: remove jiffies field from access cache This field hasn't been used since commit 57b691819ee2 ("NFS: Cache access checks more aggressively"). Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ---- fs/nfs/nfs4proc.c | 1 - include/linux/nfs_fs.h | 1 - 3 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3522b1249019..5ceaeb1f6fb6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2260,7 +2260,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str spin_lock(&inode->i_lock); retry = false; } - res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); @@ -2296,7 +2295,6 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; - res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; err = 0; @@ -2344,7 +2342,6 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) if (cache == NULL) return; RB_CLEAR_NODE(&cache->rb_node); - cache->jiffies = set->jiffies; cache->cred = get_rpccred(set->cred); cache->mask = set->mask; @@ -2432,7 +2429,6 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; - cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { if (status == -ESTALE) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 08cc97488904..6c61e2b99635 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2285,7 +2285,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, mask = NFS4_ACCESS_READ; cache.cred = cred; - cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 238fdc4c46df..a0282ceaa48b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -49,7 +49,6 @@ struct nfs_access_entry { struct rb_node rb_node; struct list_head lru; - unsigned long jiffies; struct rpc_cred * cred; __u32 mask; struct rcu_head rcu_head; -- cgit v1.2.3-58-ga151