diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-05 19:15:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-05 19:15:29 -0700 |
commit | 5e4d659713f52c1c9dfc2fea9d319b80a53d4bc9 (patch) | |
tree | 2196fadbef032074b84d74955a05ec88e89198b0 | |
parent | 274c0e74e508c939a4ae5ef3890fddb4af537b76 (diff) | |
parent | 880a3a5325489a143269a8e172e7563ebf9897bc (diff) |
Merge tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"Chuck Lever did a bunch of work on nfsd tracepoints, on RDMA, and on
server xdr decoding (with an eye towards eliminating a data copy in
the RDMA case).
I did some refactoring of the delegation code in preparation for
eliminating some delegation self-conflicts and implementing write
delegations"
* tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux: (40 commits)
nfsd: fix incorrect umasks
sunrpc: remove incorrect HMAC request initialization
NFSD: Clean up legacy NFS SYMLINK argument XDR decoders
NFSD: Clean up legacy NFS WRITE argument XDR decoders
nfsd: Trace NFSv4 COMPOUND execution
nfsd: Add I/O trace points in the NFSv4 read proc
nfsd: Add I/O trace points in the NFSv4 write path
nfsd: Add "nfsd_" to trace point names
nfsd: Record request byte count, not count of vectors
nfsd: Fix NFSD trace points
svc: Report xprt dequeue latency
sunrpc: Report per-RPC execution stats
sunrpc: Re-purpose trace_svc_process
sunrpc: Save remote presentation address in svc_xprt for trace events
sunrpc: Simplify trace_svc_recv
sunrpc: Simplify do_enqueue tracing
sunrpc: Move trace_svc_xprt_dequeue()
sunrpc: Update show_svc_xprt_flags() to include recently added flags
svc: Simplify ->xpo_secure_port
sunrpc: Remove unneeded pointer dereference
...
31 files changed, 721 insertions, 515 deletions
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9c36d614bf89..346ed161756d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,8 +57,8 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; -atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); -DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); +static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); unsigned int lockd_net_id; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1d0ce3c57d93..6259a4b8579f 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) struct nfsd3_writeres *resp = rqstp->rq_resp; __be32 nfserr; unsigned long cnt = argp->len; + unsigned int nvecs; dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), @@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; + nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + if (!nvecs) + RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, resp->committed); + rqstp->rq_vec, nvecs, &cnt, + resp->committed); resp->count = cnt; RETURN_STATUS(nfserr); } @@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) struct nfsd3_diropres *resp = rqstp->rq_resp; __be32 nfserr; + if (argp->tlen == 0) + RETURN_STATUS(nfserr_inval); + if (argp->tlen > NFS3_MAXPATHLEN) + RETURN_STATUS(nfserr_nametoolong); + + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + argp->tlen); + if (IS_ERR(argp->tname)) + RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); + dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1a70581e1cb2..3192b544a441 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -391,7 +391,7 @@ int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeargs *args = rqstp->rq_argp; - unsigned int len, v, hdr, dlen; + unsigned int len, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); struct kvec *head = rqstp->rq_arg.head; struct kvec *tail = rqstp->rq_arg.tail; @@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) args->count = max_blocksize; len = args->len = max_blocksize; } - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = head->iov_len - hdr; - v = 0; - while (len > rqstp->rq_vec[v].iov_len) { - len -= rqstp->rq_vec[v].iov_len; - v++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); - rqstp->rq_vec[v].iov_len = PAGE_SIZE; - } - rqstp->rq_vec[v].iov_len = len; - args->vlen = v + 1; + + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; return 1; } @@ -489,51 +481,24 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; - unsigned int len, avail; - char *old, *new; - struct kvec *vec; + char *base = (char *)p; + size_t dlen; if (!(p = decode_fh(p, &args->ffh)) || - !(p = decode_filename(p, &args->fname, &args->flen)) - ) + !(p = decode_filename(p, &args->fname, &args->flen))) return 0; p = decode_sattr3(p, &args->attrs); - /* now decode the pathname, which might be larger than the first page. - * As we have to check for nul's anyway, we copy it into a new page - * This page appears in the rq_res.pages list, but as pages_len is always - * 0, it won't get in the way - */ - len = ntohl(*p++); - if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) - return 0; - args->tname = new = page_address(*(rqstp->rq_next_page++)); - args->tlen = len; - /* first copy and check from the first page */ - old = (char*)p; - vec = &rqstp->rq_arg.head[0]; - if ((void *)old > vec->iov_base + vec->iov_len) - return 0; - avail = vec->iov_len - (old - (char*)vec->iov_base); - while (len && avail && *old) { - *new++ = *old++; - len--; - avail--; - } - /* now copy next page if there is one */ - if (len && !avail && rqstp->rq_arg.page_len) { - avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); - old = page_address(rqstp->rq_arg.pages[0]); - } - while (len && avail && *old) { - *new++ = *old++; - len--; - avail--; - } - *new = '\0'; - if (len) - return 0; + args->tlen = ntohl(*p++); + + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + dlen = args->first.iov_len + rqstp->rq_arg.page_len + + rqstp->rq_arg.tail[0].iov_len; + if (dlen < XDR_QUADLEN(args->tlen) << 2) + return 0; return 1; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 49b0a9e7ff18..1f04d2a70d25 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status) return -status; } -static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, - int *status) +static int decode_cb_op_status(struct xdr_stream *xdr, + enum nfs_cb_opnum4 expected, int *status) { __be32 *p; u32 op; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 7d888369f85a..228faf00a594 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) struct nfs4_client *clp = ls->ls_stid.sc_client; struct nfs4_file *fp = ls->ls_stid.sc_file; - trace_layoutstate_free(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid); spin_lock(&clp->cl_lock); list_del_init(&ls->ls_perclnt); @@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, list_add(&ls->ls_perfile, &fp->fi_lo_states); spin_unlock(&fp->fi_lock); - trace_layoutstate_alloc(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid); return ls; } @@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) if (list_empty(&ls->ls_layouts)) goto out_unlock; - trace_layout_recall(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); refcount_inc(&ls->ls_stid.sc_count); nfsd4_run_cb(&ls->ls_recall); @@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, false, lrp->lr_layout_type, &ls); if (nfserr) { - trace_layout_return_lookup_fail(&lrp->lr_sid); + trace_nfsd_layout_return_lookup_fail(&lrp->lr_sid); return nfserr; } @@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { - trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); + trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid); nfs4_unhash_stid(&ls->ls_stid); lrp->lrs_present = 0; } @@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* * Unknown error or non-responding client, we'll need to fence. */ - trace_layout_recall_fail(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid); ops = nfsd4_layout_ops[ls->ls_layout_type]; if (ops->fence_client) @@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) nfsd4_cb_layout_fail(ls); return -1; case -NFS4ERR_NOMATCHING_LAYOUT: - trace_layout_recall_done(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; return 1; } @@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) container_of(cb, struct nfs4_layout_stateid, ls_recall); LIST_HEAD(reaplist); - trace_layout_recall_release(&ls->ls_stid.sc_stateid); + trace_nfsd_layout_recall_release(&ls->ls_stid.sc_stateid); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a0bed2b2004d..5d99e8810b85 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -32,6 +32,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/fs_struct.h> #include <linux/file.h> #include <linux/falloc.h> #include <linux/slab.h> @@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * Note: create modes (UNCHECKED,GUARDED...) are the same * in NFSv4 as in v3 except EXCLUSIVE4_1. */ + current->fs->umask = open->op_umask; status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + current->fs->umask = 0; if (!status && open->op_label.len) nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); @@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, @@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NF4BLK: + status = nfserr_inval; rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); if (MAJOR(rdev) != create->cr_specdata1 || MINOR(rdev) != create->cr_specdata2) - return nfserr_inval; + goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, &create->cr_iattr, S_IFBLK, rdev, &resfh); break; case NF4CHR: + status = nfserr_inval; rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); if (MAJOR(rdev) != create->cr_specdata1 || MINOR(rdev) != create->cr_specdata2) - return nfserr_inval; + goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, &create->cr_iattr,S_IFCHR, rdev, &resfh); @@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); +out_umask: + current->fs->umask = 0; return status; } @@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; + trace_nfsd_read_start(rqstp, &cstate->current_fh, + read->rd_offset, read->rd_length); + /* * If we do a zero copy read, then a client will see read data * that reflects the state of the file *after* performing the @@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u) { if (u->read.rd_filp) fput(u->read.rd_filp); + trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, + u->read.rd_offset, u->read.rd_length); } static __be32 @@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; + cnt = write->wr_buflen; + trace_nfsd_write_start(rqstp, &cstate->current_fh, + write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &filp, NULL); if (status) { @@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); @@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput(filp); write->wr_bytes_written = cnt; - + trace_nfsd_write_done(rqstp, &cstate->current_fh, + write->wr_offset, cnt); return status; } @@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, else { copy->cp_res.wr_bytes_written = bytes; copy->cp_res.wr_stable_how = NFS_UNSTABLE; - copy->cp_consecutive = 1; copy->cp_synchronous = 1; gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); status = nfs_ok; @@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid, true, lgp->lg_layout_type, &ls); if (nfserr) { - trace_layout_get_lookup_fail(&lgp->lg_sid); + trace_nfsd_layout_get_lookup_fail(&lgp->lg_sid); goto out; } @@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, false, lcp->lc_layout_type, &ls); if (nfserr) { - trace_layout_commit_lookup_fail(&lcp->lc_sid); + trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid); /* fixup error code as per RFC5661 */ if (nfserr == nfserr_bad_stateid) nfserr = nfserr_badlayout; @@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", - resp->opcnt, args->opcnt, op->opnum, - nfsd4_op_name(op->opnum)); /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -1793,9 +1806,8 @@ encode_op: status = op->status; } - dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n", - args->ops, args->opcnt, resp->opcnt, op->opnum, - be32_to_cpu(status)); + trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, + nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 61b770e39809..fc74d6f46bd5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass { */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); +static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; @@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, + struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, dp->dl_retries = 1; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); + get_nfs4_file(fp); + dp->dl_stid.sc_file = fp; return dp; out_dec: atomic_long_dec(&num_delegations); @@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) spin_unlock(&stid->sc_lock); } -static void nfs4_put_deleg_lease(struct nfs4_file *fp) +static void put_deleg_file(struct nfs4_file *fp) { struct file *filp = NULL; spin_lock(&fp->fi_lock); - if (fp->fi_deleg_file && --fp->fi_delegees == 0) + if (--fp->fi_delegees == 0) swap(filp, fp->fi_deleg_file); spin_unlock(&fp->fi_lock); - if (filp) { - vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp); + if (filp) fput(filp); - } +} + +static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) +{ + struct nfs4_file *fp = dp->dl_stid.sc_file; + struct file *filp = fp->fi_deleg_file; + + WARN_ON_ONCE(!fp->fi_delegees); + + vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp); + put_deleg_file(fp); +} + +static void destroy_unhashed_deleg(struct nfs4_delegation *dp) +{ + put_clnt_odstate(dp->dl_clnt_odstate); + nfs4_unlock_deleg_lease(dp); + nfs4_put_stid(&dp->dl_stid); } void nfs4_unhash_stid(struct nfs4_stid *s) @@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s) } /** - * nfs4_get_existing_delegation - Discover if this delegation already exists + * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: - * On success: NULL if an existing delegation was not found. - * - * On error: -EAGAIN if one was previously granted to this nfs4_client - * for this nfs4_file. - * + * On success: true iff an existing delegation is found */ -static int -nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +static bool +nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp) { struct nfs4_delegation *searchdp = NULL; struct nfs4_client *searchclp = NULL; @@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { searchclp = searchdp->dl_stid.sc_client; if (clp == searchclp) { - return -EAGAIN; + return true; } } - return 0; + return false; } /** @@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { - int status; struct nfs4_client *clp = dp->dl_stid.sc_client; lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); - status = nfs4_get_existing_delegation(clp, fp); - if (status) - return status; - ++fp->fi_delegees; + if (nfs4_delegation_exists(clp, fp)) + return -EAGAIN; refcount_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); @@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhashed = unhash_delegation_locked(dp); spin_unlock(&state_lock); - if (unhashed) { - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); - } + if (unhashed) + destroy_unhashed_deleg(dp); } static void revoke_delegation(struct nfs4_delegation *dp) @@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - - if (clp->cl_minorversion == 0) - nfs4_put_stid(&dp->dl_stid); - else { + if (clp->cl_minorversion) { dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + refcount_inc(&dp->dl_stid.sc_count); spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); spin_unlock(&clp->cl_lock); } + destroy_unhashed_deleg(dp); } /* @@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) struct nfs4_client *clp; int i; - clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); + clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); @@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) err_no_hashtbl: kfree(clp->cl_name.data); err_no_name: - kfree(clp); + kmem_cache_free(client_slab, clp); return NULL; } @@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp) kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); - kfree(clp); + kmem_cache_free(client_slab, clp); } /* must be called under the client_lock */ @@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + destroy_unhashed_deleg(dp); } while (!list_empty(&clp->cl_revoked)) { dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); @@ -2953,7 +2958,7 @@ out_no_session: static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) { if (!session) - return 0; + return false; return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); } @@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, void nfsd4_free_slabs(void) { - kmem_cache_destroy(odstate_slab); + kmem_cache_destroy(client_slab); kmem_cache_destroy(openowner_slab); kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(file_slab); kmem_cache_destroy(stateid_slab); kmem_cache_destroy(deleg_slab); + kmem_cache_destroy(odstate_slab); } int nfsd4_init_slabs(void) { + client_slab = kmem_cache_create("nfsd4_clients", + sizeof(struct nfs4_client), 0, 0, NULL); + if (client_slab == NULL) + goto out; openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out; + goto out_free_client_slab; lockowner_slab = kmem_cache_create("nfsd4_lockowners", sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) @@ -3518,6 +3528,8 @@ out_free_lockowner_slab: kmem_cache_destroy(lockowner_slab); out_free_openowner_slab: kmem_cache_destroy(openowner_slab); +out_free_client_slab: + kmem_cache_destroy(client_slab); out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; @@ -3945,17 +3957,9 @@ static bool nfsd_break_deleg_cb(struct file_lock *fl) { bool ret = false; - struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; - struct nfs4_delegation *dp; + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + struct nfs4_file *fp = dp->dl_stid.sc_file; - if (!fp) { - WARN(1, "(%p)->fl_owner NULL\n", fl); - return ret; - } - if (fp->fi_had_conflict) { - WARN(1, "duplicate break on %p\n", fp); - return ret; - } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl) spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; - /* - * If there are no delegations on the list, then return true - * so that the lease code will go ahead and delete it. - */ - if (list_empty(&fp->fi_delegations)) - ret = true; - else - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - nfsd_break_one_deleg(dp); + nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); return ret; } @@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, + int flag) { struct file_lock *fl; @@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)fp; + fl->fl_owner = (fl_owner_t)dp; fl->fl_pid = current->tgid; + fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file; return fl; } -/** - * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer - * @dp: a pointer to the nfs4_delegation we're adding. - * - * Return: - * On success: Return code will be 0 on success. - * - * On error: -EAGAIN if there was an existing delegation. - * nonzero if there is an error in other cases. - * - */ - -static int nfs4_setlease(struct nfs4_delegation *dp) -{ - struct nfs4_file *fp = dp->dl_stid.sc_file; - struct file_lock *fl; - struct file *filp; - int status = 0; - - fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); - if (!fl) - return -ENOMEM; - filp = find_readable_file(fp); - if (!filp) { - /* We should always have a readable file here */ - WARN_ON_ONCE(1); - locks_free_lock(fl); - return -EBADF; - } - fl->fl_file = filp; - status = vfs_setlease(filp, fl->fl_type, &fl, NULL); - if (fl) - locks_free_lock(fl); - if (status) - goto out_fput; - spin_lock(&state_lock); - spin_lock(&fp->fi_lock); - /* Did the lease get broken before we took the lock? */ - status = -EAGAIN; - if (fp->fi_had_conflict) - goto out_unlock; - /* Race breaker */ - if (fp->fi_deleg_file) { - status = hash_delegation_locked(dp, fp); - goto out_unlock; - } - fp->fi_deleg_file = filp; - fp->fi_delegees = 0; - status = hash_delegation_locked(dp, fp); - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); - if (status) { - /* Should never happen, this is a new fi_deleg_file */ - WARN_ON_ONCE(1); - goto out_fput; - } - return 0; -out_unlock: - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); -out_fput: - fput(filp); - return status; -} - static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { - int status; + int status = 0; struct nfs4_delegation *dp; + struct file *filp; + struct file_lock *fl; + /* + * The fi_had_conflict and nfs_get_existing_delegation checks + * here are just optimizations; we'll need to recheck them at + * the end: + */ if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + filp = find_readable_file(fp); + if (!filp) { + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); + } spin_lock(&state_lock); spin_lock(&fp->fi_lock); - status = nfs4_get_existing_delegation(clp, fp); + if (nfs4_delegation_exists(clp, fp)) + status = -EAGAIN; + else if (!fp->fi_deleg_file) { + fp->fi_deleg_file = filp; + /* increment early to prevent fi_deleg_file from being + * cleared */ + fp->fi_delegees = 1; + filp = NULL; + } else + fp->fi_delegees++; spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - + if (filp) + fput(filp); if (status) return ERR_PTR(status); - dp = alloc_init_deleg(clp, fh, odstate); + status = -ENOMEM; + dp = alloc_init_deleg(clp, fp, fh, odstate); if (!dp) - return ERR_PTR(-ENOMEM); + goto out_delegees; + + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + if (!fl) + goto out_stid; + + status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL); + if (fl) + locks_free_lock(fl); + if (status) + goto out_clnt_odstate; - get_nfs4_file(fp); spin_lock(&state_lock); spin_lock(&fp->fi_lock); - dp->dl_stid.sc_file = fp; - if (!fp->fi_deleg_file) { - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); - status = nfs4_setlease(dp); - goto out; - } - if (fp->fi_had_conflict) { + if (fp->fi_had_conflict) status = -EAGAIN; - goto out_unlock; - } - status = hash_delegation_locked(dp, fp); -out_unlock: + else + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); -out: - if (status) { - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_stid(&dp->dl_stid); - return ERR_PTR(status); - } + + if (status) + destroy_unhashed_deleg(dp); return dp; +out_clnt_odstate: + put_clnt_odstate(dp->dl_clnt_odstate); +out_stid: + nfs4_put_stid(&dp->dl_stid); +out_delegees: + put_deleg_file(fp); + return ERR_PTR(status); } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; stp->st_stid.sc_type = NFS4_CLOSED_STID; + + /* + * Technically we don't _really_ have to increment or copy it, since + * it should just be gone after this operation and we clobber the + * copied value below, but we continue to do so here just to ensure + * that racing ops see that there was a state change. + */ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); - /* See RFC5661 sectionm 18.2.4 */ - if (stp->st_stid.sc_client->cl_minorversion) - memcpy(&close->cl_stateid, &close_stateid, - sizeof(close->cl_stateid)); + /* v4.1+ suggests that we send a special stateid in here, since the + * clients should just ignore this anyway. Since this is not useful + * for v4.0 clients either, we set it to the special close_stateid + * universally. + * + * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5 + */ + memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid)); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); @@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + destroy_unhashed_deleg(dp); } nfsd4_client_tracking_exit(net); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e502fd16246b..1d048dd95464 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -33,7 +33,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/fs_struct.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> @@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl, &create->cr_label, - ¤t->fs->umask); + &create->cr_umask); if (status) goto out; @@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: - current->fs->umask = 0; READ_BUF(4); open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { @@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl, &open->op_label, - ¤t->fs->umask); + &open->op_umask); if (status) goto out; break; @@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl, &open->op_label, - ¤t->fs->umask); + &open->op_umask); if (status) goto out; break; @@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) p = xdr_decode_hyper(p, ©->cp_src_pos); p = xdr_decode_hyper(p, ©->cp_dst_pos); p = xdr_decode_hyper(p, ©->cp_count); - copy->cp_consecutive = be32_to_cpup(p++); + p++; /* ca_consecutive: we always do consecutive copies */ copy->cp_synchronous = be32_to_cpup(p++); tmp = be32_to_cpup(p); /* Source server list not supported */ @@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read( return nfserr_resource; len = maxcount; - nfserr = nfsd_splice_read(read->rd_rqstp, file, - read->rd_offset, &maxcount); + nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, + file, read->rd_offset, &maxcount); + read->rd_length = maxcount; if (nfserr) { /* * nfsd_splice_actor may have already messed with the @@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_vlen = v; len = maxcount; - nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, - read->rd_vlen, &maxcount); + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, + resp->rqstp->rq_vec, read->rd_vlen, &maxcount); + read->rd_length = maxcount; if (nfserr) return nfserr; xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); @@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; p = xdr_reserve_space(&resp->xdr, 4 + 4); - *p++ = cpu_to_be32(copy->cp_consecutive); + *p++ = xdr_one; /* cr_consecutive */ *p++ = cpu_to_be32(copy->cp_synchronous); return 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8aa011820c4a..a008e7634181 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, return nfserr_inval; } +static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags) +{ + if (flags & NFSEXP_INSECURE_PORT) + return true; + /* We don't require gss requests to use low ports: */ + if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS) + return true; + return test_bit(RQ_SECURE, &rqstp->rq_flags); +} + static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, struct svc_export *exp) { int flags = nfsexp_flags(rqstp, exp); /* Check if the request originated from a secure port. */ - if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) { + if (!nfsd_originating_port_ok(rqstp, flags)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("nfsd: request from insecure port %s!\n", svc_print_addr(rqstp, buf, sizeof(buf))); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 43c0419b8ddb..f107f9fa8e15 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp) struct nfsd_attrstat *resp = rqstp->rq_resp; __be32 nfserr; unsigned long cnt = argp->len; + unsigned int nvecs; dprintk("nfsd: WRITE %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, - rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC); + nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + if (!nvecs) + return nfserr_io; + nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), + argp->offset, rqstp->rq_vec, nvecs, + &cnt, NFS_DATA_SYNC); return nfsd_return_attrs(nfserr, resp); } @@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) struct svc_fh newfh; __be32 nfserr; + if (argp->tlen > NFS_MAXPATHLEN) + return nfserr_nametoolong; + + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + argp->tlen); + if (IS_ERR(argp->tname)) + return nfserrno(PTR_ERR(argp->tname)); + dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, argp->tlen, argp->tname); fh_init(&newfh, NFS_FHSIZE); - /* - * Crazy hack: the request fits in a page, and already-decoded - * attributes follow argp->tname, so it's safe to just write a - * null to ensure it's null-terminated: - */ - argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &newfh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 79b6064f8977..a43e8260520a 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -71,22 +71,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_pathname(__be32 *p, char **namp, unsigned int *lenp) -{ - char *name; - unsigned int i; - - if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { - for (i = 0, name = *namp; i < *lenp; i++, name++) { - if (*name == '\0') - return NULL; - } - } - - return p; -} - -static __be32 * decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; @@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) struct nfsd_writeargs *args = rqstp->rq_argp; unsigned int len, hdr, dlen; struct kvec *head = rqstp->rq_arg.head; - int v; p = decode_fh(p, &args->fh); if (!p) @@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) if (dlen < XDR_QUADLEN(len)*4) return 0; - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = head->iov_len - hdr; - v = 0; - while (len > rqstp->rq_vec[v].iov_len) { - len -= rqstp->rq_vec[v].iov_len; - v++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); - rqstp->rq_vec[v].iov_len = PAGE_SIZE; - } - rqstp->rq_vec[v].iov_len = len; - args->vlen = v + 1; + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; return 1; } @@ -394,14 +368,39 @@ int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_symlinkargs *args = rqstp->rq_argp; + char *base = (char *)p; + size_t xdrlen; if ( !(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_pathname(p, &args->tname, &args->tlen))) + || !(p = decode_filename(p, &args->fname, &args->flen))) return 0; - p = decode_sattr(p, &args->attrs); - return xdr_argsize_check(rqstp, p); + args->tlen = ntohl(*p++); + if (args->tlen == 0) + return 0; + + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + + /* This request is never larger than a page. Therefore, + * transport will deliver either: + * 1. pathname in the pagelist -> sattr is in the tail. + * 2. everything in the head buffer -> sattr is in the head. + */ + if (rqstp->rq_arg.page_len) { + if (args->tlen != rqstp->rq_arg.page_len) + return 0; + p = rqstp->rq_arg.tail[0].iov_base; + } else { + xdrlen = XDR_QUADLEN(args->tlen); + if (xdrlen > args->first.iov_len - (8 * sizeof(__be32))) + return 0; + p += xdrlen; + } + decode_sattr(p, &args->attrs); + + return 1; } int diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 8b2f1d92c579..80933e4334d8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -11,39 +11,79 @@ #include <linux/tracepoint.h> #include "nfsfh.h" +TRACE_EVENT(nfsd_compound, + TP_PROTO(const struct svc_rqst *rqst, + u32 args_opcnt), + TP_ARGS(rqst, args_opcnt), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, args_opcnt) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->args_opcnt = args_opcnt; + ), + TP_printk("xid=0x%08x opcnt=%u", + __entry->xid, __entry->args_opcnt) +) + +TRACE_EVENT(nfsd_compound_status, + TP_PROTO(u32 args_opcnt, + u32 resp_opcnt, + __be32 status, + const char *name), + TP_ARGS(args_opcnt, resp_opcnt, status, name), + TP_STRUCT__entry( + __field(u32, args_opcnt) + __field(u32, resp_opcnt) + __field(int, status) + __string(name, name) + ), + TP_fast_assign( + __entry->args_opcnt = args_opcnt; + __entry->resp_opcnt = resp_opcnt; + __entry->status = be32_to_cpu(status); + __assign_str(name, name); + ), + TP_printk("op=%u/%u %s status=%d", + __entry->resp_opcnt, __entry->args_opcnt, + __get_str(name), __entry->status) +) + DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - int len), + unsigned long len), TP_ARGS(rqstp, fhp, offset, len), TP_STRUCT__entry( - __field(__be32, xid) - __field_struct(struct knfsd_fh, fh) + __field(u32, xid) + __field(u32, fh_hash) __field(loff_t, offset) - __field(int, len) + __field(unsigned long, len) ), TP_fast_assign( - __entry->xid = rqstp->rq_xid, - fh_copy_shallow(&__entry->fh, &fhp->fh_handle); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); __entry->offset = offset; __entry->len = len; ), - TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d", - __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", + __entry->xid, __entry->fh_hash, __entry->offset, __entry->len) ) #define DEFINE_NFSD_IO_EVENT(name) \ -DEFINE_EVENT(nfsd_io_class, name, \ +DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ TP_PROTO(struct svc_rqst *rqstp, \ struct svc_fh *fhp, \ loff_t offset, \ - int len), \ + unsigned long len), \ TP_ARGS(rqstp, fhp, offset, len)) DEFINE_NFSD_IO_EVENT(read_start); -DEFINE_NFSD_IO_EVENT(read_opened); +DEFINE_NFSD_IO_EVENT(read_splice); +DEFINE_NFSD_IO_EVENT(read_vector); DEFINE_NFSD_IO_EVENT(read_io_done); DEFINE_NFSD_IO_EVENT(read_done); DEFINE_NFSD_IO_EVENT(write_start); @@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); +DECLARE_EVENT_CLASS(nfsd_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + loff_t offset, + int status), + TP_ARGS(rqstp, fhp, offset, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) + __field(loff_t, offset) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->offset = offset; + __entry->status = status; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld status=%d", + __entry->xid, __entry->fh_hash, + __entry->offset, __entry->status) +) + +#define DEFINE_NFSD_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + loff_t offset, \ + int len), \ + TP_ARGS(rqstp, fhp, offset, len)) + +DEFINE_NFSD_ERR_EVENT(read_err); +DEFINE_NFSD_ERR_EVENT(write_err); + #include "state.h" DECLARE_EVENT_CLASS(nfsd_stateid_class, @@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, ) #define DEFINE_STATEID_EVENT(name) \ -DEFINE_EVENT(nfsd_stateid_class, name, \ +DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \ TP_PROTO(stateid_t *stp), \ TP_ARGS(stp)) DEFINE_STATEID_EVENT(layoutstate_alloc); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a3c9bfa77def..2410b093a2e6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_finish_read(struct file *file, unsigned long *count, int host_err) +static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count, int host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; fsnotify_access(file); + trace_nfsd_read_io_done(rqstp, fhp, offset, *count); return 0; - } else + } else { + trace_nfsd_read_err(rqstp, fhp, offset, host_err); return nfserrno(host_err); + } } -__be32 nfsd_splice_read(struct svc_rqst *rqstp, - struct file *file, loff_t offset, unsigned long *count) +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, unsigned long *count) { struct splice_desc sd = { .len = 0, @@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, }; int host_err; + trace_nfsd_read_splice(rqstp, fhp, offset, *count); rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - return nfsd_finish_read(file, count, host_err); + return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } -__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) +__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + struct kvec *vec, int vlen, unsigned long *count) { struct iov_iter iter; int host_err; + trace_nfsd_read_vector(rqstp, fhp, offset, *count); iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); host_err = vfs_iter_read(file, &iter, &offset, 0); - - return nfsd_finish_read(file, count, host_err); + return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } /* @@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, { struct svc_export *exp; struct iov_iter iter; - __be32 err = 0; + __be32 nfserr; int host_err; int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; rwf_t flags = 0; + trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); + if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* * We want less throttling in balance_dirty_pages() @@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; - *cnt = host_err; - nfsdstats.io_write += host_err; + nfsdstats.io_write += *cnt; fsnotify_modify(file); if (stable && use_wgather) host_err = wait_for_concurrent_writes(file); out_nfserr: - dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) - err = 0; - else - err = nfserrno(host_err); + if (host_err >= 0) { + trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt); + nfserr = nfs_ok; + } else { + trace_nfsd_write_err(rqstp, fhp, offset, host_err); + nfserr = nfserrno(host_err); + } if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) current_restore_flags(pflags, PF_LESS_THROTTLE); - return err; + return nfserr; } /* @@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct raparms *ra; __be32 err; - trace_read_start(rqstp, fhp, offset, vlen); + trace_nfsd_read_start(rqstp, fhp, offset, *count); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) return err; ra = nfsd_init_raparms(file); - trace_read_opened(rqstp, fhp, offset, vlen); - if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) - err = nfsd_splice_read(rqstp, file, offset, count); + err = nfsd_splice_read(rqstp, fhp, file, offset, count); else - err = nfsd_readv(file, offset, vec, vlen, count); - - trace_read_io_done(rqstp, fhp, offset, vlen); + err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count); if (ra) nfsd_put_raparams(file, ra); fput(file); - trace_read_done(rqstp, fhp, offset, vlen); + trace_nfsd_read_done(rqstp, fhp, offset, *count); return err; } @@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct file *file = NULL; __be32 err = 0; - trace_write_start(rqstp, fhp, offset, vlen); + trace_nfsd_write_start(rqstp, fhp, offset, *cnt); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; - trace_write_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); - trace_write_io_done(rqstp, fhp, offset, vlen); fput(file); out: - trace_write_done(rqstp, fhp, offset, vlen); + trace_nfsd_write_done(rqstp, fhp, offset, *cnt); return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index be6d8e00453f..a7e107309f76 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -78,10 +78,13 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); struct raparms; -__be32 nfsd_splice_read(struct svc_rqst *, - struct file *, loff_t, unsigned long *); -__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, - unsigned long *); +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count); +__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + struct kvec *vec, int vlen, + unsigned long *count); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 2f4f22e6b8cb..ea7cca3a64b7 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -34,7 +34,7 @@ struct nfsd_writeargs { svc_fh fh; __u32 offset; int len; - int vlen; + struct kvec first; }; struct nfsd_createargs { @@ -72,6 +72,7 @@ struct nfsd_symlinkargs { char * tname; unsigned int tlen; struct iattr attrs; + struct kvec first; }; struct nfsd_readdirargs { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 056bf8a7364e..2cb29e961a76 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -41,7 +41,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - int vlen; + struct kvec first; }; struct nfsd3_createargs { @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs { char * tname; unsigned int tlen; struct iattr attrs; + struct kvec first; }; struct nfsd3_readdirargs { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index bc29511b6405..17c453a7999c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -110,6 +110,7 @@ struct nfsd4_create { struct { u32 datalen; char *data; + struct kvec first; } link; /* NF4LNK */ struct { u32 specdata1; @@ -118,12 +119,14 @@ struct nfsd4_create { } u; u32 cr_bmval[3]; /* request */ struct iattr cr_iattr; /* request */ + int cr_umask; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; struct xdr_netobj cr_label; }; #define cr_datalen u.link.datalen #define cr_data u.link.data +#define cr_first u.link.first #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 @@ -228,6 +231,7 @@ struct nfsd4_open { u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ u32 op_create; /* request */ u32 op_createmode; /* request */ + int op_umask; /* request */ u32 op_bmval[3]; /* request */ struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ nfs4_verifier op_verf __attribute__((aligned(32))); @@ -518,7 +522,6 @@ struct nfsd4_copy { u64 cp_count; /* both */ - bool cp_consecutive; bool cp_synchronous; /* response */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 786ae2255f05..574368e8a16f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -272,6 +272,7 @@ struct svc_rqst { #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ + ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ @@ -283,6 +284,7 @@ struct svc_rqst { int rq_reserved; /* space on socket outq * reserved for this request */ + ktime_t rq_stime; /* start time */ struct cache_req rq_chandle; /* handle passed to caches for * request delaying @@ -493,6 +495,10 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct kvec *first, size_t total); +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, + struct kvec *first, size_t total); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 4b731b046bcd..7337e1221590 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -132,9 +132,6 @@ struct svcxprt_rdma { #define RDMAXPRT_CONN_PENDING 3 #define RPCRDMA_LISTEN_BACKLOG 10 -/* The default ORD value is based on two outstanding full-size writes with a - * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ -#define RPCRDMA_ORD (64/4) #define RPCRDMA_MAX_REQUESTS 32 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 1caf7bc83306..c3d72066d4b1 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,7 +25,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; @@ -83,6 +83,7 @@ struct svc_xprt { size_t xpt_locallen; /* length of address */ struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ + char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ @@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt, { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; + snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1, + "%pISpc", sa); } + static inline unsigned short svc_addr_port(const struct sockaddr *sa) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 970c91a83173..922cb8968fb2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -485,31 +485,55 @@ TRACE_EVENT(xs_tcp_data_recv, { (1UL << RQ_BUSY), "RQ_BUSY"}) TRACE_EVENT(svc_recv, - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO(struct svc_rqst *rqst, int len), - TP_ARGS(rqst, status), + TP_ARGS(rqst, len), TP_STRUCT__entry( __field(u32, xid) - __field(int, status) + __field(int, len) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0; - __entry->status = status; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->len = len; __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, __entry->status, + TP_printk("addr=%s xid=0x%08x len=%d flags=%s", + __get_str(addr), __entry->xid, __entry->len, show_rqstp_flags(__entry->flags)) ); +TRACE_EVENT(svc_process, + TP_PROTO(const struct svc_rqst *rqst, const char *name), + + TP_ARGS(rqst, name), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, vers) + __field(u32, proc) + __string(service, name) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->vers = rqst->rq_vers; + __entry->proc = rqst->rq_proc; + __assign_str(service, name); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u", + __get_str(addr), __entry->xid, + __get_str(service), __entry->vers, __entry->proc) +); + DECLARE_EVENT_CLASS(svc_rqst_event, TP_PROTO(struct svc_rqst *rqst), @@ -519,20 +543,18 @@ DECLARE_EVENT_CLASS(svc_rqst_event, TP_STRUCT__entry( __field(u32, xid) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp rq_xid=0x%08x flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, - show_rqstp_flags(__entry->flags)) + TP_printk("addr=%s xid=0x%08x flags=%s", + __get_str(addr), __entry->xid, + show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_event, svc_defer, @@ -553,27 +575,21 @@ DECLARE_EVENT_CLASS(svc_rqst_status, __field(u32, xid) __field(int, status) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->status = status; __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, - __entry->status, show_rqstp_flags(__entry->flags)) + TP_printk("addr=%s xid=0x%08x status=%d flags=%s", + __get_str(addr), __entry->xid, + __entry->status, show_rqstp_flags(__entry->flags)) ); -DEFINE_EVENT(svc_rqst_status, svc_process, - TP_PROTO(struct svc_rqst *rqst, int status), - TP_ARGS(rqst, status)); - DEFINE_EVENT(svc_rqst_status, svc_send, TP_PROTO(struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); @@ -591,7 +607,9 @@ DEFINE_EVENT(svc_rqst_status, svc_send, { (1UL << XPT_OLD), "XPT_OLD"}, \ { (1UL << XPT_LISTENER), "XPT_LISTENER"}, \ { (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \ - { (1UL << XPT_LOCAL), "XPT_LOCAL"}) + { (1UL << XPT_LOCAL), "XPT_LOCAL"}, \ + { (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \ + { (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"}) TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), @@ -602,26 +620,19 @@ TRACE_EVENT(svc_xprt_do_enqueue, __field(struct svc_xprt *, xprt) __field(int, pid) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; __entry->pid = rqst? rqst->rq_task->pid : 0; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; - ), - - TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, - __entry->pid, show_svc_xprt_flags(__entry->flags)) + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s pid=%d flags=%s", + __entry->xprt, __get_str(addr), + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); DECLARE_EVENT_CLASS(svc_xprt_event, @@ -632,35 +643,50 @@ DECLARE_EVENT_CLASS(svc_xprt_event, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; - ), - - TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, - show_svc_xprt_flags(__entry->flags)) -); + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), -DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue, - TP_PROTO(struct svc_xprt *xprt), - TP_ARGS(xprt)); + TP_printk("xprt=%p addr=%s flags=%s", + __entry->xprt, __get_str(addr), + show_svc_xprt_flags(__entry->flags)) +); DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, TP_PROTO(struct svc_xprt *xprt), TP_ARGS(xprt)); +TRACE_EVENT(svc_xprt_dequeue, + TP_PROTO(struct svc_rqst *rqst), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(struct svc_xprt *, xprt) + __field(unsigned long, flags) + __field(unsigned long, wakeup) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xprt = rqst->rq_xprt; + __entry->flags = rqst->rq_xprt->xpt_flags; + __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_qtime)); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu", + __entry->xprt, __get_str(addr), + show_svc_xprt_flags(__entry->flags), + __entry->wakeup) +); + TRACE_EVENT(svc_wake_up, TP_PROTO(int pid), @@ -686,28 +712,42 @@ TRACE_EVENT(svc_handle_xprt, __field(struct svc_xprt *, xprt) __field(int, len) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; __entry->len = len; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; - ), - - TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s len=%d flags=%s", + __entry->xprt, __get_str(addr), __entry->len, show_svc_xprt_flags(__entry->flags)) ); +TRACE_EVENT(svc_stats_latency, + TP_PROTO(const struct svc_rqst *rqst), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(u32, xid) + __field(unsigned long, execute) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->execute = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_stime)); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x execute-us=%lu", + __get_str(addr), __entry->xid, __entry->execute) +); DECLARE_EVENT_CLASS(svc_deferred_event, TP_PROTO(struct svc_deferred_req *dr), @@ -716,18 +756,16 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(u32, xid) - __dynamic_array(unsigned char, addr, dr->addrlen) + __string(addr, dr->xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen); + __assign_str(addr, dr->xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp xid=0x%08x", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid) + TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) ); DEFINE_EVENT(svc_deferred_event, svc_drop_deferred, diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 12649c9fedab..8654494b4d0a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - err = crypto_ahash_init(req); - if (err) - goto out; err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); if (err) goto out; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 1d74d653e6c0..94a2b3f082a8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, u64 seq_send; u8 *cksumkey; unsigned int cksum_usage; + __be64 seq_send_be64; dprintk("RPC: %s\n", __func__); @@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send64++; spin_unlock(&krb5_seq_lock); - *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); + + seq_send_be64 = cpu_to_be64(seq_send); + memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8); if (ctx->initiate) { cksumkey = ctx->initiator_sign; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index dcf9515d9aef..b601a73cc9db 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, u8 flags; int i; unsigned int cksum_usage; + __be16 be16_ptr; dprintk("RPC: %s\n", __func__); - if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC) + memcpy(&be16_ptr, (char *) ptr, 2); + if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC) return GSS_S_DEFECTIVE_TOKEN; flags = ptr[2]; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index c536cc24b3d1..cdda4744c9b1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *bp, *ep; - time_t then, now; + char *ep; + time_t now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; @@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf, simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; + /* Note that while we check that 'buf' holds a valid number, + * we always ignore the value and just flush everything. + * Making use of the number leads to races. + */ - bp = tbuf; - then = get_expiry(&bp); now = seconds_since_boot(); - cd->nextcheck = now; - /* Can only set flush_time to 1 second beyond "now", or - * possibly 1 second beyond flushtime. This is because - * flush_time never goes backwards so it mustn't get too far - * ahead of time. + /* Always flush everything, so behave like cache_purge() + * Do this by advancing flush_time to the current time, + * or by one second if it has already reached the current time. + * Newly added cache entries will always have ->last_refresh greater + * that ->flush_time, so they don't get flushed prematurely. */ - if (then >= now) { - /* Want to flush everything, so behave like cache_purge() */ - if (cd->flush_time >= now) - now = cd->flush_time + 1; - then = now; - } - cd->flush_time = then; + if (cd->flush_time >= now) + now = cd->flush_time + 1; + + cd->flush_time = now; + cd->nextcheck = now; cache_flush(); *ppos += count; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 387cc4add6f6..30a4226baf03 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) /* Syntactic check complete */ serv->sv_stats->rpccnt++; + trace_svc_process(rqstp, progp->pg_name); /* Build the reply header. */ statp = resv->iov_base +resv->iov_len; @@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp) } /* Returns 1 for send, 0 for drop */ - if (likely(svc_process_common(rqstp, argv, resv))) { - int ret = svc_send(rqstp); + if (likely(svc_process_common(rqstp, argv, resv))) + return svc_send(rqstp); - trace_svc_process(rqstp, ret); - return ret; - } out_drop: - trace_svc_process(rqstp, 0); svc_drop(rqstp); return 0; } @@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp) return max; } EXPORT_SYMBOL_GPL(svc_max_payload); + +/** + * svc_fill_write_vector - Construct data argument for VFS write call + * @rqstp: svc_rqst to operate on + * @first: buffer containing first section of write payload + * @total: total number of bytes of write payload + * + * Returns the number of elements populated in the data argument array. + */ +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, + size_t total) +{ + struct kvec *vec = rqstp->rq_vec; + struct page **pages; + unsigned int i; + + /* Some types of transport can present the write payload + * entirely in rq_arg.pages. In this case, @first is empty. + */ + i = 0; + if (first->iov_len) { + vec[i].iov_base = first->iov_base; + vec[i].iov_len = min_t(size_t, total, first->iov_len); + total -= vec[i].iov_len; + ++i; + } + + WARN_ON_ONCE(rqstp->rq_arg.page_base != 0); + pages = rqstp->rq_arg.pages; + while (total) { + vec[i].iov_base = page_address(*pages); + vec[i].iov_len = min_t(size_t, total, PAGE_SIZE); + total -= vec[i].iov_len; + ++i; + + ++pages; + } + + WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec)); + return i; +} +EXPORT_SYMBOL_GPL(svc_fill_write_vector); + +/** + * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call + * @rqstp: svc_rqst to operate on + * @first: buffer containing first section of pathname + * @total: total length of the pathname argument + * + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is + * released automatically when @rqstp is recycled. + */ +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, + size_t total) +{ + struct xdr_buf *arg = &rqstp->rq_arg; + struct page **pages; + char *result; + + /* VFS API demands a NUL-terminated pathname. This function + * uses a page from @rqstp as the pathname buffer, to enable + * direct placement. Thus the total buffer size is PAGE_SIZE. + * Space in this buffer for NUL-termination requires that we + * cap the size of the returned symlink pathname just a + * little early. + */ + if (total > PAGE_SIZE - 1) + return ERR_PTR(-ENAMETOOLONG); + + /* Some types of transport can present the pathname entirely + * in rq_arg.pages. If not, then copy the pathname into one + * page. + */ + pages = arg->pages; + WARN_ON_ONCE(arg->page_base != 0); + if (first->iov_base == 0) { + result = page_address(*pages); + result[total] = '\0'; + } else { + size_t len, remaining; + char *dst; + + result = page_address(*(rqstp->rq_next_page++)); + dst = result; + remaining = total; + + len = min_t(size_t, total, first->iov_len); + memcpy(dst, first->iov_base, len); + dst += len; + remaining -= len; + + /* No more than one page left */ + if (remaining) { + len = min_t(size_t, remaining, PAGE_SIZE); + memcpy(dst, page_address(*pages), len); + dst += len; + } + + *dst = '\0'; + } + + /* Sanity check: we don't allow the pathname argument to + * contain a NUL byte. + */ + if (strlen(result) != total) + return ERR_PTR(-EINVAL); + return result; +} +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f9307bd6644b..5185efb9027b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); xprt->xpt_net = get_net(net); + strcpy(xprt->xpt_remotebuf, "uninitialized"); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) int cpu; if (!svc_xprt_has_something_to_do(xprt)) - goto out; + return; /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY * atomically because it also guards against trying to enqueue * the transport twice. */ - if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { - /* Don't enqueue transport while already enqueued */ - dprintk("svc: transport %p busy, not enqueued\n", xprt); - goto out; - } + if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) + return; cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); atomic_long_inc(&pool->sp_stats.packets); - dprintk("svc: transport %p put into queue\n", xprt); spin_lock_bh(&pool->sp_lock); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; @@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; atomic_long_inc(&pool->sp_stats.threads_woken); + rqstp->rq_qtime = ktime_get(); wake_up_process(rqstp->rq_task); goto out_unlock; } @@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: rcu_read_unlock(); put_cpu(); -out: trace_svc_xprt_do_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); @@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) struct svc_xprt, xpt_ready); list_del_init(&xprt->xpt_ready); svc_xprt_get(xprt); - - dprintk("svc: transport %p dequeued, inuse=%d\n", - xprt, kref_read(&xprt->xpt_ref)); } spin_unlock_bh(&pool->sp_lock); out: - trace_svc_xprt_dequeue(xprt); return xprt; } @@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; - rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); + xprt->xpt_ops->xpo_release_rqst(rqstp); kfree(rqstp->rq_deferred); rqstp->rq_deferred = NULL; @@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv) if (test_bit(RQ_BUSY, &rqstp->rq_flags)) continue; rcu_read_unlock(); - dprintk("svc: daemon %p woken up.\n", rqstp); wake_up_process(rqstp->rq_task); trace_svc_wake_up(rqstp->rq_task->pid); return; @@ -734,6 +726,7 @@ out_found: rqstp->rq_chandle.thread_wait = 5*HZ; else rqstp->rq_chandle.thread_wait = 1*HZ; + trace_svc_xprt_dequeue(rqstp); return rqstp->rq_xprt; } @@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) len = svc_deferred_recv(rqstp); else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - dprintk("svc: got len=%d\n", len); + rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } @@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - if (xprt->xpt_ops->xpo_secure_port(rqstp)) - set_bit(RQ_SECURE, &rqstp->rq_flags); - else - clear_bit(RQ_SECURE, &rqstp->rq_flags); + xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); @@ -859,7 +849,6 @@ out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); out: - trace_svc_recv(rqstp, err); return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp) goto out; /* release the receive skb before sending the reply */ - rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); + xprt->xpt_ops->xpo_release_rqst(rqstp); /* calculate over-all length */ xb = &rqstp->rq_res; @@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); + trace_svc_stats_latency(rqstp); if (test_bit(XPT_DEAD, &xprt->xpt_flags) || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; @@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp) len = xprt->xpt_ops->xpo_sendto(rqstp); mutex_unlock(&xprt->xpt_mutex); rpc_wake_up(&xprt->xpt_bc_pending); + trace_svc_send(rqstp, len); svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) len = 0; out: - trace_svc_send(rqstp, len); return len; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 08cd951aaeea..5445145e639c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, release_sock(sock->sk); } -static int svc_sock_secure_port(struct svc_rqst *rqstp) +static void svc_sock_secure_port(struct svc_rqst *rqstp) { - return svc_port_is_privileged(svc_addr(rqstp)); + if (svc_port_is_privileged(svc_addr(rqstp))) + set_bit(RQ_SECURE, &rqstp->rq_flags); + else + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -1309,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); + strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index a4a8f6989ee7..dd8a431dc2ae 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -51,9 +51,9 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT /* RPC/RDMA parameters */ -unsigned int svcrdma_ord = RPCRDMA_ORD; +unsigned int svcrdma_ord = 16; /* historical default */ static unsigned int min_ord = 1; -static unsigned int max_ord = 4096; +static unsigned int max_ord = 255; unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS; static unsigned int min_max_requests = 4; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 19e9c6b33042..3d45015dca97 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -110,15 +110,16 @@ * the RDMA_RECV completion. The SGL should contain full pages up until the * last one. */ -static void rdma_build_arg_xdr(struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *ctxt, - u32 byte_count) +static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *ctxt) { struct page *page; - u32 bc; int sge_no; + u32 len; - /* Swap the page in the SGE with the page in argpages */ + /* The reply path assumes the Call's transport header resides + * in rqstp->rq_pages[0]. + */ page = ctxt->pages[0]; put_page(rqstp->rq_pages[0]); rqstp->rq_pages[0] = page; @@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* Set up the XDR head */ rqstp->rq_arg.head[0].iov_base = page_address(page); rqstp->rq_arg.head[0].iov_len = - min_t(size_t, byte_count, ctxt->sge[0].length); - rqstp->rq_arg.len = byte_count; - rqstp->rq_arg.buflen = byte_count; + min_t(size_t, ctxt->byte_len, ctxt->sge[0].length); + rqstp->rq_arg.len = ctxt->byte_len; + rqstp->rq_arg.buflen = ctxt->byte_len; /* Compute bytes past head in the SGL */ - bc = byte_count - rqstp->rq_arg.head[0].iov_len; + len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len; /* If data remains, store it in the pagelist */ - rqstp->rq_arg.page_len = bc; + rqstp->rq_arg.page_len = len; rqstp->rq_arg.page_base = 0; sge_no = 1; - while (bc && sge_no < ctxt->count) { + while (len && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; put_page(rqstp->rq_pages[sge_no]); rqstp->rq_pages[sge_no] = page; - bc -= min_t(u32, bc, ctxt->sge[sge_no].length); + len -= min_t(u32, len, ctxt->sge[sge_no].length); sge_no++; } rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_next_page = rqstp->rq_respages + 1; /* If not all pages were used from the SGL, free the remaining ones */ - bc = sge_no; + len = sge_no; while (sge_no < ctxt->count) { page = ctxt->pages[sge_no++]; put_page(page); } - ctxt->count = bc; + ctxt->count = len; /* Set up tail */ rqstp->rq_arg.tail[0].iov_base = NULL; @@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ctxt, rdma_xprt, rqstp); atomic_inc(&rdma_stat_recv); - /* Build up the XDR from the receive buffers. */ - rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); + svc_rdma_build_arg_xdr(rqstp, ctxt); - /* Decode the RDMA header. */ p = (__be32 *)rqstp->rq_arg.head[0].iov_base; ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); if (ret < 0) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9ad12a215b51..96cc8f6597d3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); -static int svc_rdma_secure_port(struct svc_rqst *); +static void svc_rdma_secure_port(struct svc_rqst *); static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static const struct svc_xprt_ops svc_rdma_ops = { @@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_warn("svcrdma: receive: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); + pr_err("svcrdma: Recv: %s (%u/0x%x)\n", + ib_wc_status_msg(wc->status), + wc->status, wc->vendor_err); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); @@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, */ set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); - if (listener) + if (listener) { + strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); + } return cma_xprt; } @@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; - /* - * Limit ORD based on client limit, local device limit, and - * configured svcrdma limit. - */ - newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord); - newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); - newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { dprintk("svcrdma: error creating PD for connect request\n"); @@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 0; - conn_param.initiator_depth = newxprt->sc_ord; + conn_param.initiator_depth = min_t(int, newxprt->sc_ord, + dev->attrs.max_qp_init_rd_atom); + if (!conn_param.initiator_depth) { + dprintk("svcrdma: invalid ORD setting\n"); + ret = -EINVAL; + goto errout; + } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); - if (ret) { - dprintk("svcrdma: failed to accept new connection, ret=%d\n", - ret); + if (ret) goto errout; - } dprintk("svcrdma: new connection %p accepted:\n", newxprt); sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; @@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" max_requests : %d\n", newxprt->sc_max_requests); - dprintk(" ord : %d\n", newxprt->sc_ord); + dprintk(" ord : %d\n", conn_param.initiator_depth); return &newxprt->sc_xprt; @@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } -static int svc_rdma_secure_port(struct svc_rqst *rqstp) +static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - return 1; + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) |