diff options
30 files changed, 299 insertions, 217 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 96c1d14c18f1..c2a128678e6e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -187,7 +187,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) continue; if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; - if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0) + if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0) continue; /* Alright, we found a lock. Set the return status * and wake up the caller diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a2c0dfc6fdc0..d20b92f271c2 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -128,7 +128,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); - memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); + memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh)); lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 3701bccab478..74330daeab71 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -405,8 +405,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, @@ -511,8 +511,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -566,8 +566,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) int error; dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -595,8 +595,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l int status = 0; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 4ec3d6e03e76..899360ba3b84 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -44,7 +44,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) { - struct inode *inode = file_inode(file->f_file); + struct inode *inode = locks_inode(file->f_file); dprintk("lockd: %s %s/%ld\n", msg, inode->i_sb->s_id, inode->i_ino); @@ -414,7 +414,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file) { struct super_block *sb = datap; - return sb == file_inode(file->f_file)->i_sb; + return sb == locks_inode(file->f_file)->i_sb; } /** diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 36358d435cb0..426f55005697 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -102,6 +102,7 @@ struct nfsd_net { time_t nfsd4_lease; time_t nfsd4_grace; + bool somebody_reclaimed; bool nfsd_net_up; bool lockd_up; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 6259a4b8579f..9eb8086ea841 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); if (!nvecs) RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, @@ -289,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_nametoolong); argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + page_address(rqstp->rq_arg.pages[0]), argp->tlen); if (IS_ERR(argp->tname)) RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); @@ -302,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, argp->tname, &resp->fh); + kfree(argp->tname); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1f04d2a70d25..601bf33c26a0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -746,30 +746,17 @@ static int max_cb_time(struct net *net) return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } -static struct rpc_cred *callback_cred; - -int set_callback_cred(void) -{ - if (callback_cred) - return 0; - callback_cred = rpc_lookup_machine_cred("nfs"); - if (!callback_cred) - return -ENOMEM; - return 0; -} - -void cleanup_callback_cred(void) -{ - if (callback_cred) { - put_rpccred(callback_cred); - callback_cred = NULL; - } -} - static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { - return get_rpccred(callback_cred); + char *principal = clp->cl_cred.cr_targ_princ ? + clp->cl_cred.cr_targ_princ : "nfs"; + struct rpc_cred *cred; + + cred = rpc_lookup_machine_cred(principal); + if (!IS_ERR(cred)) + get_rpccred(cred); + return cred; } else { struct rpc_auth *auth = client->cl_auth; struct auth_cred acred = {}; @@ -980,6 +967,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback break; case -ESERVERFAULT: ++session->se_cb_seq_nr; + /* Fall through */ case 1: case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 228faf00a594..2b36aa037ce0 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -133,27 +133,20 @@ void nfsd4_setup_layout_type(struct svc_export *exp) if (!(exp->ex_flags & NFSEXP_PNFS)) return; - /* - * If flex file is configured, use it by default. Otherwise - * check if the file system supports exporting a block-like layout. - * If the block device supports reservations prefer the SCSI layout, - * otherwise advertise the block layout. - */ #ifdef CONFIG_NFSD_FLEXFILELAYOUT exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES; #endif #ifdef CONFIG_NFSD_BLOCKLAYOUT - /* overwrite flex file layout selection if needed */ if (sb->s_export_op->get_uuid && sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks) exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME; #endif #ifdef CONFIG_NFSD_SCSILAYOUT - /* overwrite block layout selection if needed */ if (sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks && - sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) + sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops && + blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue)) exp->ex_layout_types |= 1 << LAYOUT_SCSI; #endif } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5d99e8810b85..b7bc6e1a85ac 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -354,6 +354,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *resfh = NULL; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool reclaim = false; dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, @@ -424,6 +425,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + reclaim = true; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, cstate, open); @@ -452,6 +454,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, WARN(status && open->op_created, "nfsd4_process_open2 failed to open newly-created file! status=%u\n", be32_to_cpu(status)); + if (reclaim && !status) + nn->somebody_reclaimed = true; out: if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); @@ -982,24 +986,6 @@ out: return status; } -static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) -{ - int i = 1; - int buflen = write->wr_buflen; - - vec[0].iov_base = write->wr_head.iov_base; - vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); - buflen -= vec[0].iov_len; - - while (buflen) { - vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); - vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); - buflen -= vec[i].iov_len; - i++; - } - return i; -} - static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1027,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); - nvecs = fill_in_write_vector(rqstp->rq_vec, write); + nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, + &write->wr_head, write->wr_buflen); + if (!nvecs) + return nfserr_io; WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, @@ -1599,7 +1588,7 @@ static const char *nfsd4_op_name(unsigned opnum); */ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) { - struct nfsd4_op *op = &args->ops[0]; + struct nfsd4_op *first_op = &args->ops[0]; /* These ordering requirements don't apply to NFSv4.0: */ if (args->minorversion == 0) @@ -1607,12 +1596,17 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) /* This is weird, but OK, not our problem: */ if (args->opcnt == 0) return nfs_ok; - if (op->status == nfserr_op_illegal) + if (first_op->status == nfserr_op_illegal) return nfs_ok; - if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) + if (!(nfsd4_ops[first_op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) return nfserr_op_not_in_session; - if (op->opnum == OP_SEQUENCE) + if (first_op->opnum == OP_SEQUENCE) return nfs_ok; + /* + * So first_op is something allowed outside a session, like + * EXCHANGE_ID; but then it has to be the only op in the + * compound: + */ if (args->opcnt != 1) return nfserr_not_only_op; return nfs_ok; @@ -1726,6 +1720,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (status) { op = &args->ops[0]; op->status = status; + resp->opcnt = 1; goto encode_op; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 857141446d6b..b0ca0efd2875 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL); target->cr_raw_principal = kstrdup(source->cr_raw_principal, GFP_KERNEL); - if ((source->cr_principal && ! target->cr_principal) || - (source->cr_raw_principal && ! target->cr_raw_principal)) + target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL); + if ((source->cr_principal && !target->cr_principal) || + (source->cr_raw_principal && !target->cr_raw_principal) || + (source->cr_targ_princ && !target->cr_targ_princ)) return -ENOMEM; target->cr_flavor = source->cr_flavor; @@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) return false; + /* XXX: check that cr_targ_princ fields match ? */ if (cr1->cr_principal == cr2->cr_principal) return true; if (!cr1->cr_principal || !cr2->cr_principal) @@ -2956,18 +2959,18 @@ out_no_session: return status; } -static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) +static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid) { - if (!session) + if (!cstate->session) return false; - return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); + return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid)); } __be32 nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - struct nfsd4_destroy_session *sessionid = &u->destroy_session; + struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid; struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; @@ -2975,14 +2978,14 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, struct nfsd_net *nn = net_generic(net, nfsd_net_id); status = nfserr_not_only_op; - if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { + if (nfsd4_compound_in_session(cstate, sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; ref_held_by_me++; } - dump_sessionid(__func__, &sessionid->sessionid); + dump_sessionid(__func__, sessionid); spin_lock(&nn->client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status); + ses = find_in_sessionid_hashtbl(sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; @@ -3945,9 +3948,9 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease - * callback (and since the lease code is serialized by the kernel - * lock) we know the server hasn't removed the lease yet, we know - * it's safe to take a reference. + * callback (and since the lease code is serialized by the + * i_lock) we know the server hasn't removed the lease yet, and + * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&dp->dl_recall); @@ -4693,6 +4696,28 @@ nfsd4_end_grace(struct nfsd_net *nn) */ } +/* + * If we've waited a lease period but there are still clients trying to + * reclaim, wait a little longer to give them a chance to finish. + */ +static bool clients_still_reclaiming(struct nfsd_net *nn) +{ + unsigned long now = get_seconds(); + unsigned long double_grace_period_end = nn->boot_time + + 2 * nn->nfsd4_lease; + + if (!nn->somebody_reclaimed) + return false; + nn->somebody_reclaimed = false; + /* + * If we've given them *two* lease times to reclaim, and they're + * still not done, give up: + */ + if (time_after(now, double_grace_period_end)) + return false; + return true; +} + static time_t nfs4_laundromat(struct nfsd_net *nn) { @@ -4706,6 +4731,11 @@ nfs4_laundromat(struct nfsd_net *nn) time_t t, new_timeo = nn->nfsd4_lease; dprintk("NFSD: laundromat service - starting\n"); + + if (clients_still_reclaiming(nn)) { + new_timeo = 0; + goto out; + } nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); spin_lock(&nn->client_lock); @@ -4803,7 +4833,7 @@ nfs4_laundromat(struct nfsd_net *nn) posix_unblock_lock(&nbl->nbl_lock); free_blocked_lock(nbl); } - +out: new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); return new_timeo; } @@ -6053,6 +6083,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case 0: /* success! */ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; + if (lock->lk_reclaim) + nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: nbl = NULL; @@ -6293,7 +6325,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) return status; } - inode = file_inode(filp); + inode = locks_inode(filp); flctx = inode->i_flctx; if (flctx && !list_empty_careful(&flctx->flc_posix)) { @@ -7199,14 +7231,10 @@ nfs4_state_start(void) { int ret; - ret = set_callback_cred(); - if (ret) - return ret; - laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; - goto out_cleanup_cred; + goto out; } ret = nfsd4_create_callback_queue(); if (ret) @@ -7217,8 +7245,7 @@ nfs4_state_start(void) out_free_laundry: destroy_workqueue(laundry_wq); -out_cleanup_cred: - cleanup_callback_cred(); +out: return ret; } @@ -7255,7 +7282,6 @@ nfs4_state_shutdown(void) { destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); - cleanup_callback_cred(); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a96843c59fc1..418fa9c78186 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1390,10 +1390,8 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, p += XDR_QUADLEN(dummy); } - /* ssp_window and ssp_num_gss_handles */ + /* ignore ssp_window and ssp_num_gss_handles: */ READ_BUF(8); - dummy = be32_to_cpup(p++); - dummy = be32_to_cpup(p++); break; default: goto xdr_error; @@ -2006,6 +2004,31 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +/* + * ctime (in NFSv4, time_metadata) is not writeable, and the client + * doesn't really care what resolution could theoretically be stored by + * the filesystem. + * + * The client cares how close together changes can be while still + * guaranteeing ctime changes. For most filesystems (which have + * timestamps with nanosecond fields) that is limited by the resolution + * of the time returned from current_time() (which I'm assuming to be + * 1/HZ). + */ +static __be32 *encode_time_delta(__be32 *p, struct inode *inode) +{ + struct timespec ts; + u32 ns; + + ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); + ts = ns_to_timespec(ns); + + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); + + return p; +} + static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { *p++ = cpu_to_be32(c->atomic); @@ -2797,9 +2820,7 @@ out_acl: p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(0); + p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { p = xdr_reserve_space(xdr, 12); @@ -2868,6 +2889,16 @@ out_acl: goto out; } + if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) { + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + if (IS_I_VERSION(d_inode(dentry))) + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR); + else + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, contextlen); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d107b4426f7e..7fb9f7c667b1 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -73,7 +73,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif -static ssize_t (*write_op[])(struct file *, char *, size_t) = { +static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, @@ -1237,8 +1237,9 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; + nn->nfsd4_lease = 45; /* default lease time */ + nn->nfsd4_grace = 45; + nn->somebody_reclaimed = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 3fce905d0365..066899929863 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -360,6 +360,7 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + FATTR4_WORD2_CHANGE_ATTR_TYPE | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a008e7634181..b319080288c3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -451,7 +451,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) switch (fsid_type) { case FSID_DEV: if (!old_valid_dev(exp_sb(exp)->s_dev)) - return 0; + return false; /* FALL THROUGH */ case FSID_MAJOR_MINOR: case FSID_ENCODE_DEV: @@ -461,13 +461,13 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) case FSID_UUID8: case FSID_UUID16: if (!is_root_export(exp)) - return 0; + return false; /* fall through */ case FSID_UUID4_INUM: case FSID_UUID16_INUM: return exp->ex_uuid != NULL; } - return 1; + return true; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index f107f9fa8e15..0d20fd161225 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); if (!nvecs) return nfserr_io; nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), @@ -453,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) return nfserr_nametoolong; argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + page_address(rqstp->rq_arg.pages[0]), argp->tlen); if (IS_ERR(argp->tname)) return nfserrno(PTR_ERR(argp->tname)); @@ -465,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &newfh); + kfree(argp->tname); fh_put(&argp->ffh); fh_put(&newfh); return nfserr; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f3772ea8ba0d..0b15dac7e609 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -617,8 +617,6 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); -extern int set_callback_cred(void); -extern void cleanup_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 4fd95dbeb52f..b065ef406770 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -299,7 +299,7 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return file_inode(file->f_file); + return locks_inode(file->f_file); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) @@ -359,7 +359,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file) && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 57ffaa20d564..0877ed333733 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -374,6 +374,13 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +enum change_attr_type4 { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 +}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -441,6 +448,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 574368e8a16f..73e130a840ce 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -496,9 +496,11 @@ void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct page **pages, struct kvec *first, size_t total); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, - struct kvec *first, size_t total); + struct kvec *first, void *p, + size_t total); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 7c3656505847..04e404a07882 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -31,6 +31,7 @@ struct svc_cred { /* name of form servicetype@hostname, passed down by * rpc.svcgssd, or computed from the above: */ char *cr_principal; + char *cr_targ_princ; struct gss_api_mech *cr_gss_mech; }; @@ -39,6 +40,7 @@ static inline void init_svc_cred(struct svc_cred *cred) cred->cr_group_info = NULL; cred->cr_raw_principal = NULL; cred->cr_principal = NULL; + cred->cr_targ_princ = NULL; cred->cr_gss_mech = NULL; } @@ -48,6 +50,7 @@ static inline void free_svc_cred(struct svc_cred *cred) put_group_info(cred->cr_group_info); kfree(cred->cr_raw_principal); kfree(cred->cr_principal); + kfree(cred->cr_targ_princ); gss_mech_put(cred->cr_gss_mech); init_svc_cred(cred); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0fc397fae42b..962afb8912d5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -284,7 +284,12 @@ err: return p; } -#define UPCALL_BUF_LEN 128 +/* XXX: Need some documentation about why UPCALL_BUF_LEN is so small. + * Is user space expecting no more than UPCALL_BUF_LEN bytes? + * Note that there are now _two_ NI_MAXHOST sized data items + * being passed in this string. + */ +#define UPCALL_BUF_LEN 256 struct gss_upcall_msg { refcount_t count; @@ -456,18 +461,44 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, buflen -= len; p += len; gss_msg->msg.len = len; + + /* + * target= is a full service principal that names the remote + * identity that we are authenticating to. + */ if (target_name) { len = scnprintf(p, buflen, "target=%s ", target_name); buflen -= len; p += len; gss_msg->msg.len += len; } - if (service_name != NULL) { - len = scnprintf(p, buflen, "service=%s ", service_name); + + /* + * gssd uses service= and srchost= to select a matching key from + * the system's keytab to use as the source principal. + * + * service= is the service name part of the source principal, + * or "*" (meaning choose any). + * + * srchost= is the hostname part of the source principal. When + * not provided, gssd uses the local hostname. + */ + if (service_name) { + char *c = strchr(service_name, '@'); + + if (!c) + len = scnprintf(p, buflen, "service=%s ", + service_name); + else + len = scnprintf(p, buflen, + "service=%.*s srchost=%s ", + (int)(c - service_name), + service_name, c + 1); buflen -= len; p += len; gss_msg->msg.len += len; } + if (mech->gm_upcall_enctypes) { len = scnprintf(p, buflen, "enctypes=%s ", mech->gm_upcall_enctypes); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 8654494b4d0a..d31f868b643c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -169,7 +169,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, struct scatterlist sg[1]; int err = -1; u8 *checksumdata; - u8 rc4salt[4]; + u8 *rc4salt; struct crypto_ahash *md5; struct crypto_ahash *hmac_md5; struct ahash_request *req; @@ -183,14 +183,18 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + rc4salt = kmalloc_array(4, sizeof(*rc4salt), GFP_NOFS); + if (!rc4salt) + return GSS_S_FAILURE; + if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) { dprintk("%s: invalid usage value %u\n", __func__, usage); - return GSS_S_FAILURE; + goto out_free_rc4salt; } checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); if (!checksumdata) - return GSS_S_FAILURE; + goto out_free_rc4salt; md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(md5)) @@ -258,6 +262,8 @@ out_free_md5: crypto_free_ahash(md5); out_free_cksum: kfree(checksumdata); +out_free_rc4salt: + kfree(rc4salt); return err ? GSS_S_FAILURE : 0; } @@ -373,7 +379,6 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, struct scatterlist sg[1]; int err = -1; u8 *checksumdata; - unsigned int checksumlen; if (kctx->gk5e->keyed_cksum == 0) { dprintk("%s: expected keyed hash for %s\n", @@ -393,7 +398,6 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto out_free_cksum; - checksumlen = crypto_ahash_digestsize(tfm); req = ahash_request_alloc(tfm, GFP_NOFS); if (!req) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index a737c2da0837..9a1347fec6f4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -440,7 +440,6 @@ static u32 gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages) { - int blocksize; u8 *ptr, *plainhdr; s32 now; u8 flags = 0x00; @@ -473,7 +472,6 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *ptr++ = 0xff; be16ptr = (__be16 *)ptr; - blocksize = crypto_skcipher_blocksize(kctx->acceptor_enc); *be16ptr++ = 0; /* "inner" token header always uses 0 for RRC */ *be16ptr++ = 0; diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1c7c49dbf8ba..73dcda060335 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -234,6 +234,35 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) return 0; } +static char *gssp_stringify(struct xdr_netobj *netobj) +{ + return kstrndup(netobj->data, netobj->len, GFP_KERNEL); +} + +static void gssp_hostbased_service(char **principal) +{ + char *c; + + if (!*principal) + return; + + /* terminate and remove realm part */ + c = strchr(*principal, '@'); + if (c) { + *c = '\0'; + + /* change service-hostname delimiter */ + c = strchr(*principal, '/'); + if (c) + *c = '@'; + } + if (!c) { + /* not a service principal */ + kfree(*principal); + *principal = NULL; + } +} + /* * Public functions */ @@ -262,6 +291,7 @@ int gssp_accept_sec_context_upcall(struct net *net, */ .exported_context_token.len = GSSX_max_output_handle_sz, .mech.len = GSS_OID_MAX_LEN, + .targ_name.display_name.len = GSSX_max_princ_sz, .src_name.display_name.len = GSSX_max_princ_sz }; struct gssx_res_accept_sec_context res = { @@ -275,6 +305,7 @@ int gssp_accept_sec_context_upcall(struct net *net, .rpc_cred = NULL, /* FIXME ? */ }; struct xdr_netobj client_name = { 0 , NULL }; + struct xdr_netobj target_name = { 0, NULL }; int ret; if (data->in_handle.len != 0) @@ -285,8 +316,6 @@ int gssp_accept_sec_context_upcall(struct net *net, if (ret) return ret; - /* use nfs/ for targ_name ? */ - ret = gssp_call(net, &msg); gssp_free_receive_pages(&arg); @@ -304,6 +333,7 @@ int gssp_accept_sec_context_upcall(struct net *net, kfree(rctxh.mech.data); } client_name = rctxh.src_name.display_name; + target_name = rctxh.targ_name.display_name; } if (res.options.count == 1) { @@ -325,32 +355,22 @@ int gssp_accept_sec_context_upcall(struct net *net, } /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */ - if (data->found_creds && client_name.data != NULL) { - char *c; - - data->creds.cr_raw_principal = kstrndup(client_name.data, - client_name.len, GFP_KERNEL); - - data->creds.cr_principal = kstrndup(client_name.data, - client_name.len, GFP_KERNEL); - if (data->creds.cr_principal) { - /* terminate and remove realm part */ - c = strchr(data->creds.cr_principal, '@'); - if (c) { - *c = '\0'; - - /* change service-hostname delimiter */ - c = strchr(data->creds.cr_principal, '/'); - if (c) *c = '@'; - } - if (!c) { - /* not a service principal */ - kfree(data->creds.cr_principal); - data->creds.cr_principal = NULL; - } + if (data->found_creds) { + if (client_name.data) { + data->creds.cr_raw_principal = + gssp_stringify(&client_name); + data->creds.cr_principal = + gssp_stringify(&client_name); + gssp_hostbased_service(&data->creds.cr_principal); + } + if (target_name.data) { + data->creds.cr_targ_princ = + gssp_stringify(&target_name); + gssp_hostbased_service(&data->creds.cr_targ_princ); } } kfree(client_name.data); + kfree(target_name.data); return ret; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 30a4226baf03..d13e05f1a990 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1537,16 +1537,16 @@ EXPORT_SYMBOL_GPL(svc_max_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call * @rqstp: svc_rqst to operate on + * @pages: list of pages containing data payload * @first: buffer containing first section of write payload * @total: total number of bytes of write payload * - * Returns the number of elements populated in the data argument array. + * Fills in rqstp::rq_vec, and returns the number of elements. */ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, - size_t total) +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, + struct kvec *first, size_t total) { struct kvec *vec = rqstp->rq_vec; - struct page **pages; unsigned int i; /* Some types of transport can present the write payload @@ -1560,14 +1560,11 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, ++i; } - WARN_ON_ONCE(rqstp->rq_arg.page_base != 0); - pages = rqstp->rq_arg.pages; while (total) { vec[i].iov_base = page_address(*pages); vec[i].iov_len = min_t(size_t, total, PAGE_SIZE); total -= vec[i].iov_len; ++i; - ++pages; } @@ -1580,65 +1577,48 @@ EXPORT_SYMBOL_GPL(svc_fill_write_vector); * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call * @rqstp: svc_rqst to operate on * @first: buffer containing first section of pathname + * @p: buffer containing remaining section of pathname * @total: total length of the pathname argument * - * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is - * released automatically when @rqstp is recycled. + * The VFS symlink API demands a NUL-terminated pathname in mapped memory. + * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free + * the returned string. */ char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, - size_t total) + void *p, size_t total) { - struct xdr_buf *arg = &rqstp->rq_arg; - struct page **pages; - char *result; - - /* VFS API demands a NUL-terminated pathname. This function - * uses a page from @rqstp as the pathname buffer, to enable - * direct placement. Thus the total buffer size is PAGE_SIZE. - * Space in this buffer for NUL-termination requires that we - * cap the size of the returned symlink pathname just a - * little early. - */ - if (total > PAGE_SIZE - 1) - return ERR_PTR(-ENAMETOOLONG); + size_t len, remaining; + char *result, *dst; - /* Some types of transport can present the pathname entirely - * in rq_arg.pages. If not, then copy the pathname into one - * page. - */ - pages = arg->pages; - WARN_ON_ONCE(arg->page_base != 0); - if (first->iov_base == 0) { - result = page_address(*pages); - result[total] = '\0'; - } else { - size_t len, remaining; - char *dst; + result = kmalloc(total + 1, GFP_KERNEL); + if (!result) + return ERR_PTR(-ESERVERFAULT); - result = page_address(*(rqstp->rq_next_page++)); - dst = result; - remaining = total; + dst = result; + remaining = total; - len = min_t(size_t, total, first->iov_len); + len = min_t(size_t, total, first->iov_len); + if (len) { memcpy(dst, first->iov_base, len); dst += len; remaining -= len; + } - /* No more than one page left */ - if (remaining) { - len = min_t(size_t, remaining, PAGE_SIZE); - memcpy(dst, page_address(*pages), len); - dst += len; - } - - *dst = '\0'; + if (remaining) { + len = min_t(size_t, remaining, PAGE_SIZE); + memcpy(dst, p, len); + dst += len; } - /* Sanity check: we don't allow the pathname argument to + *dst = '\0'; + + /* Sanity check: Linux doesn't allow the pathname argument to * contain a NUL byte. */ - if (strlen(result) != total) + if (strlen(result) != total) { + kfree(result); return ERR_PTR(-EINVAL); + } return result; } EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 357ba90c382d..134bef6a451e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -94,7 +94,6 @@ static int read_reset_stat(struct ctl_table *table, int write, atomic_set(stat, 0); else { char str_buf[32]; - char *data; int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat)); if (len >= 32) return -EFAULT; @@ -103,7 +102,6 @@ static int read_reset_stat(struct ctl_table *table, int write, *lenp = 0; return 0; } - data = &str_buf[*ppos]; len -= *ppos; if (len > *lenp) len = *lenp; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2ef75e885411..b24d5b8f2fee 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -365,9 +365,6 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, arg->page_base = 0; arg->buflen = ctxt->rc_byte_len; arg->len = ctxt->rc_byte_len; - - rqstp->rq_respages = &rqstp->rq_pages[0]; - rqstp->rq_next_page = rqstp->rq_respages + 1; } /* This accommodates the largest possible Write chunk, @@ -729,6 +726,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_build_arg_xdr(rqstp, ctxt); + /* Prevent svc_xprt_release from releasing pages in rq_pages + * if we return 0 or an error. + */ + rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_next_page = rqstp->rq_respages; + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); if (ret < 0) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 04cb3363172a..dc1951759a8e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -680,6 +680,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, struct svc_rdma_read_info *info, __be32 *p) { + unsigned int i; int ret; ret = -EINVAL; @@ -702,6 +703,12 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, info->ri_chunklen += rs_length; } + /* Pages under I/O have been copied to head->rc_pages. + * Prevent their premature release by svc_xprt_release() . + */ + for (i = 0; i < info->ri_readctxt->rc_page_count; i++) + rqstp->rq_pages[i] = NULL; + return ret; } @@ -817,7 +824,6 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p) { struct svc_rdma_read_info *info; - struct page **page; int ret; /* The request (with page list) is constructed in @@ -844,27 +850,15 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, ret = svc_rdma_build_normal_read_chunk(rqstp, info, p); else ret = svc_rdma_build_pz_read_chunk(rqstp, info, p); - - /* Mark the start of the pages that can be used for the reply */ - if (info->ri_pageoff > 0) - info->ri_pageno++; - rqstp->rq_respages = &rqstp->rq_pages[info->ri_pageno]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - if (ret < 0) - goto out; + goto out_err; ret = svc_rdma_post_chunk_ctxt(&info->ri_cc); - -out: - /* Read sink pages have been moved from rqstp->rq_pages to - * head->rc_arg.pages. Force svc_recv to refill those slots - * in rq_pages. - */ - for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++) - *page = NULL; - if (ret < 0) - svc_rdma_read_info_free(info); + goto out_err; + return 0; + +out_err: + svc_rdma_read_info_free(info); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index ffef0c508f1a..8602a5f1b515 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -656,7 +656,9 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, ctxt->sc_pages[i] = rqstp->rq_respages[i]; rqstp->rq_respages[i] = NULL; } - rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* Prevent svc_xprt_release from releasing pages in rq_pages */ + rqstp->rq_next_page = rqstp->rq_respages; } /* Prepare the portion of the RPC Reply that will be transmitted diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 547b2cdf1427..2848cafd4a17 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -296,7 +296,6 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr; - int ret = 0; trace_svcrdma_cm_event(event, sap); @@ -315,7 +314,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, break; } - return ret; + return 0; } static int rdma_cma_handler(struct rdma_cm_id *cma_id, |