diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-26 13:05:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-26 13:05:26 -0700 |
commit | c7a2c49ea6c9eebbe44ff2c08b663b2905ee2c13 (patch) | |
tree | 116bf34a5f0d9c6b4598f8775be60c9505f38e4d /include/linux | |
parent | 033078a9afe504ac9e615d10c4b35d634450b637 (diff) | |
parent | 331bc71cb1751d78f6807ad8e6162b07c67cdd1b (diff) |
Merge tag 'nfs-for-4.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable fixes:
- Fix the NFSv4.1 r/wsize sanity checking
- Reset the RPC/RDMA credit grant properly after a disconnect
- Fix a missed page unlock after pg_doio()
Features and optimisations:
- Overhaul of the RPC client socket code to eliminate a locking
bottleneck and reduce the latency when transmitting lots of
requests in parallel.
- Allow parallelisation of the RPCSEC_GSS encoding of an RPC request.
- Convert the RPC client socket receive code to use iovec_iter() for
improved efficiency.
- Convert several NFS and RPC lookup operations to use RCU instead of
taking global locks.
- Avoid the need for BH-safe locks in the RPC/RDMA back channel.
Bugfixes and cleanups:
- Fix lock recovery during NFSv4 delegation recalls
- Fix the NFSv4 + NFSv4.1 "lookup revalidate + open file" case.
- Fixes for the RPC connection metrics
- Various RPC client layer cleanups to consolidate stream based
sockets
- RPC/RDMA connection cleanups
- Simplify the RPC/RDMA cleanup after memory operation failures
- Clean ups for NFS v4.2 copy completion and NFSv4 open state
reclaim"
* tag 'nfs-for-4.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (97 commits)
SUNRPC: Convert the auth cred cache to use refcount_t
SUNRPC: Convert auth creds to use refcount_t
SUNRPC: Simplify lookup code
SUNRPC: Clean up the AUTH cache code
NFS: change sign of nfs_fh length
sunrpc: safely reallow resvport min/max inversion
nfs: remove redundant call to nfs_context_set_write_error()
nfs: Fix a missed page unlock after pg_doio()
SUNRPC: Fix a compile warning for cmpxchg64()
NFSv4.x: fix lock recovery during delegation recall
SUNRPC: use cmpxchg64() in gss_seq_send64_fetch_and_inc()
xprtrdma: Squelch a sparse warning
xprtrdma: Clean up xprt_rdma_disconnect_inject
xprtrdma: Add documenting comments
xprtrdma: Report when there were zero posted Receives
xprtrdma: Move rb_flags initialization
xprtrdma: Don't disable BH's in backchannel server
xprtrdma: Remove memory address of "ep" from an error message
xprtrdma: Rename rpcrdma_qp_async_error_upcall
xprtrdma: Simplify RPC wake-ups on connect
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/nfs_fs.h | 2 | ||||
-rw-r--r-- | include/linux/nfs_fs_sb.h | 3 | ||||
-rw-r--r-- | include/linux/nfs_xdr.h | 28 | ||||
-rw-r--r-- | include/linux/sunrpc/auth.h | 18 | ||||
-rw-r--r-- | include/linux/sunrpc/auth_gss.h | 1 | ||||
-rw-r--r-- | include/linux/sunrpc/bc_xprt.h | 1 | ||||
-rw-r--r-- | include/linux/sunrpc/gss_krb5.h | 3 | ||||
-rw-r--r-- | include/linux/sunrpc/sched.h | 10 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_xprt.h | 1 | ||||
-rw-r--r-- | include/linux/sunrpc/xdr.h | 11 | ||||
-rw-r--r-- | include/linux/sunrpc/xprt.h | 35 | ||||
-rw-r--r-- | include/linux/sunrpc/xprtsock.h | 36 |
12 files changed, 95 insertions, 54 deletions
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a0831e9d19c9..6e0417c02279 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -62,6 +62,7 @@ struct nfs_lock_context { struct nfs_open_context *open_context; fl_owner_t lockowner; atomic_t io_count; + struct rcu_head rcu_head; }; struct nfs4_state; @@ -82,6 +83,7 @@ struct nfs_open_context { struct list_head list; struct nfs4_threshold *mdsthreshold; + struct rcu_head rcu_head; }; struct nfs_open_dir_context { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index bf39d9c92201..0fc0b9135d46 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -228,6 +228,9 @@ struct nfs_server { unsigned short mountd_port; unsigned short mountd_protocol; struct rpc_wait_queue uoc_rpcwaitq; + + /* XDR related information */ + unsigned int read_hdrsize; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bd1c889a9ed9..0e016252cfc6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -608,8 +608,13 @@ struct nfs_pgio_args { __u32 count; unsigned int pgbase; struct page ** pages; - const u32 * bitmask; /* used by write */ - enum nfs3_stable_how stable; /* used by write */ + union { + unsigned int replen; /* used by read */ + struct { + const u32 * bitmask; /* used by write */ + enum nfs3_stable_how stable; /* used by write */ + }; + }; }; struct nfs_pgio_res { @@ -617,10 +622,16 @@ struct nfs_pgio_res { struct nfs_fattr * fattr; __u32 count; __u32 op_status; - int eof; /* used by read */ - struct nfs_writeverf * verf; /* used by write */ - const struct nfs_server *server; /* used by write */ - + union { + struct { + unsigned int replen; /* used by read */ + int eof; /* used by read */ + }; + struct { + struct nfs_writeverf * verf; /* used by write */ + const struct nfs_server *server; /* used by write */ + }; + }; }; /* @@ -1471,11 +1482,10 @@ struct nfs_pgio_header { const struct nfs_rw_ops *rw_ops; struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; - spinlock_t lock; - /* fields protected by lock */ + int pnfs_error; int error; /* merge with pnfs_error */ - unsigned long good_bytes; /* boundary of good data */ + unsigned int good_bytes; /* boundary of good data */ unsigned long flags; /* diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 58a6765c1c5e..c4db9424b63b 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -67,7 +67,7 @@ struct rpc_cred { const struct rpc_credops *cr_ops; unsigned long cr_expire; /* when to gc */ unsigned long cr_flags; /* various flags */ - atomic_t cr_count; /* ref count */ + refcount_t cr_count; /* ref count */ kuid_t cr_uid; @@ -100,7 +100,7 @@ struct rpc_auth { * differ from the flavor in * au_ops->au_flavor in gss * case) */ - atomic_t au_count; /* Reference counter */ + refcount_t au_count; /* Reference counter */ struct rpc_cred_cache * au_credcache; /* per-flavor data */ @@ -157,6 +157,7 @@ struct rpc_credops { int (*crkey_timeout)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *); char * (*crstringify_acceptor)(struct rpc_cred *); + bool (*crneed_reencode)(struct rpc_task *); }; extern const struct rpc_authops authunix_ops; @@ -192,6 +193,7 @@ __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); __be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); @@ -204,11 +206,11 @@ bool rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *); char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline -struct rpc_cred * get_rpccred(struct rpc_cred *cred) +struct rpc_cred *get_rpccred(struct rpc_cred *cred) { - if (cred != NULL) - atomic_inc(&cred->cr_count); - return cred; + if (cred != NULL && refcount_inc_not_zero(&cred->cr_count)) + return cred; + return NULL; } /** @@ -224,9 +226,7 @@ struct rpc_cred * get_rpccred(struct rpc_cred *cred) static inline struct rpc_cred * get_rpccred_rcu(struct rpc_cred *cred) { - if (atomic_inc_not_zero(&cred->cr_count)) - return cred; - return NULL; + return get_rpccred(cred); } #endif /* __KERNEL__ */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 0c9eac351aab..30427b729070 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -70,6 +70,7 @@ struct gss_cl_ctx { refcount_t count; enum rpc_gss_proc gc_proc; u32 gc_seq; + u32 gc_seq_xmit; spinlock_t gc_seq_lock; struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 4397a4824c81..28721cf73ec3 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index f6e8ceafafd8..131424cefc6a 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -118,7 +118,8 @@ struct krb5_ctx { u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -extern spinlock_t krb5_seq_lock; +extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx); +extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx); /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 592653becd91..7b540c066594 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -140,8 +140,9 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_ACTIVE 2 -#define RPC_TASK_MSG_RECV 3 -#define RPC_TASK_MSG_RECV_WAIT 4 +#define RPC_TASK_NEED_XMIT 3 +#define RPC_TASK_NEED_RECV 4 +#define RPC_TASK_MSG_PIN_WAIT 5 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) @@ -188,7 +189,6 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ @@ -204,7 +204,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. */ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* @@ -234,6 +233,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, struct rpc_task *task); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); +void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *, + struct rpc_task *, + int); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index c3d72066d4b1..6b7a86c4d6e6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -84,7 +84,6 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; - struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bd68177a442..43106ffa6788 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -18,6 +18,7 @@ #include <asm/unaligned.h> #include <linux/scatterlist.h> +struct bio_vec; struct rpc_rqst; /* @@ -52,12 +53,14 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ + struct bio_vec *bvec; struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ #define XDRBUF_READ 0x01 /* target of file read */ #define XDRBUF_WRITE 0x02 /* source of file write */ +#define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ @@ -69,6 +72,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; + buf->bvec = NULL; + buf->pages = NULL; buf->page_len = 0; buf->flags = 0; buf->len = 0; @@ -115,6 +120,9 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(struct xdr_buf *buf); +int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); +void xdr_free_bvec(struct xdr_buf *buf); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -177,10 +185,7 @@ struct xdr_skb_reader { typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); -extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, - struct xdr_skb_reader *, xdr_skb_read_actor); extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 336fd1a19cca..a4ab4f8d9140 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -82,7 +82,14 @@ struct rpc_rqst { struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ - struct list_head rq_list; + + union { + struct list_head rq_list; /* Slot allocation list */ + struct rb_node rq_recv; /* Receive queue */ + }; + + struct list_head rq_xmit; /* Send queue */ + struct list_head rq_xmit2; /* Send queue */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; @@ -103,6 +110,7 @@ struct rpc_rqst { /* A cookie used to track the state of the transport connection */ + atomic_t rq_pin; /* * Partial send handling @@ -133,7 +141,8 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*send_request)(struct rpc_task *task); + void (*prepare_request)(struct rpc_rqst *req); + int (*send_request)(struct rpc_rqst *req); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); @@ -234,9 +243,12 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ - spinlock_t recv_lock; /* lock receive list */ + spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + + struct list_head xmit_queue; /* Send queue */ + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ @@ -248,7 +260,8 @@ struct rpc_xprt { struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv; + + struct rb_root recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ @@ -325,15 +338,18 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); -void xprt_request_init(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_transmit(struct rpc_task *task); +void xprt_request_enqueue_receive(struct rpc_task *task); +void xprt_request_wait_receive(struct rpc_task *task); +bool xprt_request_need_retransmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); @@ -373,8 +389,8 @@ int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); -void xprt_write_space(struct rpc_xprt *xprt); +void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); +bool xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_update_rtt(struct rpc_task *task); @@ -382,6 +398,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_pin_rqst(struct rpc_rqst *req); void xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); +bool xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); @@ -400,6 +417,8 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONGESTED (9) +#define XPRT_CWND_WAIT (10) +#define XPRT_WRITE_SPACE (11) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index ae0f99b9b965..458bfe0137f5 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -30,15 +30,25 @@ struct sock_xprt { /* * State of TCP reply receive */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; + struct { + struct { + __be32 fraghdr, + xid, + calldir; + } __attribute__((packed)); - u32 tcp_offset, - tcp_reclen; + u32 offset, + len; - unsigned long tcp_copied, - tcp_flags; + unsigned long copied; + } recv; + + /* + * State of TCP transmit queue + */ + struct { + u32 offset; + } xmit; /* * Connection of transports @@ -68,20 +78,8 @@ struct sock_xprt { }; /* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - -/* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 6) - #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) |