diff options
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/auth_gss/svcauth_gss.c | 12 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 49 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 33 | ||||
-rw-r--r-- | net/sunrpc/svcauth_unix.c | 23 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 174 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma.c | 21 |
6 files changed, 243 insertions, 69 deletions
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 9c843974bb48..c4a566737085 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -257,11 +257,11 @@ static int rsi_parse(struct cache_detail *cd, rsii.h.flags = 0; /* expiry */ - expiry = get_expiry(&mesg); - status = -EINVAL; - if (expiry == 0) + status = get_expiry(&mesg, &expiry); + if (status) goto out; + status = -EINVAL; /* major/minor */ len = qword_get(&mesg, buf, mlen); if (len <= 0) @@ -483,11 +483,11 @@ static int rsc_parse(struct cache_detail *cd, rsci.h.flags = 0; /* expiry */ - expiry = get_expiry(&mesg); - status = -EINVAL; - if (expiry == 0) + status = get_expiry(&mesg, &expiry); + if (status) goto out; + status = -EINVAL; rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index fea7ce8fba14..26367cf4c17a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -649,6 +649,8 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return rqstp; + pagevec_init(&rqstp->rq_pvec); + __set_bit(RQ_BUSY, &rqstp->rq_flags); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -842,9 +844,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * * When replacing a page in rq_pages, batch the release of the * replaced pages to avoid hammering the page allocator. + * + * Return values: + * %true: page replaced + * %false: array bounds checking failed */ -void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) +bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) { + struct page **begin = rqstp->rq_pages; + struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES]; + + if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { + trace_svc_replace_page_err(rqstp); + return false; + } + if (*rqstp->rq_next_page) { if (!pagevec_space(&rqstp->rq_pvec)) __pagevec_release(&rqstp->rq_pvec); @@ -853,9 +867,28 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) get_page(page); *(rqstp->rq_next_page++) = page; + return true; } EXPORT_SYMBOL_GPL(svc_rqst_replace_page); +/** + * svc_rqst_release_pages - Release Reply buffer pages + * @rqstp: RPC transaction context + * + * Release response pages that might still be in flight after + * svc_send, and any spliced filesystem-owned pages. + */ +void svc_rqst_release_pages(struct svc_rqst *rqstp) +{ + int i, count = rqstp->rq_next_page - rqstp->rq_respages; + + if (count) { + release_pages(rqstp->rq_respages, count); + for (i = 0; i < count; i++) + rqstp->rq_respages[i] = NULL; + } +} + /* * Called from a server thread as it's exiting. Caller must hold the "service * mutex" for the service. @@ -863,6 +896,7 @@ EXPORT_SYMBOL_GPL(svc_rqst_replace_page); void svc_rqst_free(struct svc_rqst *rqstp) { + pagevec_release(&rqstp->rq_pvec); svc_release_buffer(rqstp); if (rqstp->rq_scratch_page) put_page(rqstp->rq_scratch_page); @@ -1431,11 +1465,12 @@ err_system_err: goto sendit; } -/* - * Process the RPC request. +/** + * svc_process - Execute one RPC transaction + * @rqstp: RPC transaction context + * */ -int -svc_process(struct svc_rqst *rqstp) +void svc_process(struct svc_rqst *rqstp) { struct kvec *resv = &rqstp->rq_res.head[0]; __be32 *p; @@ -1471,7 +1506,8 @@ svc_process(struct svc_rqst *rqstp) if (!svc_process_common(rqstp)) goto out_drop; - return svc_send(rqstp); + svc_send(rqstp); + return; out_baddir: svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", @@ -1479,7 +1515,6 @@ out_baddir: rqstp->rq_server->sv_stats->rpcbadfmt++; out_drop: svc_drop(rqstp); - return 0; } EXPORT_SYMBOL_GPL(svc_process); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ba629297da4e..84e5d7d31481 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) if (xpt_flags & BIT(XPT_BUSY)) return false; - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { if (xprt->xpt_ops->xpo_has_wspace(xprt) && @@ -541,8 +541,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp) kfree(rqstp->rq_deferred); rqstp->rq_deferred = NULL; - pagevec_release(&rqstp->rq_pvec); - svc_free_res_pages(rqstp); + svc_rqst_release_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; @@ -667,8 +666,6 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) struct xdr_buf *arg = &rqstp->rq_arg; unsigned long pages, filled, ret; - pagevec_init(&rqstp->rq_pvec); - pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT; if (pages > RPCSVC_MAXPAGES) { pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n", @@ -704,6 +701,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) arg->page_len = (pages-2)*PAGE_SIZE; arg->len = (pages-1)*PAGE_SIZE; arg->tail[0].iov_len = 0; + + rqstp->rq_xid = xdr_zero; return 0; } @@ -829,6 +828,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) module_put(xprt->xpt_class->xcl_owner); } svc_xprt_received(xprt); + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { + xprt->xpt_ops->xpo_handshake(xprt); + svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", @@ -909,18 +911,20 @@ void svc_drop(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_drop); -/* - * Return reply to client. +/** + * svc_send - Return reply to client + * @rqstp: RPC transaction context + * */ -int svc_send(struct svc_rqst *rqstp) +void svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len = -EFAULT; struct xdr_buf *xb; + int status; xprt = rqstp->rq_xprt; if (!xprt) - goto out; + return; /* calculate over-all length */ xb = &rqstp->rq_res; @@ -930,15 +934,10 @@ int svc_send(struct svc_rqst *rqstp) trace_svc_xdr_sendto(rqstp->rq_xid, xb); trace_svc_stats_latency(rqstp); - len = xprt->xpt_ops->xpo_sendto(rqstp); + status = xprt->xpt_ops->xpo_sendto(rqstp); - trace_svc_send(rqstp, len); + trace_svc_send(rqstp, status); svc_xprt_release(rqstp); - - if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - len = 0; -out: - return len; } /* diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4246363cb095..174783f804fa 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -17,8 +17,9 @@ #include <net/ipv6.h> #include <linux/kernel.h> #include <linux/user_namespace.h> -#define RPCDBG_FACILITY RPCDBG_AUTH +#include <trace/events/sunrpc.h> +#define RPCDBG_FACILITY RPCDBG_AUTH #include "netns.h" @@ -225,9 +226,9 @@ static int ip_map_parse(struct cache_detail *cd, return -EINVAL; } - expiry = get_expiry(&mesg); - if (expiry ==0) - return -EINVAL; + err = get_expiry(&mesg, &expiry); + if (err) + return err; /* domainname, or empty for NEGATIVE */ len = qword_get(&mesg, buf, mlen); @@ -506,9 +507,9 @@ static int unix_gid_parse(struct cache_detail *cd, uid = make_kuid(current_user_ns(), id); ug.uid = uid; - expiry = get_expiry(&mesg); - if (expiry == 0) - return -EINVAL; + err = get_expiry(&mesg, &expiry); + if (err) + return err; rv = get_int(&mesg, &gids); if (rv || gids < 0 || gids > 8192) @@ -832,6 +833,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; + struct svc_xprt *xprt = rqstp->rq_xprt; u32 flavor, len; void *body; __be32 *p; @@ -865,14 +867,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) if (cred->cr_group_info == NULL) return SVC_CLOSE; - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { + if (xprt->xpt_ops->xpo_handshake) { p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); if (!p) return SVC_CLOSE; + trace_svc_tls_start(xprt); *p++ = rpc_auth_null; *p++ = cpu_to_be32(8); memcpy(p, "STARTTLS", 8); + + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } else { + trace_svc_tls_unavailable(xprt); if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 03a4f5615086..a51c9b989d58 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -43,9 +43,12 @@ #include <net/udp.h> #include <net/tcp.h> #include <net/tcp_states.h> +#include <net/tls.h> +#include <net/handshake.h> #include <linux/uaccess.h> #include <linux/highmem.h> #include <asm/ioctls.h> +#include <linux/key.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> @@ -63,6 +66,12 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +/* To-do: to avoid tying up an nfsd thread while waiting for a + * handshake request, the request could instead be deferred. + */ +enum { + SVC_HANDSHAKE_TO = 5U * HZ +}; static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int flags); @@ -216,6 +225,49 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +static int +svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg, + struct cmsghdr *cmsg, int ret) +{ + if (cmsg->cmsg_level == SOL_TLS && + cmsg->cmsg_type == TLS_GET_RECORD_TYPE) { + u8 content_type = *((u8 *)CMSG_DATA(cmsg)); + + switch (content_type) { + case TLS_RECORD_TYPE_DATA: + /* TLS sets EOR at the end of each application data + * record, even though there might be more frames + * waiting to be decrypted. + */ + msg->msg_flags &= ~MSG_EOR; + break; + case TLS_RECORD_TYPE_ALERT: + ret = -ENOTCONN; + break; + default: + ret = -EAGAIN; + } + } + return ret; +} + +static int +svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +{ + union { + struct cmsghdr cmsg; + u8 buf[CMSG_SPACE(sizeof(u8))]; + } u; + int ret; + + msg->msg_control = &u; + msg->msg_controllen = sizeof(u); + ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT); + if (unlikely(msg->msg_controllen != sizeof(u))) + ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret); + return ret; +} + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) { @@ -263,7 +315,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + len = svc_tcp_sock_recv_cmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -315,6 +367,8 @@ static void svc_data_ready(struct sock *sk) rmb(); svsk->sk_odata(sk); trace_svcsock_data_ready(&svsk->sk_xprt, 0); + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) + return; if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -352,6 +406,88 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) sock_no_linger(svsk->sk_sock->sk); } +/** + * svc_tcp_handshake_done - Handshake completion handler + * @data: address of xprt to wake + * @status: status of handshake + * @peerid: serial number of key containing the remote peer's identity + * + * If a security policy is specified as an export option, we don't + * have a specific export here to check. So we set a "TLS session + * is present" flag on the xprt and let an upper layer enforce local + * security policy. + */ +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) +{ + struct svc_xprt *xprt = data; + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (!status) { + if (peerid != TLS_NO_PEERID) + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + } + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + complete_all(&svsk->sk_handshake_done); +} + +/** + * svc_tcp_handshake - Perform a transport-layer security handshake + * @xprt: connected transport endpoint + * + */ +static void svc_tcp_handshake(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct sock *sk = svsk->sk_sock->sk; + struct tls_handshake_args args = { + .ta_sock = svsk->sk_sock, + .ta_done = svc_tcp_handshake_done, + .ta_data = xprt, + }; + int ret; + + trace_svc_tls_upcall(xprt); + + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + init_completion(&svsk->sk_handshake_done); + + ret = tls_server_hello_x509(&args, GFP_KERNEL); + if (ret) { + trace_svc_tls_not_started(xprt); + goto out_failed; + } + + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, + SVC_HANDSHAKE_TO); + if (ret <= 0) { + if (tls_handshake_cancel(sk)) { + trace_svc_tls_timed_out(xprt); + goto out_close; + } + } + + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { + trace_svc_tls_unavailable(xprt); + goto out_close; + } + + /* Mark the transport ready in case the remote sent RPC + * traffic before the kernel received the handshake + * completion downcall. + */ + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); + return; + +out_close: + set_bit(XPT_CLOSE, &xprt->xpt_flags); +out_failed: + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -877,7 +1013,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + len = svc_tcp_sock_recv_cmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; @@ -1213,6 +1349,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, + .xpo_handshake = svc_tcp_handshake, }; static struct svc_xprt_class svc_tcp_class = { @@ -1293,26 +1430,37 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct socket *sock, int flags) { + struct file *filp = NULL; struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); - int err = 0; svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) return ERR_PTR(-ENOMEM); + if (!sock->file) { + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) { + kfree(svsk); + return ERR_CAST(filp); + } + } + inet = sock->sk; - /* Register socket with portmapper */ - if (pmap_register) + if (pmap_register) { + int err; + err = svc_register(serv, sock_net(sock->sk), inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); - - if (err < 0) { - kfree(svsk); - return ERR_PTR(err); + if (err < 0) { + if (filp) + fput(filp); + kfree(svsk); + return ERR_PTR(err); + } } svsk->sk_sock = sock; @@ -1525,10 +1673,12 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct socket *sock = svsk->sk_sock; - if (svsk->sk_sock->file) - sockfd_put(svsk->sk_sock); + tls_handshake_cancel(sock->sk); + if (sock->file) + sockfd_put(sock); else - sock_release(svsk->sk_sock); + sock_release(sock); kfree(svsk); } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 5bc20e9d09cd..f0d5eeed4c88 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -212,24 +212,6 @@ static struct ctl_table svcrdma_parm_table[] = { { }, }; -static struct ctl_table svcrdma_table[] = { - { - .procname = "svc_rdma", - .mode = 0555, - .child = svcrdma_parm_table - }, - { }, -}; - -static struct ctl_table svcrdma_root_table[] = { - { - .procname = "sunrpc", - .mode = 0555, - .child = svcrdma_table - }, - { }, -}; - static void svc_rdma_proc_cleanup(void) { if (!svcrdma_table_header) @@ -263,7 +245,8 @@ static int svc_rdma_proc_init(void) if (rc) goto out_err; - svcrdma_table_header = register_sysctl_table(svcrdma_root_table); + svcrdma_table_header = register_sysctl("sunrpc/svc_rdma", + svcrdma_parm_table); return 0; out_err: |