diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-01 14:38:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-01 14:38:25 -0700 |
commit | dfab92f27c600fea3cadc6e2cb39f092024e1fef (patch) | |
tree | 52698267f05f49023f0e34101fc438971ac14765 | |
parent | f8566aa4f1766bb0267b7a0ed89c1d2c4a82ee1a (diff) | |
parent | 5b4a82a0724af1dfd1320826e0266117b6a57fbd (diff) |
Merge tag 'nfs-for-6.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Stable fixes and other bugfixes:
- nfs: don't report STATX_BTIME in ->getattr
- Revert 'NFSv4: Retry LOCK on OLD_STATEID during delegation return'
since it breaks NFSv4 state recovery.
- NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION
- Fix the NFSv4.2 xattr cache shrinker_id
- Force a ctime update after a NFSv4.2 SETXATTR call
Features and cleanups:
- NFS and RPC over TLS client code from Chuck Lever
- Support for use of abstract unix socket addresses with the rpcbind
daemon
- Sysfs API to allow shutdown of the kernel RPC client and prevent
umount() hangs if the server is known to be permanently down
- XDR cleanups from Anna"
* tag 'nfs-for-6.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (33 commits)
Revert "NFSv4: Retry LOCK on OLD_STATEID during delegation return"
NFS: Don't cleanup sysfs superblock entry if uninitialized
nfs: don't report STATX_BTIME in ->getattr
NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION
NFSv4.2: fix wrong shrinker_id
NFSv4: Clean up some shutdown loops
NFS: Cancel all existing RPC tasks when shutdown
NFS: add sysfs shutdown knob
NFS: add a sysfs link to the acl rpc_client
NFS: add a sysfs link to the lockd rpc_client
NFS: Add sysfs links to sunrpc clients for nfs_clients
NFS: add superblock sysfs entries
NFS: Make all of /sys/fs/nfs network-namespace unique
NFS: Open-code the nfs_kset kset_create_and_add()
NFS: rename nfs_client_kobj to nfs_net_kobj
NFS: rename nfs_client_kset to nfs_kset
NFS: Add an "xprtsec=" NFS mount option
NFS: Have struct nfs_client carry a TLS policy field
SUNRPC: Add a TCP-with-TLS RPC transport class
SUNRPC: Capture CMSG metadata on client-side receive
...
31 files changed, 1561 insertions, 430 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index e3972aa3045a..5d85715be763 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -93,6 +93,12 @@ void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct block->b_status = nlm_lck_blocked; } +struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host) +{ + return host->h_rpcclnt; +} +EXPORT_SYMBOL_GPL(nlmclnt_rpc_clnt); + /* * Queue up a lock for blocking so that the GRANTED request can see it */ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f50e025ae406..e4c5f193ed5e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -184,6 +184,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; + clp->cl_xprtsec = cl_init->xprtsec; return clp; error_cleanup: @@ -326,6 +327,10 @@ again: sap)) continue; + /* Match the xprt security policy */ + if (clp->cl_xprtsec.policy != data->xprtsec.policy) + continue; + refcount_inc(&clp->cl_count); return clp; } @@ -458,6 +463,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, switch (proto) { case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; @@ -510,6 +516,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, + .xprtsec = cl_init->xprtsec, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) @@ -592,6 +599,7 @@ static int nfs_start_lockd(struct nfs_server *server) server->nlm_host = host; server->destroy = nfs_destroy_server; + nfs_sysfs_link_rpc_client(server, nlmclnt_rpc_clnt(host), NULL); return 0; } @@ -621,6 +629,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server, if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; + nfs_sysfs_link_rpc_client(server, server->client, NULL); return 0; } EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); @@ -675,6 +684,7 @@ static int nfs_init_server(struct nfs_server *server, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), + .xprtsec = ctx->xprtsec, }; struct nfs_client *clp; int error; @@ -690,6 +700,8 @@ static int nfs_init_server(struct nfs_server *server, return PTR_ERR(clp); server->nfs_client = clp; + nfs_sysfs_add_server(server); + nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state"); /* Initialise the client representation from the mount data */ server->flags = ctx->flags; @@ -944,6 +956,8 @@ void nfs_server_remove_lists(struct nfs_server *server) } EXPORT_SYMBOL_GPL(nfs_server_remove_lists); +static DEFINE_IDA(s_sysfs_ids); + /* * Allocate and initialise a server record */ @@ -955,6 +969,12 @@ struct nfs_server *nfs_alloc_server(void) if (!server) return NULL; + server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL); + if (server->s_sysfs_id < 0) { + kfree(server); + return NULL; + } + server->client = server->client_acl = ERR_PTR(-EINVAL); /* Zero out the NFS state stuff */ @@ -1001,6 +1021,12 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); + if (server->kobj.state_initialized) { + nfs_sysfs_remove_server(server); + kobject_put(&server->kobj); + } + ida_free(&s_sysfs_ids, server->s_sysfs_id); + ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); @@ -1102,6 +1128,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->fsid = fattr->fsid; + nfs_sysfs_add_server(server); + + nfs_sysfs_link_rpc_client(server, + server->nfs_client->cl_rpcclient, "_state"); + error = nfs_init_server_rpcclient(server, source->client->cl_timeout, flavor); @@ -1385,6 +1416,7 @@ error_0: void nfs_fs_proc_exit(void) { remove_proc_subtree("fs/nfsfs", NULL); + ida_destroy(&s_sysfs_ids); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 9bcd53d5c7d4..853e8d609bb3 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -18,6 +18,9 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> + +#include <net/handshake.h> + #include "nfs.h" #include "internal.h" @@ -88,6 +91,7 @@ enum nfs_param { Opt_vers, Opt_wsize, Opt_write, + Opt_xprtsec, }; enum { @@ -194,6 +198,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_string("vers", Opt_vers), fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), + fsparam_string("xprtsec", Opt_xprtsec), {} }; @@ -267,6 +272,20 @@ static const struct constant_table nfs_secflavor_tokens[] = { {} }; +enum { + Opt_xprtsec_none, + Opt_xprtsec_tls, + Opt_xprtsec_mtls, + nr__Opt_xprtsec +}; + +static const struct constant_table nfs_xprtsec_policies[] = { + { "none", Opt_xprtsec_none }, + { "tls", Opt_xprtsec_tls }, + { "mtls", Opt_xprtsec_mtls }, + {} +}; + /* * Sanity-check a server address provided by the mount command. * @@ -320,9 +339,21 @@ static int nfs_validate_transport_protocol(struct fs_context *fc, default: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } + + if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE) + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_TCP: + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS; + break; + default: + goto out_invalid_xprtsec_policy; + } + return 0; out_invalid_transport_udp: return nfs_invalf(fc, "NFS: Unsupported transport protocol udp"); +out_invalid_xprtsec_policy: + return nfs_invalf(fc, "NFS: Transport does not support xprtsec"); } /* @@ -430,6 +461,29 @@ static int nfs_parse_security_flavors(struct fs_context *fc, return 0; } +static int nfs_parse_xprtsec_policy(struct fs_context *fc, + struct fs_parameter *param) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + trace_nfs_mount_assign(param->key, param->string); + + switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) { + case Opt_xprtsec_none: + ctx->xprtsec.policy = RPC_XPRTSEC_NONE; + break; + case Opt_xprtsec_tls: + ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON; + break; + case Opt_xprtsec_mtls: + ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509; + break; + default: + return nfs_invalf(fc, "NFS: Unrecognized transport security policy"); + } + return 0; +} + static int nfs_parse_version_string(struct fs_context *fc, const char *string) { @@ -696,6 +750,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (ret < 0) return ret; break; + case Opt_xprtsec: + ret = nfs_parse_xprtsec_policy(fc, param); + if (ret < 0) + return ret; + break; case Opt_proto: if (!param->string) @@ -791,16 +850,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->mount_server.addrlen = len; break; case Opt_nconnect: + trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; case Opt_max_connect: + trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; case Opt_lookupcache: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); @@ -817,6 +879,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, } break; case Opt_local_lock: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | @@ -837,6 +900,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, } break; case Opt_write: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_write_lazy: ctx->flags &= @@ -1569,6 +1633,9 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; ctx->minorversion = 0; ctx->need_mount = true; + ctx->xprtsec.policy = RPC_XPRTSEC_NONE; + ctx->xprtsec.cert_serial = TLS_NO_CERT; + ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; fc->s_iflags |= SB_I_STABLE_WRITES; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a910b9a638c5..8172dd4135a1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -845,7 +845,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_CHANGE_COOKIE; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b5f21d35d30e..913c09806c7f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -81,6 +81,7 @@ struct nfs_client_initdata { struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; + struct xprtsec_parms xprtsec; }; /* @@ -101,6 +102,7 @@ struct nfs_fs_context { unsigned int bsize; struct nfs_auth_info auth_info; rpc_authflavor_t selected_flavor; + struct xprtsec_parms xprtsec; char *client_address; unsigned int version; unsigned int minorversion; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 669cda757a5c..eff3802c5e03 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -4,6 +4,8 @@ #include <linux/sunrpc/addr.h> #include "internal.h" #include "nfs3_fs.h" +#include "netns.h" +#include "sysfs.h" #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; @@ -31,6 +33,8 @@ static void nfs_init_server_aclclient(struct nfs_server *server) if (IS_ERR(server->client_acl)) goto out_noacl; + nfs_sysfs_link_rpc_client(server, server->client_acl, NULL); + /* No errors! Assume that Sun nfsacls are supported */ server->caps |= NFS_CAP_ACLS; return; @@ -93,6 +97,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, + .xprtsec = mds_clp->cl_xprtsec, }; struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; @@ -102,8 +107,12 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) - cl_init.nconnect = mds_clp->cl_nconnect; + switch (ds_proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_nconnect > 1) + cl_init.nconnect = mds_clp->cl_nconnect; + } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 93e306bf4430..63802d195556 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1190,15 +1190,19 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, const void *buf, size_t buflen, int flags) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS_BITMASK_SZ]; struct page *pages[NFS4XATTR_MAXPAGES]; struct nfs42_setxattrargs arg = { .fh = NFS_FH(inode), + .bitmask = bitmask, .xattr_pages = pages, .xattr_len = buflen, .xattr_name = name, .xattr_flags = flags, }; - struct nfs42_setxattrres res; + struct nfs42_setxattrres res = { + .server = server, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR], .rpc_argp = &arg, @@ -1210,13 +1214,22 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, if (buflen > server->sxasize) return -ERANGE; + res.fattr = nfs_alloc_fattr(); + if (!res.fattr) + return -ENOMEM; + if (buflen > 0) { np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages); - if (np < 0) - return np; + if (np < 0) { + ret = np; + goto out; + } } else np = 0; + nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, + inode, NFS_INO_INVALID_CHANGE); + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); trace_nfs4_setxattr(inode, name, ret); @@ -1224,9 +1237,13 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, for (; np > 0; np--) put_page(pages[np - 1]); - if (!ret) + if (!ret) { nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + ret = nfs_post_op_update_inode(inode, res.fattr); + } +out: + kfree(res.fattr); return ret; } diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 76ae11834206..911f634ba3da 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -991,6 +991,29 @@ static void nfs4_xattr_cache_init_once(void *p) INIT_LIST_HEAD(&cache->dispose); } +static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, + struct list_lru *lru, const char *name) +{ + int ret = 0; + + ret = register_shrinker(shrinker, name); + if (ret) + return ret; + + ret = list_lru_init_memcg(lru, shrinker); + if (ret) + unregister_shrinker(shrinker); + + return ret; +} + +static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker, + struct list_lru *lru) +{ + unregister_shrinker(shrinker); + list_lru_destroy(lru); +} + int __init nfs4_xattr_cache_init(void) { int ret = 0; @@ -1002,44 +1025,30 @@ int __init nfs4_xattr_cache_init(void) if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; - ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru, - &nfs4_xattr_large_entry_shrinker); - if (ret) - goto out4; - - ret = list_lru_init_memcg(&nfs4_xattr_entry_lru, - &nfs4_xattr_entry_shrinker); - if (ret) - goto out3; - - ret = list_lru_init_memcg(&nfs4_xattr_cache_lru, - &nfs4_xattr_cache_shrinker); - if (ret) - goto out2; - - ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru, + "nfs-xattr_cache"); if (ret) goto out1; - ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru, + "nfs-xattr_entry"); if (ret) - goto out; + goto out2; - ret = register_shrinker(&nfs4_xattr_large_entry_shrinker, - "nfs-xattr_large_entry"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker, + &nfs4_xattr_large_entry_lru, + "nfs-xattr_large_entry"); if (!ret) return 0; - unregister_shrinker(&nfs4_xattr_entry_shrinker); -out: - unregister_shrinker(&nfs4_xattr_cache_shrinker); -out1: - list_lru_destroy(&nfs4_xattr_cache_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru); out2: - list_lru_destroy(&nfs4_xattr_entry_lru); -out3: - list_lru_destroy(&nfs4_xattr_large_entry_lru); -out4: + nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru); +out1: kmem_cache_destroy(nfs4_xattr_cache_cachep); return ret; @@ -1047,11 +1056,11 @@ out4: void nfs4_xattr_cache_exit(void) { - unregister_shrinker(&nfs4_xattr_large_entry_shrinker); - unregister_shrinker(&nfs4_xattr_entry_shrinker); - unregister_shrinker(&nfs4_xattr_cache_shrinker); - list_lru_destroy(&nfs4_xattr_large_entry_lru); - list_lru_destroy(&nfs4_xattr_entry_lru); - list_lru_destroy(&nfs4_xattr_cache_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker, + &nfs4_xattr_large_entry_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index a6df815a140c..95234208dc9e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -7,6 +7,9 @@ #include "nfs42.h" +/* Not limited by NFS itself, limited by the generic xattr code */ +#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -89,6 +92,18 @@ 2 /* dst offset */ + \ 2 /* count */) #define decode_clone_maxsz (op_decode_hdr_maxsz) +#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) +#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_xattr_name_maxsz + 1) +#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) +#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) +#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -186,55 +201,40 @@ decode_putfh_maxsz + \ decode_clone_maxsz + \ decode_getattr_maxsz) - -/* Not limited by NFS itself, limited by the generic xattr code */ -#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) - -#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ - nfs4_xattr_name_maxsz) -#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) -#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ - 1 + nfs4_xattr_name_maxsz + 1) -#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) -#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) -#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) -#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ - nfs4_xattr_name_maxsz) -#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ - decode_change_info_maxsz) - -#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getxattr_maxsz) -#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getxattr_maxsz) -#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_setxattr_maxsz) -#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_setxattr_maxsz) -#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_listxattrs_maxsz) -#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_listxattrs_maxsz) -#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_removexattr_maxsz) -#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_removexattr_maxsz) +#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getxattr_maxsz) +#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getxattr_maxsz) +#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setxattr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setxattr_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_listxattrs_maxsz) +#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_listxattrs_maxsz) +#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_removexattr_maxsz) +#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_removexattr_maxsz) /* * These values specify the maximum amount of data that is not @@ -317,6 +317,18 @@ static void encode_copy(struct xdr_stream *xdr, encode_nl4_server(xdr, args->cp_src); } +static void encode_copy_commit(struct xdr_stream *xdr, + const struct nfs42_copy_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, args->dst_pos); + *p = cpu_to_be32(args->count); +} + static void encode_offload_cancel(struct xdr_stream *xdr, const struct nfs42_offload_status_args *args, struct compound_hdr *hdr) @@ -452,20 +464,6 @@ static void encode_setxattr(struct xdr_stream *xdr, xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len); } -static int decode_setxattr(struct xdr_stream *xdr, - struct nfs4_change_info *cinfo) -{ - int status; - - status = decode_op_hdr(xdr, OP_SETXATTR); - if (status) - goto out; - status = decode_change_info(xdr, cinfo); -out: - return status; -} - - static void encode_getxattr(struct xdr_stream *xdr, const char *name, struct compound_hdr *hdr) { @@ -473,43 +471,6 @@ static void encode_getxattr(struct xdr_stream *xdr, const char *name, encode_string(xdr, strlen(name), name); } -static int decode_getxattr(struct xdr_stream *xdr, - struct nfs42_getxattrres *res, - struct rpc_rqst *req) -{ - int status; - __be32 *p; - u32 len, rdlen; - - status = decode_op_hdr(xdr, OP_GETXATTR); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - len = be32_to_cpup(p); - - /* - * Only check against the page length here. The actual - * requested length may be smaller, but that is only - * checked against after possibly caching a valid reply. - */ - if (len > req->rq_rcv_buf.page_len) - return -ERANGE; - - res->xattr_len = len; - - if (len > 0) { - rdlen = xdr_read_pages(xdr, len); - if (rdlen < len) - return -EIO; - } - - return 0; -} - static void encode_removexattr(struct xdr_stream *xdr, const char *name, struct compound_hdr *hdr) { @@ -517,21 +478,6 @@ static void encode_removexattr(struct xdr_stream *xdr, const char *name, encode_string(xdr, strlen(name), name); } - -static int decode_removexattr(struct xdr_stream *xdr, - struct nfs4_change_info *cinfo) -{ - int status; - - status = decode_op_hdr(xdr, OP_REMOVEXATTR); - if (status) - goto out; - - status = decode_change_info(xdr, cinfo); -out: - return status; -} - static void encode_listxattrs(struct xdr_stream *xdr, const struct nfs42_listxattrsargs *arg, struct compound_hdr *hdr) @@ -553,104 +499,6 @@ static void encode_listxattrs(struct xdr_stream *xdr, *p = cpu_to_be32(arg->count + 8 + 4); } -static int decode_listxattrs(struct xdr_stream *xdr, - struct nfs42_listxattrsres *res) -{ - int status; - __be32 *p; - u32 count, len, ulen; - size_t left, copied; - char *buf; - - status = decode_op_hdr(xdr, OP_LISTXATTRS); - if (status) { - /* - * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL - * should be translated to ERANGE. - */ - if (status == -ETOOSMALL) - status = -ERANGE; - /* - * Special case: for LISTXATTRS, NFS4ERR_NOXATTR - * should be translated to success with zero-length reply. - */ - if (status == -ENODATA) { - res->eof = true; - status = 0; - } - goto out; - } - - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) - return -EIO; - - xdr_decode_hyper(p, &res->cookie); - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - left = res->xattr_len; - buf = res->xattr_buf; - - count = be32_to_cpup(p); - copied = 0; - - /* - * We have asked for enough room to encode the maximum number - * of possible attribute names, so everything should fit. - * - * But, don't rely on that assumption. Just decode entries - * until they don't fit anymore, just in case the server did - * something odd. - */ - while (count--) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - len = be32_to_cpup(p); - if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { - status = -ERANGE; - goto out; - } - - p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - return -EIO; - - ulen = len + XATTR_USER_PREFIX_LEN + 1; - if (buf) { - if (ulen > left) { - status = -ERANGE; - goto out; - } - - memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); - memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); - - buf[ulen - 1] = 0; - buf += ulen; - left -= ulen; - } - copied += ulen; - } - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - res->eof = be32_to_cpup(p); - res->copied = copied; - -out: - if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) - status = -E2BIG; - - return status; -} - /* * Encode ALLOCATE request */ @@ -671,18 +519,6 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, encode_nops(&hdr); } -static void encode_copy_commit(struct xdr_stream *xdr, - const struct nfs42_copy_args *args, - struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); - p = reserve_space(xdr, 12); - p = xdr_encode_hyper(p, args->dst_pos); - *p = cpu_to_be32(args->count); -} - /* * Encode COPY request */ @@ -871,6 +707,90 @@ static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode SETXATTR request + */ +static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_setxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setxattr(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_nops(&hdr); +} + +/* + * Encode GETXATTR request + */ +static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_getxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 1; + encode_getxattr(xdr, args->xattr_name, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, + replen); + + encode_nops(&hdr); +} + +/* + * Encode LISTXATTR request + */ +static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_listxattrsargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; + encode_listxattrs(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); + + encode_nops(&hdr); +} + +/* + * Encode REMOVEXATTR request + */ +static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_removexattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_removexattr(xdr, args->xattr_name, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -1192,6 +1112,168 @@ static int decode_layouterror(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTERROR); } +static int decode_setxattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_SETXATTR); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_getxattr(struct xdr_stream *xdr, + struct nfs42_getxattrres *res, + struct rpc_rqst *req) +{ + int status; + __be32 *p; + u32 len, rdlen; + + status = decode_op_hdr(xdr, OP_GETXATTR); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + + /* + * Only check against the page length here. The actual + * requested length may be smaller, but that is only + * checked against after possibly caching a valid reply. + */ + if (len > req->rq_rcv_buf.page_len) + return -ERANGE; + + res->xattr_len = len; + + if (len > 0) { + rdlen = xdr_read_pages(xdr, len); + if (rdlen < len) + return -EIO; + } + + return 0; +} + +static int decode_removexattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVEXATTR); + if (status) + goto out; + + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_listxattrs(struct xdr_stream *xdr, + struct nfs42_listxattrsres *res) +{ + int status; + __be32 *p; + u32 count, len, ulen; + size_t left, copied; + char *buf; + + status = decode_op_hdr(xdr, OP_LISTXATTRS); + if (status) { + /* + * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL + * should be translated to ERANGE. + */ + if (status == -ETOOSMALL) + status = -ERANGE; + /* + * Special case: for LISTXATTRS, NFS4ERR_NOXATTR + * should be translated to success with zero-length reply. + */ + if (status == -ENODATA) { + res->eof = true; + status = 0; + } + goto out; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + return -EIO; + + xdr_decode_hyper(p, &res->cookie); + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + left = res->xattr_len; + buf = res->xattr_buf; + + count = be32_to_cpup(p); + copied = 0; + + /* + * We have asked for enough room to encode the maximum number + * of possible attribute names, so everything should fit. + * + * But, don't rely on that assumption. Just decode entries + * until they don't fit anymore, just in case the server did + * something odd. + */ + while (count--) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EIO; + + ulen = len + XATTR_USER_PREFIX_LEN + 1; + if (buf) { + if (ulen > left) { + status = -ERANGE; + goto out; + } + + memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); + + buf[ulen - 1] = 0; + buf += ulen; + left -= ulen; + } + copied += ulen; + } + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + res->eof = be32_to_cpup(p); + res->copied = copied; + +out: + if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) + status = -E2BIG; + + return status; +} + /* * Decode ALLOCATE request */ @@ -1481,22 +1563,9 @@ out: return status; } -#ifdef CONFIG_NFS_V4_2 -static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, - const void *data) -{ - const struct nfs42_setxattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_setxattr(xdr, args, &hdr); - encode_nops(&hdr); -} - +/* + * Decode SETXATTR request + */ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { @@ -1513,33 +1582,17 @@ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_setxattr(xdr, &res->cinfo); + if (status) + goto out; + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } -static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, - const void *data) -{ - const struct nfs42_getxattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - uint32_t replen; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; - encode_getxattr(xdr, args->xattr_name, &hdr); - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, - replen); - - encode_nops(&hdr); -} - +/* + * Decode GETXATTR request + */ static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1561,26 +1614,9 @@ out: return status; } -static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfs42_listxattrsargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - uint32_t replen; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; - encode_listxattrs(xdr, args, &hdr); - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); - - encode_nops(&hdr); -} - +/* + * Decode LISTXATTR request + */ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1604,21 +1640,9 @@ out: return status; } -static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfs42_removexattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_removexattr(xdr, args->xattr_name, &hdr); - encode_nops(&hdr); -} - +/* + * Decode REMOVEXATTR request + */ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { @@ -1640,5 +1664,4 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, out: return status; } -#endif #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index d3051b051a56..d9114a754db7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -18,6 +18,7 @@ #include "nfs4idmap.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -896,7 +897,8 @@ static int nfs4_set_client(struct nfs_server *server, int proto, const struct rpc_timeout *timeparms, u32 minorversion, unsigned int nconnect, unsigned int max_connect, - struct net *net) + struct net *net, + struct xprtsec_parms *xprtsec) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -909,6 +911,7 @@ static int nfs4_set_client(struct nfs_server *server, .net = net, .timeparms = timeparms, .cred = server->cred, + .xprtsec = *xprtsec, }; struct nfs_client *clp; @@ -916,8 +919,11 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); else cl_init.max_connect = max_connect; - if (proto == XPRT_TRANSPORT_TCP) + switch (proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: cl_init.nconnect = nconnect; + } if (server->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -947,6 +953,9 @@ static int nfs4_set_client(struct nfs_server *server, set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); server->nfs_client = clp; + nfs_sysfs_add_server(server); + nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state"); + return 0; } @@ -978,6 +987,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, + .xprtsec = mds_srv->nfs_client->cl_xprtsec, }; char buf[INET6_ADDRSTRLEN + 1]; @@ -985,9 +995,13 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) { - cl_init.nconnect = mds_clp->cl_nconnect; - cl_init.max_connect = NFS_MAX_TRANSPORTS; + switch (ds_proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_nconnect > 1) { + cl_init.nconnect = mds_clp->cl_nconnect; + cl_init.max_connect = NFS_MAX_TRANSPORTS; + } } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) @@ -1157,7 +1171,8 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) ctx->minorversion, ctx->nfs_server.nconnect, ctx->nfs_server.max_connect, - fc->net_ns); + fc->net_ns, + &ctx->xprtsec); if (error < 0) return error; @@ -1219,8 +1234,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_client *parent_client; struct nfs_server *server, *parent_server; + int proto, error; bool auth_probe; - int error; server = nfs_alloc_server(); if (!server) @@ -1247,23 +1262,28 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, parent_client->cl_max_connect, - parent_client->cl_net); + parent_client->cl_net, + &parent_client->cl_xprtsec); if (!error) goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ + proto = XPRT_TRANSPORT_TCP; + if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + proto = XPRT_TRANSPORT_TCP_TLS; rpc_set_port(&ctx->nfs_server.address, NFS_PORT); error = nfs4_set_client(server, ctx->nfs_server.hostname, &ctx->nfs_server._address, ctx->nfs_server.addrlen, parent_client->cl_ipaddr, - XPRT_TRANSPORT_TCP, + proto, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, parent_client->cl_max_connect, - parent_client->cl_net); + parent_client->cl_net, + &parent_client->cl_xprtsec); if (error < 0) goto error; @@ -1314,6 +1334,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, .dstaddr = (struct sockaddr *)sap, .addrlen = salen, .servername = hostname, + /* cel: bleh. We might need to pass TLS parameters here */ }; char buf[INET6_ADDRSTRLEN + 1]; struct sockaddr_storage address; @@ -1336,7 +1357,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, - clp->cl_nconnect, clp->cl_max_connect, net); + clp->cl_nconnect, clp->cl_max_connect, + net, &clp->cl_xprtsec); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d3665390c4cb..e1a886b58354 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -921,6 +921,7 @@ out: out_noaction: return ret; session_recover: + set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state); nfs4_schedule_session_recovery(session, status); dprintk("%s ERROR: %d Reset session\n", __func__, status); nfs41_sequence_free_slot(res); @@ -7159,7 +7160,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; - struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7167,7 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(server, data->timestamp); + renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), + data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -7188,8 +7189,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; - else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) - goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart; @@ -9371,7 +9370,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) return; trace_nfs4_sequence(clp, task->tk_status); - if (task->tk_status < 0) { + if (task->tk_status < 0 && !task->tk_client->cl_shutdown) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) return; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bbe49315d99e..e079987af4a3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1210,6 +1210,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + if (clp->cl_rpcclient->cl_shutdown) + return; + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { wake_up_var(&clp->cl_state); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 30e53e93049e..2284f749d892 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -59,6 +59,8 @@ #include <linux/uaccess.h> #include <linux/nfs_ssc.h> +#include <uapi/linux/tls.h> + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -68,6 +70,8 @@ #include "nfs4session.h" #include "pnfs.h" #include "nfs.h" +#include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -491,6 +495,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); + switch (clp->cl_xprtsec.policy) { + case RPC_XPRTSEC_TLS_ANON: + seq_puts(m, ",xprtsec=tls"); + break; + case RPC_XPRTSEC_TLS_X509: + seq_puts(m, ",xprtsec=mtls"); + break; + default: + break; + } if (version != 4) nfs_show_mountd_options(m, nfss, showdefaults); @@ -1077,6 +1091,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) &sb->s_blocksize_bits); nfs_super_set_maxbytes(sb, server->maxfilesize); + nfs_sysfs_move_server_to_sb(sb); server->has_sec_mnt_opts = ctx->has_sec_mnt_opts; } @@ -1319,13 +1334,14 @@ error_splat_super: } /* - * Destroy an NFS2/3 superblock + * Destroy an NFS superblock */ void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); dev_t dev = s->s_dev; + nfs_sysfs_move_sb_to_server(server); generic_shutdown_super(s); nfs_fscache_release_super_cookie(s); diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 0cbcd2dfa732..acda8f033d30 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -12,17 +12,18 @@ #include <linux/string.h> #include <linux/nfs_fs.h> #include <linux/rcupdate.h> +#include <linux/lockd/lockd.h> #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" -struct kobject *nfs_client_kobj; -static struct kset *nfs_client_kset; +static struct kset *nfs_kset; -static void nfs_netns_object_release(struct kobject *kobj) +static void nfs_kset_release(struct kobject *kobj) { - kfree(kobj); + struct kset *kset = container_of(kobj, struct kset, kobj); + kfree(kset); } static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( @@ -31,46 +32,42 @@ static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( return &net_ns_type_operations; } -static struct kobj_type nfs_netns_object_type = { - .release = nfs_netns_object_release, +static struct kobj_type nfs_kset_type = { + .release = nfs_kset_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = nfs_netns_object_child_ns_type, }; -static struct kobject *nfs_netns_object_alloc(const char *name, - struct kset *kset, struct kobject *parent) +int nfs_sysfs_init(void) { - struct kobject *kobj; + int ret; + + nfs_kset = kzalloc(sizeof(*nfs_kset), GFP_KERNEL); + if (!nfs_kset) + return -ENOMEM; - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (kobj) { - kobj->kset = kset; - if (kobject_init_and_add(kobj, &nfs_netns_object_type, - parent, "%s", name) == 0) - return kobj; - kobject_put(kobj); + ret = kobject_set_name(&nfs_kset->kobj, "nfs"); + if (ret) { + kfree(nfs_kset); + return ret; } - return NULL; -} -int nfs_sysfs_init(void) -{ - nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); - if (!nfs_client_kset) - return -ENOMEM; - nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); - if (!nfs_client_kobj) { - kset_unregister(nfs_client_kset); - nfs_client_kset = NULL; - return -ENOMEM; + nfs_kset->kobj.parent = fs_kobj; + nfs_kset->kobj.ktype = &nfs_kset_type; + nfs_kset->kobj.kset = NULL; + + ret = kset_register(nfs_kset); + if (ret) { + kfree(nfs_kset); + return ret; } + return 0; } void nfs_sysfs_exit(void) { - kobject_put(nfs_client_kobj); - kset_unregister(nfs_client_kset); + kset_unregister(nfs_kset); } static ssize_t nfs_netns_identifier_show(struct kobject *kobj, @@ -127,7 +124,6 @@ static void nfs_netns_client_release(struct kobject *kobj) kobject); kfree(rcu_dereference_raw(c->identifier)); - kfree(c); } static const void *nfs_netns_client_namespace(const struct kobject *kobj) @@ -151,6 +147,25 @@ static struct kobj_type nfs_netns_client_type = { .namespace = nfs_netns_client_namespace, }; +static void nfs_netns_object_release(struct kobject *kobj) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + nfs_net_kobj); + kfree(c); +} + +static const void *nfs_netns_namespace(const struct kobject *kobj) +{ + return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net; +} + +static struct kobj_type nfs_netns_object_type = { + .release = nfs_netns_object_release, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_namespace, +}; + static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, struct net *net) { @@ -159,10 +174,19 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, p = kzalloc(sizeof(*p), GFP_KERNEL); if (p) { p->net = net; - p->kobject.kset = nfs_client_kset; + p->kobject.kset = nfs_kset; + p->nfs_net_kobj.kset = nfs_kset; + + if (kobject_init_and_add(&p->nfs_net_kobj, &nfs_netns_object_type, + parent, "net") != 0) { + kobject_put(&p->nfs_net_kobj); + return NULL; + } + if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, - parent, "nfs_client") == 0) + &p->nfs_net_kobj, "nfs_client") == 0) return p; + kobject_put(&p->kobject); } return NULL; @@ -172,7 +196,7 @@ void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) { struct nfs_netns_client *clp; - clp = nfs_netns_client_alloc(nfs_client_kobj, net); + clp = nfs_netns_client_alloc(&nfs_kset->kobj, net); if (clp) { netns->nfs_client = clp; kobject_uevent(&clp->kobject, KOBJ_ADD); @@ -187,6 +211,149 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) kobject_uevent(&clp->kobject, KOBJ_REMOVE); kobject_del(&clp->kobject); kobject_put(&clp->kobject); + kobject_del(&clp->nfs_net_kobj); + kobject_put(&clp->nfs_net_kobj); netns->nfs_client = NULL; } } + +static bool shutdown_match_client(const struct rpc_task *task, const void *data) +{ + return true; +} + +static void shutdown_client(struct rpc_clnt *clnt) +{ + clnt->cl_shutdown = 1; + rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); +} + +static ssize_t +shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + bool shutdown = server->flags & NFS_MOUNT_SHUTDOWN; + return sysfs_emit(buf, "%d\n", shutdown); +} + +static ssize_t +shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct nfs_server *server; + int ret, val; + + server = container_of(kobj, struct nfs_server, kobj); + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val != 1) + return -EINVAL; + + /* already shut down? */ + if (server->flags & NFS_MOUNT_SHUTDOWN) + goto out; + + server->flags |= NFS_MOUNT_SHUTDOWN; + shutdown_client(server->client); + shutdown_client(server->nfs_client->cl_rpcclient); + + if (!IS_ERR(server->client_acl)) + shutdown_client(server->client_acl); + + if (server->nlm_host) + shutdown_client(server->nlm_host->h_rpcclnt); +out: + return count; +} + +static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown); + +#define RPC_CLIENT_NAME_SIZE 64 + +void nfs_sysfs_link_rpc_client(struct nfs_server *server, + struct rpc_clnt *clnt, const char *uniq) +{ + char name[RPC_CLIENT_NAME_SIZE]; + int ret; + + strcpy(name, clnt->cl_program->name); + strcat(name, uniq ? uniq : ""); + strcat(name, "_client"); + + ret = sysfs_create_link_nowarn(&server->kobj, + &clnt->cl_sysfs->kobject, name); + if (ret < 0) + pr_warn("NFS: can't create link to %s in sysfs (%d)\n", + name, ret); +} +EXPORT_SYMBOL_GPL(nfs_sysfs_link_rpc_client); + +static void nfs_sysfs_sb_release(struct kobject *kobj) +{ + /* no-op: why? see lib/kobject.c kobject_cleanup() */ +} + +static const void *nfs_netns_server_namespace(const struct kobject *kobj) +{ + return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net; +} + +static struct kobj_type nfs_sb_ktype = { + .release = nfs_sysfs_sb_release, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_server_namespace, + .child_ns_type = nfs_netns_object_child_ns_type, +}; + +void nfs_sysfs_add_server(struct nfs_server *server) +{ + int ret; + + ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype, + &nfs_kset->kobj, "server-%d", server->s_sysfs_id); + if (ret < 0) { + pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n", + server->s_sysfs_id, ret); + return; + } + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_shutdown.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); +} +EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); + +void nfs_sysfs_move_server_to_sb(struct super_block *s) +{ + struct nfs_server *server = s->s_fs_info; + int ret; + + ret = kobject_rename(&server->kobj, s->s_id); + if (ret < 0) + pr_warn("NFS: rename sysfs %s failed (%d)\n", + server->kobj.name, ret); +} + +void nfs_sysfs_move_sb_to_server(struct nfs_server *server) +{ + const char *s; + int ret = -ENOMEM; + + s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id); + if (s) + ret = kobject_rename(&server->kobj, s); + if (ret < 0) + pr_warn("NFS: rename sysfs %s failed (%d)\n", + server->kobj.name, ret); +} + +/* unlink, not dec-ref */ +void nfs_sysfs_remove_server(struct nfs_server *server) +{ + kobject_del(&server->kobj); +} diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index 5501ef573c32..c5d1990cade5 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -10,11 +10,12 @@ struct nfs_netns_client { struct kobject kobject; + struct kobject nfs_net_kobj; struct net *net; const char __rcu *identifier; }; -extern struct kobject *nfs_client_kobj; +extern struct kobject *nfs_net_kobj; extern int nfs_sysfs_init(void); extern void nfs_sysfs_exit(void); @@ -22,4 +23,11 @@ extern void nfs_sysfs_exit(void); void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); void nfs_netns_sysfs_destroy(struct nfs_net *netns); +void nfs_sysfs_link_rpc_client(struct nfs_server *server, + struct rpc_clnt *clnt, const char *sysfs_prefix); +void nfs_sysfs_add_server(struct nfs_server *s); +void nfs_sysfs_move_server_to_sb(struct super_block *s); +void nfs_sysfs_move_sb_to_server(struct nfs_server *s); +void nfs_sysfs_remove_server(struct nfs_server *s); + #endif diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 3bc9f7410e21..c53c81242e72 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -20,6 +20,7 @@ /* Dummy declarations */ struct svc_rqst; struct rpc_task; +struct rpc_clnt; /* * This is the set of functions for lockd->nfsd communication @@ -56,6 +57,7 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); +extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host); /* * NLM client operations provide a means to modify RPC processing of NLM diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ea2f7e6b1b0b..20eeba8b009d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -63,7 +63,8 @@ struct nfs_client { u32 cl_minorversion;/* NFSv4 minorversion */ unsigned int cl_nconnect; /* Number of connections */ unsigned int cl_max_connect; /* max number of xprts allowed */ - const char * cl_principal; /* used for machine cred */ + const char * cl_principal; /* used for machine cred */ + struct xprtsec_parms cl_xprtsec; /* xprt security policy */ #if IS_ENABLED(CONFIG_NFS_V4) struct list_head cl_ds_clients; /* auth flavor data servers */ @@ -153,6 +154,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 +#define NFS_MOUNT_SHUTDOWN 0x08000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ @@ -183,6 +185,7 @@ struct nfs_server { change_attr_type;/* Description of change attribute */ struct nfs_fsid fsid; + int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ @@ -259,6 +262,7 @@ struct nfs_server { /* User namespace info */ const struct cred *cred; bool has_sec_mnt_opts; + struct kobject kobj; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 29a1b39794bf..12bbb5c63664 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1528,6 +1528,7 @@ struct nfs42_seek_res { struct nfs42_setxattrargs { struct nfs4_sequence_args seq_args; struct nfs_fh *fh; + const u32 *bitmask; const char *xattr_name; u32 xattr_flags; size_t xattr_len; @@ -1537,6 +1538,8 @@ struct nfs42_setxattrargs { struct nfs42_setxattrres { struct nfs4_sequence_res seq_res; struct nfs4_change_info cinfo; + struct nfs_fattr *fattr; + const struct nfs_server *server; }; struct nfs42_getxattrargs { diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3e6ce288a7fc..61e58327b1aa 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -120,6 +120,7 @@ struct rpc_authops { struct rpcsec_gss_info *); int (*key_timeout)(struct rpc_auth *, struct rpc_cred *); + int (*ping)(struct rpc_clnt *clnt); }; struct rpc_credops { @@ -144,6 +145,7 @@ struct rpc_credops { extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +extern const struct rpc_authops authtls_ops; int __init rpc_init_authunix(void); int __init rpcauth_init_module(void); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 770ef2cb5775..4f41d839face 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,13 @@ #include <linux/sunrpc/xprtmultipath.h> struct rpc_inode; -struct rpc_sysfs_client; +struct rpc_sysfs_client { + struct kobject kobject; + struct net *net; + struct rpc_clnt *clnt; + struct rpc_xprt_switch *xprt_switch; +}; + /* * The high-level client handle @@ -57,7 +63,9 @@ struct rpc_clnt { cl_discrtry : 1,/* disconnect before retry */ cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ - cl_chatty : 1;/* be verbose */ + cl_chatty : 1,/* be verbose */ + cl_shutdown : 1;/* rpc immediate -EIO */ + struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -139,6 +147,7 @@ struct rpc_create_args { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ const struct cred *cred; unsigned int max_connect; + struct xprtsec_parms xprtsec; }; struct rpc_add_xprt_test { diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b9f59aabee53..b52411bcfe4e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -129,6 +129,21 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +/* RPC transport layer security policies */ +enum xprtsec_policies { + RPC_XPRTSEC_NONE = 0, + RPC_XPRTSEC_TLS_ANON, + RPC_XPRTSEC_TLS_X509, +}; + +struct xprtsec_parms { + enum xprtsec_policies policy; + + /* authentication material */ + key_serial_t cert_serial; + key_serial_t privkey_serial; +}; + struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -185,6 +200,7 @@ enum xprt_transports { XPRT_TRANSPORT_RDMA = 256, XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, + XPRT_TRANSPORT_TCP_TLS = 258, }; struct rpc_sysfs_xprt; @@ -229,6 +245,7 @@ struct rpc_xprt { */ unsigned long bind_timeout, reestablish_timeout; + struct xprtsec_parms xprtsec; unsigned int connect_cookie; /* A cookie that gets bumped every time the transport is reconnected */ @@ -333,6 +350,7 @@ struct xprt_create { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *bc_xps; unsigned int flags; + struct xprtsec_parms xprtsec; }; struct xprt_class { diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 38284f25eddf..700a1e6c047c 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -57,9 +57,11 @@ struct sock_xprt { struct work_struct error_worker; struct work_struct recv_worker; struct mutex recv_mutex; + struct completion handshake_done; struct sockaddr_storage srcaddr; unsigned short srcport; int xprt_err; + struct rpc_clnt *clnt; /* * UDP socket buffer size parameters @@ -90,5 +92,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_DISCONNECT (7) #define XPRT_SOCK_CONNECT_SENT (8) #define XPRT_SOCK_NOSPACE (9) +#define XPRT_SOCK_IGNORE_RECV (10) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 69e42ef30979..43711753616a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -139,36 +139,68 @@ DEFINE_RPC_CLNT_EVENT(release); DEFINE_RPC_CLNT_EVENT(replace_xprt); DEFINE_RPC_CLNT_EVENT(replace_xprt_err); +TRACE_DEFINE_ENUM(RPC_XPRTSEC_NONE); +TRACE_DEFINE_ENUM(RPC_XPRTSEC_TLS_X509); + +#define rpc_show_xprtsec_policy(policy) \ + __print_symbolic(policy, \ + { RPC_XPRTSEC_NONE, "none" }, \ + { RPC_XPRTSEC_TLS_ANON, "tls-anon" }, \ + { RPC_XPRTSEC_TLS_X509, "tls-x509" }) + +#define rpc_show_create_flags(flags) \ + __print_flags(flags, "|", \ + { RPC_CLNT_CREATE_HARDRTRY, "HARDRTRY" }, \ + { RPC_CLNT_CREATE_AUTOBIND, "AUTOBIND" }, \ + { RPC_CLNT_CREATE_NONPRIVPORT, "NONPRIVPORT" }, \ + { RPC_CLNT_CREATE_NOPING, "NOPING" }, \ + { RPC_CLNT_CREATE_DISCRTRY, "DISCRTRY" }, \ + { RPC_CLNT_CREATE_QUIET, "QUIET" }, \ + { RPC_CLNT_CREATE_INFINITE_SLOTS, \ + "INFINITE_SLOTS" }, \ + { RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, \ + "NO_IDLE_TIMEOUT" }, \ + { RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT, \ + "NO_RETRANS_TIMEOUT" }, \ + { RPC_CLNT_CREATE_SOFTERR, "SOFTERR" }, \ + { RPC_CLNT_CREATE_REUSEPORT, "REUSEPORT" }) + TRACE_EVENT(rpc_clnt_new, TP_PROTO( const struct rpc_clnt *clnt, const struct rpc_xprt *xprt, - const char *program, - const char *server + const struct rpc_create_args *args ), - TP_ARGS(clnt, xprt, program, server), + TP_ARGS(clnt, xprt, args), TP_STRUCT__entry( __field(unsigned int, client_id) + __field(unsigned long, xprtsec) + __field(unsigned long, flags) + __string(program, clnt->cl_program->name) + __string(server, xprt->servername) __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) - __string(program, program) - __string(server, server) ), TP_fast_assign( __entry->client_id = clnt->cl_clid; + __entry->xprtsec = args->xprtsec.policy; + __entry->flags = args->flags; + __assign_str(program, clnt->cl_program->name); + __assign_str(server, xprt->servername); __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); - __assign_str(program, program); - __assign_str(server, server); ), - TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER - " peer=[%s]:%s program=%s server=%s", + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " peer=[%s]:%s" + " program=%s server=%s xprtsec=%s flags=%s", __entry->client_id, __get_str(addr), __get_str(port), - __get_str(program), __get_str(server)) + __get_str(program), __get_str(server), + rpc_show_xprtsec_policy(__entry->xprtsec), + rpc_show_create_flags(__entry->flags) + ) ); TRACE_EVENT(rpc_clnt_new_err, @@ -1493,6 +1525,50 @@ TRACE_EVENT(rpcb_unregister, ) ); +/** + ** RPC-over-TLS tracepoints + **/ + +DECLARE_EVENT_CLASS(rpc_tls_class, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_xprt *xprt + ), + + TP_ARGS(clnt, xprt), + + TP_STRUCT__entry( + __field(unsigned long, requested_policy) + __field(u32, version) + __string(servername, xprt->servername) + __string(progname, clnt->cl_program->name) + ), + + TP_fast_assign( + __entry->requested_policy = clnt->cl_xprtsec.policy; + __entry->version = clnt->cl_vers; + __assign_str(servername, xprt->servername); + __assign_str(progname, clnt->cl_program->name) + ), + + TP_printk("server=%s %sv%u requested_policy=%s", + __get_str(servername), __get_str(progname), __entry->version, + rpc_show_xprtsec_policy(__entry->requested_policy) + ) +); + +#define DEFINE_RPC_TLS_EVENT(name) \ + DEFINE_EVENT(rpc_tls_class, rpc_tls_##name, \ + TP_PROTO( \ + const struct rpc_clnt *clnt, \ + const struct rpc_xprt *xprt \ + ), \ + TP_ARGS(clnt, xprt)) + +DEFINE_RPC_TLS_EVENT(unavailable); +DEFINE_RPC_TLS_EVENT(not_started); + + /* Record an xdr_buf containing a fully-formed RPC message */ DECLARE_EVENT_CLASS(svc_xdr_msg_class, TP_PROTO( diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 1c8de397d6ad..f89c10fe7e6a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o \ + auth.o auth_null.o auth_tls.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index fb75a883503f..2f16f9d17966 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -32,7 +32,7 @@ static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS; static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { [RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops, [RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops, - NULL, /* others can be loadable modules */ + [RPC_AUTH_TLS] = (const struct rpc_authops __force __rcu *)&authtls_ops, }; static LIST_HEAD(cred_unused); diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c new file mode 100644 index 000000000000..de7678f8a23d --- /dev/null +++ b/net/sunrpc/auth_tls.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, 2022 Oracle. All rights reserved. + * + * The AUTH_TLS credential is used only to probe a remote peer + * for RPC-over-TLS support. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/sunrpc/clnt.h> + +static const char *starttls_token = "STARTTLS"; +static const size_t starttls_len = 8; + +static struct rpc_auth tls_auth; +static struct rpc_cred tls_cred; + +static void tls_encode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj) +{ +} + +static int tls_decode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj) +{ + return 0; +} + +static const struct rpc_procinfo rpcproc_tls_probe = { + .p_encode = tls_encode_probe, + .p_decode = tls_decode_probe, +}; + +static void rpc_tls_probe_call_prepare(struct rpc_task *task, void *data) +{ + task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT; + rpc_call_start(task); +} + +static void rpc_tls_probe_call_done(struct rpc_task *task, void *data) +{ +} + +static const struct rpc_call_ops rpc_tls_probe_ops = { + .rpc_call_prepare = rpc_tls_probe_call_prepare, + .rpc_call_done = rpc_tls_probe_call_done, +}; + +static int tls_probe(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_tls_probe, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .rpc_op_cred = &tls_cred, + .callback_ops = &rpc_tls_probe_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + +static struct rpc_auth *tls_create(const struct rpc_auth_create_args *args, + struct rpc_clnt *clnt) +{ + refcount_inc(&tls_auth.au_count); + return &tls_auth; +} + +static void tls_destroy(struct rpc_auth *auth) +{ +} + +static struct rpc_cred *tls_lookup_cred(struct rpc_auth *auth, + struct auth_cred *acred, int flags) +{ + return get_rpccred(&tls_cred); +} + +static void tls_destroy_cred(struct rpc_cred *cred) +{ +} + +static int tls_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) +{ + return 1; +} + +static int tls_marshal(struct rpc_task *task, struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4 * XDR_UNIT); + if (!p) + return -EMSGSIZE; + /* Credential */ + *p++ = rpc_auth_tls; + *p++ = xdr_zero; + /* Verifier */ + *p++ = rpc_auth_null; + *p = xdr_zero; + return 0; +} + +static int tls_refresh(struct rpc_task *task) +{ + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); + return 0; +} + +static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr) +{ + __be32 *p; + void *str; + + p = xdr_inline_decode(xdr, XDR_UNIT); + if (!p) + return -EIO; + if (*p != rpc_auth_null) + return -EIO; + if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len) + return -EIO; + if (memcmp(str, starttls_token, starttls_len)) + return -EIO; + return 0; +} + +const struct rpc_authops authtls_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_TLS, + .au_name = "NULL", + .create = tls_create, + .destroy = tls_destroy, + .lookup_cred = tls_lookup_cred, + .ping = tls_probe, +}; + +static struct rpc_auth tls_auth = { + .au_cslack = NUL_CALLSLACK, + .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, + .au_ralign = NUL_REPLYSLACK, + .au_ops = &authtls_ops, + .au_flavor = RPC_AUTH_TLS, + .au_count = REFCOUNT_INIT(1), +}; + +static const struct rpc_credops tls_credops = { + .cr_name = "AUTH_TLS", + .crdestroy = tls_destroy_cred, + .crmatch = tls_match, + .crmarshal = tls_marshal, + .crwrap_req = rpcauth_wrap_req_encode, + .crrefresh = tls_refresh, + .crvalidate = tls_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, +}; + +static struct rpc_cred tls_cred = { + .cr_lru = LIST_HEAD_INIT(tls_cred.cr_lru), + .cr_auth = &tls_auth, + .cr_ops = &tls_credops, + .cr_count = REFCOUNT_INIT(2), + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, +}; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d2ee56634308..d7c697af3762 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -385,6 +385,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (!clnt) goto out_err; clnt->cl_parent = parent ? : clnt; + clnt->cl_xprtsec = args->xprtsec; err = rpc_alloc_clid(clnt); if (err) @@ -434,7 +435,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (parent) refcount_inc(&parent->cl_count); - trace_rpc_clnt_new(clnt, xprt, program->name, args->servername); + trace_rpc_clnt_new(clnt, xprt, args); return clnt; out_no_path: @@ -532,6 +533,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .addrlen = args->addrsize, .servername = args->servername, .bc_xprt = args->bc_xprt, + .xprtsec = args->xprtsec, }; char servername[48]; struct rpc_clnt *clnt; @@ -565,8 +567,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) servername[0] = '\0'; switch (args->address->sa_family) { case AF_LOCAL: - snprintf(servername, sizeof(servername), "%s", - sun->sun_path); + if (sun->sun_path[0]) + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + else + snprintf(servername, sizeof(servername), "@%s", + sun->sun_path+1); break; case AF_INET: snprintf(servername, sizeof(servername), "%pI4", @@ -727,6 +733,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, struct rpc_clnt *parent; int err; + args->xprtsec = clnt->cl_xprtsec; xprt = xprt_create_transport(args); if (IS_ERR(xprt)) return PTR_ERR(xprt); @@ -1717,6 +1724,11 @@ call_start(struct rpc_task *task) trace_rpc_request(task); + if (task->tk_client->cl_shutdown) { + rpc_call_rpcerror(task, -EIO); + return; + } + /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) clnt->cl_program->version[clnt->cl_vers]->counts[idx]++; @@ -2826,6 +2838,9 @@ static int rpc_ping(struct rpc_clnt *clnt) struct rpc_task *task; int status; + if (clnt->cl_auth->au_ops->ping) + return clnt->cl_auth->au_ops->ping(clnt); + task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL); if (IS_ERR(task)) return PTR_ERR(task); @@ -3046,6 +3061,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, if (!xprtargs->ident) xprtargs->ident = ident; + xprtargs->xprtsec = clnt->cl_xprtsec; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 5a8e6d46809a..5988a5c5ff3f 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -36,6 +36,7 @@ #include "netns.h" #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" +#define RPCBIND_SOCK_ABSTRACT_NAME "\0/run/rpcbind.sock" #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -216,21 +217,22 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_users = 1; } +/* Evaluate to actual length of the `sockaddr_un' structure. */ +# define SUN_LEN(ptr) (offsetof(struct sockaddr_un, sun_path) \ + + 1 + strlen((ptr)->sun_path + 1)) + /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_unix(struct net *net) +static int rpcb_create_af_local(struct net *net, + const struct sockaddr_un *addr) { - static const struct sockaddr_un rpcb_localaddr_rpcbind = { - .sun_family = AF_LOCAL, - .sun_path = RPCBIND_SOCK_PATHNAME, - }; struct rpc_create_args args = { .net = net, .protocol = XPRT_TRANSPORT_LOCAL, - .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, - .addrsize = sizeof(rpcb_localaddr_rpcbind), + .address = (struct sockaddr *)addr, + .addrsize = SUN_LEN(addr), .servername = "localhost", .program = &rpcb_program, .version = RPCBVERS_2, @@ -269,6 +271,26 @@ out: return result; } +static int rpcb_create_local_abstract(struct net *net) +{ + static const struct sockaddr_un rpcb_localaddr_abstract = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_ABSTRACT_NAME, + }; + + return rpcb_create_af_local(net, &rpcb_localaddr_abstract); +} + +static int rpcb_create_local_unix(struct net *net) +{ + static const struct sockaddr_un rpcb_localaddr_unix = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + + return rpcb_create_af_local(net, &rpcb_localaddr_unix); +} + /* * Returns zero on success, otherwise a negative errno value * is returned. @@ -332,7 +354,8 @@ int rpcb_create_local(struct net *net) if (rpcb_get_local(net)) goto out; - if (rpcb_create_local_unix(net) != 0) + if (rpcb_create_local_abstract(net) != 0 && + rpcb_create_local_unix(net) != 0) result = rpcb_create_local_net(net); out: diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 0d0db4e1064e..5c8ecdaaa985 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -239,6 +239,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, if (!xprt) return 0; if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP || + xprt->xprt_class->ident == XPRT_TRANSPORT_TCP_TLS || xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) { xprt_put(xprt); return -EOPNOTSUPP; diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h index 6620cebd1037..d2dd77a0a0e9 100644 --- a/net/sunrpc/sysfs.h +++ b/net/sunrpc/sysfs.h @@ -5,13 +5,6 @@ #ifndef __SUNRPC_SYSFS_H #define __SUNRPC_SYSFS_H -struct rpc_sysfs_client { - struct kobject kobject; - struct net *net; - struct rpc_clnt *clnt; - struct rpc_xprt_switch *xprt_switch; -}; - struct rpc_sysfs_xprt_switch { struct kobject kobject; struct net *net; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5f9030b81c9e..9f010369100a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -47,6 +47,9 @@ #include <net/checksum.h> #include <net/udp.h> #include <net/tcp.h> +#include <net/tls.h> +#include <net/handshake.h> + #include <linux/bvec.h> #include <linux/highmem.h> #include <linux/uio.h> @@ -96,6 +99,7 @@ static struct ctl_table_header *sunrpc_table_header; static struct xprt_class xs_local_transport; static struct xprt_class xs_udp_transport; static struct xprt_class xs_tcp_transport; +static struct xprt_class xs_tcp_tls_transport; static struct xprt_class xs_bc_tcp_transport; /* @@ -187,6 +191,11 @@ static struct ctl_table xs_tunables_table[] = { */ #define XS_IDLE_DISC_TO (5U * 60 * HZ) +/* + * TLS handshake timeout. + */ +#define XS_TLS_HANDSHAKE_TO (10U * HZ) + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS @@ -253,7 +262,12 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_LOCAL: sun = xs_addr_un(xprt); - strscpy(buf, sun->sun_path, sizeof(buf)); + if (sun->sun_path[0]) { + strscpy(buf, sun->sun_path, sizeof(buf)); + } else { + buf[0] = '@'; + strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1); + } xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); break; @@ -342,13 +356,56 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) return want; } +static int +xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, + struct cmsghdr *cmsg, int ret) +{ + if (cmsg->cmsg_level == SOL_TLS && + cmsg->cmsg_type == TLS_GET_RECORD_TYPE) { + u8 content_type = *((u8 *)CMSG_DATA(cmsg)); + + switch (content_type) { + case TLS_RECORD_TYPE_DATA: + /* TLS sets EOR at the end of each application data + * record, even though there might be more frames + * waiting to be decrypted. + */ + msg->msg_flags &= ~MSG_EOR; + break; + case TLS_RECORD_TYPE_ALERT: + ret = -ENOTCONN; + break; + default: + ret = -EAGAIN; + } + } + return ret; +} + +static int +xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +{ + union { + struct cmsghdr cmsg; + u8 buf[CMSG_SPACE(sizeof(u8))]; + } u; + int ret; + + msg->msg_control = &u; + msg->msg_controllen = sizeof(u); + ret = sock_recvmsg(sock, msg, flags); + if (msg->msg_controllen != sizeof(u)) + ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + return ret; +} + static ssize_t xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) { ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = sock_recvmsg(sock, msg, flags); + ret = xs_sock_recv_cmsg(sock, msg, flags); return ret > 0 ? ret + seek : ret; } @@ -374,7 +431,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return sock_recvmsg(sock, msg, flags); + return xs_sock_recv_cmsg(sock, msg, flags); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE @@ -695,6 +752,8 @@ static void xs_poll_check_readable(struct sock_xprt *transport) { clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) + return; if (!xs_poll_socket_readable(transport)) return; if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) @@ -1191,6 +1250,8 @@ static void xs_reset_transport(struct sock_xprt *transport) if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); + tls_handshake_cancel(sk); + kernel_sock_shutdown(sock, SHUT_RDWR); mutex_lock(&transport->recv_mutex); @@ -1380,6 +1441,10 @@ static void xs_data_ready(struct sock *sk) trace_xs_data_ready(xprt); transport->old_data_ready(sk); + + if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) + return; + /* Any data means we had a useful conversation, so * then we don't need to delay the next reconnect */ @@ -2360,6 +2425,267 @@ out_unlock: current_restore_flags(pflags, PF_MEMALLOC); } +/* + * Transfer the connected socket to @upper_transport, then mark that + * xprt CONNECTED. + */ +static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt, + struct sock_xprt *upper_transport) +{ + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + struct rpc_xprt *upper_xprt = &upper_transport->xprt; + + if (!upper_transport->inet) { + struct socket *sock = lower_transport->sock; + struct sock *sk = sock->sk; + + /* Avoid temporary address, they are bad for long-lived + * connections such as NFS mounts. + * RFC4941, section 3.6 suggests that: + * Individual applications, which have specific + * knowledge about the normal duration of connections, + * MAY override this as appropriate. + */ + if (xs_addr(upper_xprt)->sa_family == PF_INET6) + ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC); + + xs_tcp_set_socket_timeouts(upper_xprt, sock); + tcp_sock_set_nodelay(sk); + + lock_sock(sk); + + /* @sk is already connected, so it now has the RPC callbacks. + * Reach into @lower_transport to save the original ones. + */ + upper_transport->old_data_ready = lower_transport->old_data_ready; + upper_transport->old_state_change = lower_transport->old_state_change; + upper_transport->old_write_space = lower_transport->old_write_space; + upper_transport->old_error_report = lower_transport->old_error_report; + sk->sk_user_data = upper_xprt; + + /* socket options */ + sock_reset_flag(sk, SOCK_LINGER); + + xprt_clear_connected(upper_xprt); + + upper_transport->sock = sock; + upper_transport->inet = sk; + upper_transport->file = lower_transport->file; + + release_sock(sk); + + /* Reset lower_transport before shutting down its clnt */ + mutex_lock(&lower_transport->recv_mutex); + lower_transport->inet = NULL; + lower_transport->sock = NULL; + lower_transport->file = NULL; + + xprt_clear_connected(lower_xprt); + xs_sock_reset_connection_flags(lower_xprt); + xs_stream_reset_connect(lower_transport); + mutex_unlock(&lower_transport->recv_mutex); + } + + if (!xprt_bound(upper_xprt)) + return -ENOTCONN; + + xs_set_memalloc(upper_xprt); + + if (!xprt_test_and_set_connected(upper_xprt)) { + upper_xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + + upper_xprt->stat.connect_count++; + upper_xprt->stat.connect_time += (long)jiffies - + upper_xprt->stat.connect_start; + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + } + return 0; +} + +/** + * xs_tls_handshake_done - TLS handshake completion handler + * @data: address of xprt to wake + * @status: status of handshake + * @peerid: serial number of key containing the remote's identity + * + */ +static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) +{ + struct rpc_xprt *lower_xprt = data; + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + + lower_transport->xprt_err = status ? -EACCES : 0; + complete(&lower_transport->handshake_done); + xprt_put(lower_xprt); +} + +static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec) +{ + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + struct tls_handshake_args args = { + .ta_sock = lower_transport->sock, + .ta_done = xs_tls_handshake_done, + .ta_data = xprt_get(lower_xprt), + .ta_peername = lower_xprt->servername, + }; + struct sock *sk = lower_transport->inet; + int rc; + + init_completion(&lower_transport->handshake_done); + set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); + lower_transport->xprt_err = -ETIMEDOUT; + switch (xprtsec->policy) { + case RPC_XPRTSEC_TLS_ANON: + rc = tls_client_hello_anon(&args, GFP_KERNEL); + if (rc) + goto out_put_xprt; + break; + case RPC_XPRTSEC_TLS_X509: + args.ta_my_cert = xprtsec->cert_serial; + args.ta_my_privkey = xprtsec->privkey_serial; + rc = tls_client_hello_x509(&args, GFP_KERNEL); + if (rc) + goto out_put_xprt; + break; + default: + rc = -EACCES; + goto out_put_xprt; + } + + rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done, + XS_TLS_HANDSHAKE_TO); + if (rc <= 0) { + if (!tls_handshake_cancel(sk)) { + if (rc == 0) + rc = -ETIMEDOUT; + goto out_put_xprt; + } + } + + rc = lower_transport->xprt_err; + +out: + xs_stream_reset_connect(lower_transport); + clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); + return rc; + +out_put_xprt: + xprt_put(lower_xprt); + goto out; +} + +/** + * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket + * @work: queued work item + * + * Invoked by a work queue tasklet. + * + * For RPC-with-TLS, there is a two-stage connection process. + * + * The "upper-layer xprt" is visible to the RPC consumer. Once it has + * been marked connected, the consumer knows that a TCP connection and + * a TLS session have been established. + * + * A "lower-layer xprt", created in this function, handles the mechanics + * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and + * then driving the TLS handshake. Once all that is complete, the upper + * layer xprt is marked connected. + */ +static void xs_tcp_tls_setup_socket(struct work_struct *work) +{ + struct sock_xprt *upper_transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_clnt *upper_clnt = upper_transport->clnt; + struct rpc_xprt *upper_xprt = &upper_transport->xprt; + struct rpc_create_args args = { + .net = upper_xprt->xprt_net, + .protocol = upper_xprt->prot, + .address = (struct sockaddr *)&upper_xprt->addr, + .addrsize = upper_xprt->addrlen, + .timeout = upper_clnt->cl_timeout, + .servername = upper_xprt->servername, + .program = upper_clnt->cl_program, + .prognumber = upper_clnt->cl_prog, + .version = upper_clnt->cl_vers, + .authflavor = RPC_AUTH_TLS, + .cred = upper_clnt->cl_cred, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + }, + }; + unsigned int pflags = current->flags; + struct rpc_clnt *lower_clnt; + struct rpc_xprt *lower_xprt; + int status; + + if (atomic_read(&upper_xprt->swapper)) + current->flags |= PF_MEMALLOC; + + xs_stream_start_connect(upper_transport); + + /* This implicitly sends an RPC_AUTH_TLS probe */ + lower_clnt = rpc_create(&args); + if (IS_ERR(lower_clnt)) { + trace_rpc_tls_unavailable(upper_clnt, upper_xprt); + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt)); + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + goto out_unlock; + } + + /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on + * the lower xprt. + */ + rcu_read_lock(); + lower_xprt = rcu_dereference(lower_clnt->cl_xprt); + rcu_read_unlock(); + status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec); + if (status) { + trace_rpc_tls_not_started(upper_clnt, upper_xprt); + goto out_close; + } + + status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); + if (status) + goto out_close; + + trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0); + if (!xprt_test_and_set_connected(upper_xprt)) { + upper_xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + + upper_xprt->stat.connect_count++; + upper_xprt->stat.connect_time += (long)jiffies - + upper_xprt->stat.connect_start; + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + } + rpc_shutdown_client(lower_clnt); + +out_unlock: + current_restore_flags(pflags, PF_MEMALLOC); + upper_transport->clnt = NULL; + xprt_unlock_connect(upper_xprt, upper_transport); + return; + +out_close: + rpc_shutdown_client(lower_clnt); + + /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. + * Wake them first here to ensure they get our tk_status code. + */ + xprt_wake_pending_tasks(upper_xprt, status); + xs_tcp_force_close(upper_xprt); + xprt_clear_connecting(upper_xprt); + goto out_unlock; +} + /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2391,6 +2717,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); + transport->clnt = task->tk_client; queue_delayed_work(xprtiod_workqueue, &transport->connect_worker, delay); @@ -2858,7 +3185,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) switch (sun->sun_family) { case AF_LOCAL: - if (sun->sun_path[0] != '/') { + if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') { dprintk("RPC: bad AF_LOCAL address: %s\n", sun->sun_path); ret = ERR_PTR(-EINVAL); @@ -3045,6 +3372,94 @@ out_err: } /** + * xs_setup_tcp_tls - Set up transport to use a TCP with TLS + * @args: rpc transport creation arguments + * + */ +static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args) +{ + struct sockaddr *addr = args->dstaddr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; + struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + max_slot_table_size); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = IPPROTO_TCP; + xprt->xprt_class = &xs_tcp_transport; + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_tcp_ops; + xprt->timeout = &xs_tcp_default_timeout; + + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + xprt->connect_timeout = xprt->timeout->to_initval * + (xprt->timeout->to_retries + 1); + + INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); + + switch (args->xprtsec.policy) { + case RPC_XPRTSEC_TLS_ANON: + case RPC_XPRTSEC_TLS_X509: + xprt->xprtsec = args->xprtsec; + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_tls_setup_socket); + break; + default: + ret = ERR_PTR(-EACCES); + goto out_err; + } + + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xs_xprt_free(xprt); + return ret; +} + +/** * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket * @args: rpc transport creation arguments * @@ -3153,6 +3568,15 @@ static struct xprt_class xs_tcp_transport = { .netid = { "tcp", "tcp6", "" }, }; +static struct xprt_class xs_tcp_tls_transport = { + .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list), + .name = "tcp-with-tls", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_TCP_TLS, + .setup = xs_setup_tcp_tls, + .netid = { "tcp", "tcp6", "" }, +}; + static struct xprt_class xs_bc_tcp_transport = { .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), .name = "tcp NFSv4.1 backchannel", @@ -3174,6 +3598,7 @@ int init_socket_xprt(void) xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); + xprt_register_transport(&xs_tcp_tls_transport); xprt_register_transport(&xs_bc_tcp_transport); return 0; @@ -3193,6 +3618,7 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); + xprt_unregister_transport(&xs_tcp_tls_transport); xprt_unregister_transport(&xs_bc_tcp_transport); } |