summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 12:15:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 12:15:03 -0800
commit74f602dc96dd854c7b2034947798c1e2a6b84066 (patch)
tree3fa9f58c0e36e2caa7478d4d47bb0263e267f1ac
parentbe695ee29e8fc0af266d9f1882868c47da01a790 (diff)
parent52104f274e2d7f134d34bab11cada8913d4544e2 (diff)
Merge tag 'nfs-for-5.11-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Features: - NFSv3: Add emulation of lookupp() to improve open_by_filehandle() support - A series of patches to improve readdir performance, particularly with large directories - Basic support for using NFS/RDMA with the pNFS files and flexfiles drivers - Micro-optimisations for RDMA - RDMA tracing improvements Bugfixes: - Fix a long standing bug with xs_read_xdr_buf() when receiving partial pages (Dan Aloni) - Various fixes for getxattr and listxattr, when used over non-TCP transports - Fixes for containerised NFS from Sargun Dhillon - switch nfsiod to be an UNBOUND workqueue (Neil Brown) - READDIR should not ask for security label information if there is no LSM policy (Olga Kornievskaia) - Avoid using interval-based rebinding with TCP in lockd (Calum Mackay) - A series of RPC and NFS layer fixes to support the NFSv4.2 READ_PLUS code - A couple of fixes for pnfs/flexfiles read failover Cleanups: - Various cleanups for the SUNRPC xdr code in conjunction with the READ_PLUS fixes" * tag 'nfs-for-5.11-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits) NFS/pNFS: Fix a typo in ff_layout_resend_pnfs_read() pNFS/flexfiles: Avoid spurious layout returns in ff_layout_choose_ds_for_read NFSv4/pnfs: Add tracing for the deviceid cache fs/lockd: convert comma to semicolon NFSv4.2: fix error return on memory allocation failure NFSv4.2/pnfs: Don't use READ_PLUS with pNFS yet NFSv4.2: Deal with potential READ_PLUS data extent buffer overflow NFSv4.2: Don't error when exiting early on a READ_PLUS buffer overflow NFSv4.2: Handle hole lengths that exceed the READ_PLUS read buffer NFSv4.2: decode_read_plus_hole() needs to check the extent offset NFSv4.2: decode_read_plus_data() must skip padding after data segment NFSv4.2: Ensure we always reset the result->count in decode_read_plus() SUNRPC: When expanding the buffer, we may need grow the sparse pages SUNRPC: Cleanup - constify a number of xdr_buf helpers SUNRPC: Clean up open coded setting of the xdr_stream 'nwords' field SUNRPC: _copy_to/from_pages() now check for zero length SUNRPC: Cleanup xdr_shrink_bufhead() SUNRPC: Fix xdr_expand_hole() SUNRPC: Fixes for xdr_align_data() SUNRPC: _shift_data_left/right_pages should check the shift length ...
-rw-r--r--fs/lockd/host.c22
-rw-r--r--fs/nfs/client.c8
-rw-r--r--fs/nfs/dir.c719
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c17
-rw-r--r--fs/nfs/fs_context.c21
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/nfs2xdr.c19
-rw-r--r--fs/nfs/nfs3proc.c83
-rw-r--r--fs/nfs/nfs3xdr.c29
-rw-r--r--fs/nfs/nfs42proc.c48
-rw-r--r--fs/nfs/nfs42xdr.c101
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4proc.c71
-rw-r--r--fs/nfs/nfs4session.h2
-rw-r--r--fs/nfs/nfs4super.c2
-rw-r--r--fs/nfs/nfs4trace.h75
-rw-r--r--fs/nfs/nfs4xdr.c48
-rw-r--r--fs/nfs/pnfs.c33
-rw-r--r--fs/nfs/pnfs.h7
-rw-r--r--fs/nfs/pnfs_dev.c23
-rw-r--r--fs/nfs/pnfs_nfs.c105
-rw-r--r--fs/nfs/proc.c18
-rw-r--r--include/linux/nfs_fs.h9
-rw-r--r--include/linux/nfs_xdr.h17
-rw-r--r--include/linux/sunrpc/xdr.h27
-rw-r--r--include/linux/sunrpc/xprt.h3
-rw-r--r--include/trace/events/rpcrdma.h450
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/debugfs.c4
-rw-r--r--net/sunrpc/sched.c65
-rw-r--r--net/sunrpc/xdr.c817
-rw-r--r--net/sunrpc/xprt.c117
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c81
-rw-r--r--net/sunrpc/xprtrdma/module.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c72
-rw-r--r--net/sunrpc/xprtrdma/transport.c8
-rw-r--r--net/sunrpc/xprtrdma/verbs.c30
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h9
-rw-r--r--net/sunrpc/xprtsock.c7
41 files changed, 1952 insertions, 1244 deletions
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0afb6d59bad0..f802223e71ab 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -163,7 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
- host->h_cred = get_cred(ni->cred),
+ host->h_cred = get_cred(ni->cred);
strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
@@ -439,12 +439,7 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
- if (time_after_eq(jiffies, host->h_nextrebind)) {
- rpc_force_rebind(clnt);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- dprintk("lockd: next rebind in %lu jiffies\n",
- host->h_nextrebind - jiffies);
- }
+ nlm_rebind_host(host);
} else {
unsigned long increment = nlmsvc_timeout;
struct rpc_timeout timeparms = {
@@ -494,13 +489,20 @@ nlm_bind_host(struct nlm_host *host)
return clnt;
}
-/*
- * Force a portmap lookup of the remote lockd port
+/**
+ * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
+ * @host: NLM host handle for peer
+ *
+ * This is not needed when using a connection-oriented protocol, such as TCP.
+ * The existing autobind mechanism is sufficient to force a rebind when
+ * required, e.g. on connection state transitions.
*/
void
nlm_rebind_host(struct nlm_host *host)
{
- dprintk("lockd: rebind host %s\n", host->h_name);
+ if (host->h_proto != IPPROTO_UDP)
+ return;
+
if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(host->h_rpcclnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4b8cc93913f7..ff5c4d0d6d13 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -571,7 +571,7 @@ static int nfs_start_lockd(struct nfs_server *server)
1 : 0,
.net = clp->cl_net,
.nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
- .cred = current_cred(),
+ .cred = server->cred,
};
if (nlm_init.nfs_version > 3)
@@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
- if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
- server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
+ if (server->dtsize > NFS_MAX_FILE_IO_SIZE)
+ server->dtsize = NFS_MAX_FILE_IO_SIZE;
if (server->dtsize > server->rsize)
server->dtsize = server->rsize;
@@ -985,7 +985,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (!server)
return ERR_PTR(-ENOMEM);
- server->cred = get_cred(current_cred());
+ server->cred = get_cred(fc->cred);
error = -ENOMEM;
fattr = nfs_alloc_fattr();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8a24fe20dccf..ef827ae193d2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -68,7 +68,7 @@ const struct address_space_operations nfs_dir_aops = {
.freepage = nfs_readdir_clear_array,
};
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_open_dir_context *ctx;
@@ -78,7 +78,6 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
ctx->attr_gencount = nfsi->attr_gencount;
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
- ctx->cred = get_cred(cred);
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
(nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -96,7 +95,6 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont
spin_lock(&dir->i_lock);
list_del(&ctx->list);
spin_unlock(&dir->i_lock);
- put_cred(ctx->cred);
kfree(ctx);
}
@@ -113,7 +111,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
- ctx = alloc_nfs_open_dir_context(inode, current_cred());
+ ctx = alloc_nfs_open_dir_context(inode);
if (IS_ERR(ctx)) {
res = PTR_ERR(ctx);
goto out;
@@ -133,43 +131,55 @@ nfs_closedir(struct inode *inode, struct file *filp)
struct nfs_cache_array_entry {
u64 cookie;
u64 ino;
- struct qstr string;
+ const char *name;
+ unsigned int name_len;
unsigned char d_type;
};
struct nfs_cache_array {
- int size;
- int eof_index;
u64 last_cookie;
+ unsigned int size;
+ unsigned char page_full : 1,
+ page_is_eof : 1,
+ cookies_are_ordered : 1;
struct nfs_cache_array_entry array[];
};
-typedef struct {
+struct nfs_readdir_descriptor {
struct file *file;
struct page *page;
struct dir_context *ctx;
- unsigned long page_index;
- u64 *dir_cookie;
+ pgoff_t page_index;
+ u64 dir_cookie;
u64 last_cookie;
+ u64 dup_cookie;
loff_t current_index;
loff_t prev_index;
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
unsigned long dir_verifier;
unsigned long timestamp;
unsigned long gencount;
+ unsigned long attr_gencount;
unsigned int cache_entry_index;
+ signed char duped;
bool plus;
bool eof;
-} nfs_readdir_descriptor_t;
+};
-static
-void nfs_readdir_init_array(struct page *page)
+static void nfs_readdir_array_init(struct nfs_cache_array *array)
+{
+ memset(array, 0, sizeof(struct nfs_cache_array));
+}
+
+static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
{
struct nfs_cache_array *array;
array = kmap_atomic(page);
- memset(array, 0, sizeof(struct nfs_cache_array));
- array->eof_index = -1;
+ nfs_readdir_array_init(array);
+ array->last_cookie = last_cookie;
+ array->cookies_are_ordered = 1;
kunmap_atomic(array);
}
@@ -184,61 +194,177 @@ void nfs_readdir_clear_array(struct page *page)
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
- kfree(array->array[i].string.name);
- array->size = 0;
+ kfree(array->array[i].name);
+ nfs_readdir_array_init(array);
kunmap_atomic(array);
}
+static struct page *
+nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
+{
+ struct page *page = alloc_page(gfp_flags);
+ if (page)
+ nfs_readdir_page_init_array(page, last_cookie);
+ return page;
+}
+
+static void nfs_readdir_page_array_free(struct page *page)
+{
+ if (page) {
+ nfs_readdir_clear_array(page);
+ put_page(page);
+ }
+}
+
+static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
+{
+ array->page_is_eof = 1;
+ array->page_full = 1;
+}
+
+static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
+{
+ return array->page_full;
+}
+
/*
* the caller is responsible for freeing qstr.name
* when called by nfs_readdir_add_to_array, the strings will be freed in
* nfs_clear_readdir_array()
*/
-static
-int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
{
- string->len = len;
- string->name = kmemdup_nul(name, len, GFP_KERNEL);
- if (string->name == NULL)
- return -ENOMEM;
+ const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
+
/*
* Avoid a kmemleak false positive. The pointer to the name is stored
* in a page cache page which kmemleak does not scan.
*/
- kmemleak_not_leak(string->name);
- string->hash = full_name_hash(NULL, name, len);
+ if (ret != NULL)
+ kmemleak_not_leak(ret);
+ return ret;
+}
+
+/*
+ * Check that the next array entry lies entirely within the page bounds
+ */
+static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
+{
+ struct nfs_cache_array_entry *cache_entry;
+
+ if (array->page_full)
+ return -ENOSPC;
+ cache_entry = &array->array[array->size + 1];
+ if ((char *)cache_entry - (char *)array > PAGE_SIZE) {
+ array->page_full = 1;
+ return -ENOSPC;
+ }
return 0;
}
static
int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
{
- struct nfs_cache_array *array = kmap(page);
+ struct nfs_cache_array *array;
struct nfs_cache_array_entry *cache_entry;
+ const char *name;
int ret;
- cache_entry = &array->array[array->size];
+ name = nfs_readdir_copy_name(entry->name, entry->len);
+ if (!name)
+ return -ENOMEM;
- /* Check that this entry lies within the page bounds */
- ret = -ENOSPC;
- if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
+ array = kmap_atomic(page);
+ ret = nfs_readdir_array_can_expand(array);
+ if (ret) {
+ kfree(name);
goto out;
+ }
+ cache_entry = &array->array[array->size];
cache_entry->cookie = entry->prev_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
- ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
- if (ret)
- goto out;
+ cache_entry->name_len = entry->len;
+ cache_entry->name = name;
array->last_cookie = entry->cookie;
+ if (array->last_cookie <= cache_entry->cookie)
+ array->cookies_are_ordered = 0;
array->size++;
if (entry->eof != 0)
- array->eof_index = array->size;
+ nfs_readdir_array_set_eof(array);
out:
- kunmap(page);
+ kunmap_atomic(array);
+ return ret;
+}
+
+static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
+ pgoff_t index, u64 last_cookie)
+{
+ struct page *page;
+
+ page = grab_cache_page(mapping, index);
+ if (page && !PageUptodate(page)) {
+ nfs_readdir_page_init_array(page, last_cookie);
+ if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
+ nfs_zap_mapping(mapping->host, mapping);
+ SetPageUptodate(page);
+ }
+
+ return page;
+}
+
+static u64 nfs_readdir_page_last_cookie(struct page *page)
+{
+ struct nfs_cache_array *array;
+ u64 ret;
+
+ array = kmap_atomic(page);
+ ret = array->last_cookie;
+ kunmap_atomic(array);
return ret;
}
+static bool nfs_readdir_page_needs_filling(struct page *page)
+{
+ struct nfs_cache_array *array;
+ bool ret;
+
+ array = kmap_atomic(page);
+ ret = !nfs_readdir_array_is_full(array);
+ kunmap_atomic(array);
+ return ret;
+}
+
+static void nfs_readdir_page_set_eof(struct page *page)
+{
+ struct nfs_cache_array *array;
+
+ array = kmap_atomic(page);
+ nfs_readdir_array_set_eof(array);
+ kunmap_atomic(array);
+}
+
+static void nfs_readdir_page_unlock_and_put(struct page *page)
+{
+ unlock_page(page);
+ put_page(page);
+}
+
+static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
+ pgoff_t index, u64 cookie)
+{
+ struct page *page;
+
+ page = nfs_readdir_page_get_locked(mapping, index, cookie);
+ if (page) {
+ if (nfs_readdir_page_last_cookie(page) == cookie)
+ return page;
+ nfs_readdir_page_unlock_and_put(page);
+ }
+ return NULL;
+}
+
static inline
int is_32bit_api(void)
{
@@ -258,8 +384,8 @@ bool nfs_readdir_use_cookie(const struct file *filp)
return true;
}
-static
-int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
+ struct nfs_readdir_descriptor *desc)
{
loff_t diff = desc->ctx->pos - desc->current_index;
unsigned int index;
@@ -267,13 +393,13 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
if (diff < 0)
goto out_eof;
if (diff >= array->size) {
- if (array->eof_index >= 0)
+ if (array->page_is_eof)
goto out_eof;
return -EAGAIN;
}
index = (unsigned int)diff;
- *desc->dir_cookie = array->array[index].cookie;
+ desc->dir_cookie = array->array[index].cookie;
desc->cache_entry_index = index;
return 0;
out_eof:
@@ -290,41 +416,55 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
}
-static
-int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
+ u64 cookie)
+{
+ if (!array->cookies_are_ordered)
+ return true;
+ /* Optimisation for monotonically increasing cookies */
+ if (cookie >= array->last_cookie)
+ return false;
+ if (array->size && cookie < array->array[0].cookie)
+ return false;
+ return true;
+}
+
+static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
+ struct nfs_readdir_descriptor *desc)
{
int i;
loff_t new_pos;
int status = -EAGAIN;
+ if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
+ goto check_eof;
+
for (i = 0; i < array->size; i++) {
- if (array->array[i].cookie == *desc->dir_cookie) {
+ if (array->array[i].cookie == desc->dir_cookie) {
struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
- struct nfs_open_dir_context *ctx = desc->file->private_data;
new_pos = desc->current_index + i;
- if (ctx->attr_gencount != nfsi->attr_gencount ||
+ if (desc->attr_gencount != nfsi->attr_gencount ||
!nfs_readdir_inode_mapping_valid(nfsi)) {
- ctx->duped = 0;
- ctx->attr_gencount = nfsi->attr_gencount;
+ desc->duped = 0;
+ desc->attr_gencount = nfsi->attr_gencount;
} else if (new_pos < desc->prev_index) {
- if (ctx->duped > 0
- && ctx->dup_cookie == *desc->dir_cookie) {
+ if (desc->duped > 0
+ && desc->dup_cookie == desc->dir_cookie) {
if (printk_ratelimit()) {
pr_notice("NFS: directory %pD2 contains a readdir loop."
"Please contact your server vendor. "
- "The file: %.*s has duplicate cookie %llu\n",
- desc->file, array->array[i].string.len,
- array->array[i].string.name, *desc->dir_cookie);
+ "The file: %s has duplicate cookie %llu\n",
+ desc->file, array->array[i].name, desc->dir_cookie);
}
status = -ELOOP;
goto out;
}
- ctx->dup_cookie = *desc->dir_cookie;
- ctx->duped = -1;
+ desc->dup_cookie = desc->dir_cookie;
+ desc->duped = -1;
}
if (nfs_readdir_use_cookie(desc->file))
- desc->ctx->pos = *desc->dir_cookie;
+ desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos = new_pos;
desc->prev_index = new_pos;
@@ -332,24 +472,24 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
return 0;
}
}
- if (array->eof_index >= 0) {
+check_eof:
+ if (array->page_is_eof) {
status = -EBADCOOKIE;
- if (*desc->dir_cookie == array->last_cookie)
+ if (desc->dir_cookie == array->last_cookie)
desc->eof = true;
}
out:
return status;
}
-static
-int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
{
struct nfs_cache_array *array;
int status;
- array = kmap(desc->page);
+ array = kmap_atomic(desc->page);
- if (*desc->dir_cookie == 0)
+ if (desc->dir_cookie == 0)
status = nfs_readdir_search_for_pos(array, desc);
else
status = nfs_readdir_search_for_cookie(array, desc);
@@ -359,17 +499,29 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
desc->current_index += array->size;
desc->page_index++;
}
- kunmap(desc->page);
+ kunmap_atomic(array);
return status;
}
/* Fill a page with xdr information before transferring to the cache page */
-static
-int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
- struct nfs_entry *entry, struct file *file, struct inode *inode)
+static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
+ __be32 *verf, u64 cookie,
+ struct page **pages, size_t bufsize,
+ __be32 *verf_res)
{
- struct nfs_open_dir_context *ctx = file->private_data;
- const struct cred *cred = ctx->cred;
+ struct inode *inode = file_inode(desc->file);
+ struct nfs_readdir_arg arg = {
+ .dentry = file_dentry(desc->file),
+ .cred = desc->file->f_cred,
+ .verf = verf,
+ .cookie = cookie,
+ .pages = pages,
+ .page_len = bufsize,
+ .plus = desc->plus,
+ };
+ struct nfs_readdir_res res = {
+ .verf = verf_res,
+ };
unsigned long timestamp, gencount;
int error;
@@ -377,14 +529,13 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
timestamp = jiffies;
gencount = nfs_inc_attr_generation_counter();
desc->dir_verifier = nfs_save_change_attribute(inode);
- error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
- NFS_SERVER(inode)->dtsize, desc->plus);
+ error = NFS_PROTO(inode)->readdir(&arg, &res);
if (error < 0) {
/* We requested READDIRPLUS, but the server doesn't grok it */
if (error == -ENOTSUPP && desc->plus) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
- desc->plus = false;
+ desc->plus = arg.plus = false;
goto again;
}
goto error;
@@ -395,7 +546,7 @@ error:
return error;
}
-static int xdr_decode(nfs_readdir_descriptor_t *desc,
+static int xdr_decode(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry, struct xdr_stream *xdr)
{
struct inode *inode = file_inode(desc->file);
@@ -557,24 +708,23 @@ out:
}
/* Perform conversion from xdr to cache array */
-static
-int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
- struct page **xdr_pages, struct page *page, unsigned int buflen)
-{
+static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
+ struct nfs_entry *entry,
+ struct page **xdr_pages,
+ unsigned int buflen,
+ struct page **arrays,
+ size_t narrays)
+{
+ struct address_space *mapping = desc->file->f_mapping;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
- struct nfs_cache_array *array;
- unsigned int count = 0;
+ struct page *scratch, *new, *page = *arrays;
int status;
scratch = alloc_page(GFP_KERNEL);
if (scratch == NULL)
return -ENOMEM;
- if (buflen == 0)
- goto out_nopages;
-
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
xdr_set_scratch_page(&stream, scratch);
@@ -583,209 +733,238 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
entry->label->len = NFS4_MAXLABELLEN;
status = xdr_decode(desc, entry, &stream);
- if (status != 0) {
- if (status == -EAGAIN)
- status = 0;
+ if (status != 0)
break;
- }
-
- count++;
if (desc->plus)
nfs_prime_dcache(file_dentry(desc->file), entry,
desc->dir_verifier);
status = nfs_readdir_add_to_array(entry, page);
- if (status != 0)
- break;
- } while (!entry->eof);
+ if (status != -ENOSPC)
+ continue;
-out_nopages:
- if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
- array = kmap(page);
- array->eof_index = array->size;
+ if (page->mapping != mapping) {
+ if (!--narrays)
+ break;
+ new = nfs_readdir_page_array_alloc(entry->prev_cookie,
+ GFP_KERNEL);
+ if (!new)
+ break;
+ arrays++;
+ *arrays = page = new;
+ } else {
+ new = nfs_readdir_page_get_next(mapping,
+ page->index + 1,
+ entry->prev_cookie);
+ if (!new)
+ break;
+ if (page != *arrays)
+ nfs_readdir_page_unlock_and_put(page);
+ page = new;
+ }
+ status = nfs_readdir_add_to_array(entry, page);
+ } while (!status && !entry->eof);
+
+ switch (status) {
+ case -EBADCOOKIE:
+ if (entry->eof) {
+ nfs_readdir_page_set_eof(page);
+ status = 0;
+ }
+ break;
+ case -ENOSPC:
+ case -EAGAIN:
status = 0;
- kunmap(page);
+ break;
}
+ if (page != *arrays)
+ nfs_readdir_page_unlock_and_put(page);
+
put_page(scratch);
return status;
}
-static
-void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
+static void nfs_readdir_free_pages(struct page **pages, size_t npages)
{
- unsigned int i;
- for (i = 0; i < npages; i++)
- put_page(pages[i]);
+ while (npages--)
+ put_page(pages[npages]);
+ kfree(pages);
}
/*
* nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
* to nfs_readdir_free_pages()
*/
-static
-int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
+static struct page **nfs_readdir_alloc_pages(size_t npages)
{
- unsigned int i;
+ struct page **pages;
+ size_t i;
+ pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
for (i = 0; i < npages; i++) {
struct page *page = alloc_page(GFP_KERNEL);
if (page == NULL)
goto out_freepages;
pages[i] = page;
}
- return 0;
+ return pages;
out_freepages:
nfs_readdir_free_pages(pages, i);
- return -ENOMEM;
+ return NULL;
}
-static
-int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
+ __be32 *verf_arg, __be32 *verf_res,
+ struct page **arrays, size_t narrays)
{
- struct page *pages[NFS_MAX_READDIR_PAGES];
- struct nfs_entry entry;
- struct file *file = desc->file;
- struct nfs_cache_array *array;
+ struct page **pages;
+ struct page *page = *arrays;
+ struct nfs_entry *entry;
+ size_t array_size;
+ struct inode *inode = file_inode(desc->file);
+ size_t dtsize = NFS_SERVER(inode)->dtsize;
int status = -ENOMEM;
- unsigned int array_size = ARRAY_SIZE(pages);
-
- nfs_readdir_init_array(page);
- entry.prev_cookie = 0;
- entry.cookie = desc->last_cookie;
- entry.eof = 0;
- entry.fh = nfs_alloc_fhandle();
- entry.fattr = nfs_alloc_fattr();
- entry.server = NFS_SERVER(inode);
- if (entry.fh == NULL || entry.fattr == NULL)
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+ entry->cookie = nfs_readdir_page_last_cookie(page);
+ entry->fh = nfs_alloc_fhandle();
+ entry->fattr = nfs_alloc_fattr();
+ entry->server = NFS_SERVER(inode);
+ if (entry->fh == NULL || entry->fattr == NULL)
goto out;
- entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
- if (IS_ERR(entry.label)) {
- status = PTR_ERR(entry.label);
+ entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+ if (IS_ERR(entry->label)) {
+ status = PTR_ERR(entry->label);
goto out;
}
- array = kmap(page);
+ array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ pages = nfs_readdir_alloc_pages(array_size);
+ if (!pages)
+ goto out_release_label;
- status = nfs_readdir_alloc_pages(pages, array_size);
- if (status < 0)
- goto out_release_array;
do {
unsigned int pglen;
- status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
-
+ status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
+ pages, dtsize,
+ verf_res);
if (status < 0)
break;
+
pglen = status;
- status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
- if (status < 0) {
- if (status == -ENOSPC)
- status = 0;
+ if (pglen == 0) {
+ nfs_readdir_page_set_eof(page);
break;
}
- } while (array->eof_index < 0);
+
+ status = nfs_readdir_page_filler(desc, entry, pages, pglen,
+ arrays, narrays);
+ } while (!status && nfs_readdir_page_needs_filling(page));
nfs_readdir_free_pages(pages, array_size);
-out_release_array:
- kunmap(page);
- nfs4_label_free(entry.label);
+out_release_label:
+ nfs4_label_free(entry->label);
out:
- nfs_free_fattr(entry.fattr);
- nfs_free_fhandle(entry.fh);
+ nfs_free_fattr(entry->fattr);
+ nfs_free_fhandle(entry->fh);
+ kfree(entry);
return status;
}
-/*
- * Now we cache directories properly, by converting xdr information
- * to an array that can be used for lookups later. This results in
- * fewer cache pages, since we can store more information on each page.
- * We only need to convert from xdr once so future lookups are much simpler
- */
-static
-int nfs_readdir_filler(void *data, struct page* page)
+static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
{
- nfs_readdir_descriptor_t *desc = data;
- struct inode *inode = file_inode(desc->file);
- int ret;
-
- ret = nfs_readdir_xdr_to_array(desc, page, inode);
- if (ret < 0)
- goto error;
- SetPageUptodate(page);
-
- if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
- /* Should never happen */
- nfs_zap_mapping(inode, inode->i_mapping);
- }
- unlock_page(page);
- return 0;
- error:
- nfs_readdir_clear_array(page);
- unlock_page(page);
- return ret;
+ put_page(desc->page);
+ desc->page = NULL;
}
-static
-void cache_page_release(nfs_readdir_descriptor_t *desc)
+static void
+nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
{
- put_page(desc->page);
- desc->page = NULL;
+ unlock_page(desc->page);
+ nfs_readdir_page_put(desc);
}
-static
-struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+static struct page *
+nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
{
- return read_cache_page(desc->file->f_mapping, desc->page_index,
- nfs_readdir_filler, desc);
+ return nfs_readdir_page_get_locked(desc->file->f_mapping,
+ desc->page_index,
+ desc->last_cookie);
}
/*
* Returns 0 if desc->dir_cookie was found on page desc->page_index
* and locks the page to prevent removal from the page cache.
*/
-static
-int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
+static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
{
struct inode *inode = file_inode(desc->file);
struct nfs_inode *nfsi = NFS_I(inode);
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
int res;
- desc->page = get_cache_page(desc);
- if (IS_ERR(desc->page))
- return PTR_ERR(desc->page);
- res = lock_page_killable(desc->page);
- if (res != 0)
- goto error;
- res = -EAGAIN;
- if (desc->page->mapping != NULL) {
- res = nfs_readdir_search_array(desc);
- if (res == 0) {
- nfsi->page_index = desc->page_index;
- return 0;
+ desc->page = nfs_readdir_page_get_cached(desc);
+ if (!desc->page)
+ return -ENOMEM;
+ if (nfs_readdir_page_needs_filling(desc->page)) {
+ res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
+ &desc->page, 1);
+ if (res < 0) {
+ nfs_readdir_page_unlock_and_put_cached(desc);
+ if (res == -EBADCOOKIE || res == -ENOTSYNC) {
+ invalidate_inode_pages2(desc->file->f_mapping);
+ desc->page_index = 0;
+ return -EAGAIN;
+ }
+ return res;
}
+ memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
}
- unlock_page(desc->page);
-error:
- cache_page_release(desc);
+ res = nfs_readdir_search_array(desc);
+ if (res == 0) {
+ nfsi->page_index = desc->page_index;
+ return 0;
+ }
+ nfs_readdir_page_unlock_and_put_cached(desc);
return res;
}
+static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc)
+{
+ struct address_space *mapping = desc->file->f_mapping;
+ struct inode *dir = file_inode(desc->file);
+ unsigned int dtsize = NFS_SERVER(dir)->dtsize;
+ loff_t size = i_size_read(dir);
+
+ /*
+ * Default to uncached readdir if the page cache is empty, and
+ * we're looking for a non-zero cookie in a large directory.
+ */
+ return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize;
+}
+
/* Search for desc->dir_cookie from the beginning of the page cache */
-static inline
-int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
+static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
{
int res;
- if (desc->page_index == 0) {
- desc->current_index = 0;
- desc->prev_index = 0;
- desc->last_cookie = 0;
- }
+ if (nfs_readdir_dont_search_cache(desc))
+ return -EBADCOOKIE;
+
do {
+ if (desc->page_index == 0) {
+ desc->current_index = 0;
+ desc->prev_index = 0;
+ desc->last_cookie = 0;
+ }
res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
@@ -794,43 +973,41 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
-static
-int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
{
struct file *file = desc->file;
- int i = 0;
- int res = 0;
- struct nfs_cache_array *array = NULL;
- struct nfs_open_dir_context *ctx = file->private_data;
+ struct nfs_inode *nfsi = NFS_I(file_inode(file));
+ struct nfs_cache_array *array;
+ unsigned int i = 0;
array = kmap(desc->page);
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
ent = &array->array[i];
- if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
+ if (!dir_emit(desc->ctx, ent->name, ent->name_len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
desc->eof = true;
break;
}
+ memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
if (i < (array->size-1))
- *desc->dir_cookie = array->array[i+1].cookie;
+ desc->dir_cookie = array->array[i+1].cookie;
else
- *desc->dir_cookie = array->last_cookie;
+ desc->dir_cookie = array->last_cookie;
if (nfs_readdir_use_cookie(file))
- desc->ctx->pos = *desc->dir_cookie;
+ desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos++;
- if (ctx->duped != 0)
- ctx->duped = 1;
+ if (desc->duped != 0)
+ desc->duped = 1;
}
- if (array->eof_index >= 0)
+ if (array->page_is_eof)
desc->eof = true;
kunmap(desc->page);
- dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
- (unsigned long long)*desc->dir_cookie, res);
- return res;
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
+ (unsigned long long)desc->dir_cookie);
}
/*
@@ -845,40 +1022,41 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
* we should already have a complete representation of the
* directory in the page cache by the time we get here.
*/
-static inline
-int uncached_readdir(nfs_readdir_descriptor_t *desc)
+static int uncached_readdir(struct nfs_readdir_descriptor *desc)
{
- struct page *page = NULL;
- int status;
- struct inode *inode = file_inode(desc->file);
- struct nfs_open_dir_context *ctx = desc->file->private_data;
+ struct page **arrays;
+ size_t i, sz = 512;
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
+ int status = -ENOMEM;
- dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
- (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
+ (unsigned long long)desc->dir_cookie);
- page = alloc_page(GFP_HIGHUSER);
- if (!page) {
- status = -ENOMEM;
+ arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
+ if (!arrays)
+ goto out;
+ arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
+ if (!arrays[0])
goto out;
- }
desc->page_index = 0;
- desc->last_cookie = *desc->dir_cookie;
- desc->page = page;
- ctx->duped = 0;
-
- status = nfs_readdir_xdr_to_array(desc, page, inode);
- if (status < 0)
- goto out_release;
-
- status = nfs_do_filldir(desc);
-
- out_release:
- nfs_readdir_clear_array(desc->page);
- cache_page_release(desc);
- out:
- dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
- __func__, status);
+ desc->last_cookie = desc->dir_cookie;
+ desc->duped = 0;
+
+ status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
+
+ for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+ desc->page = arrays[i];
+ nfs_do_filldir(desc);
+ }
+ desc->page = NULL;
+
+
+ for (i = 0; i < sz && arrays[i]; i++)
+ nfs_readdir_page_array_free(arrays[i]);
+out:
+ kfree(arrays);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
return status;
}
@@ -891,14 +1069,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
struct nfs_open_dir_context *dir_ctx = file->private_data;
- nfs_readdir_descriptor_t my_desc = {
- .file = file,
- .ctx = ctx,
- .dir_cookie = &dir_ctx->dir_cookie,
- .plus = nfs_use_readdirplus(inode, ctx),
- },
- *desc = &my_desc;
- int res = 0;
+ struct nfs_readdir_descriptor *desc;
+ int res;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
file, (long long)ctx->pos);
@@ -910,10 +1082,27 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
* to either find the entry with the appropriate number or
* revalidate the cookie.
*/
- if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
+ if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) {
res = nfs_revalidate_mapping(inode, file->f_mapping);
- if (res < 0)
+ if (res < 0)
+ goto out;
+ }
+
+ res = -ENOMEM;
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
goto out;
+ desc->file = file;
+ desc->ctx = ctx;
+ desc->plus = nfs_use_readdirplus(inode, ctx);
+
+ spin_lock(&file->f_lock);
+ desc->dir_cookie = dir_ctx->dir_cookie;
+ desc->dup_cookie = dir_ctx->dup_cookie;
+ desc->duped = dir_ctx->duped;
+ desc->attr_gencount = dir_ctx->attr_gencount;
+ memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+ spin_unlock(&file->f_lock);
do {
res = readdir_search_pagecache(desc);
@@ -921,11 +1110,13 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res == -EBADCOOKIE) {
res = 0;
/* This means either end of directory */
- if (*desc->dir_cookie && !desc->eof) {
+ if (desc->dir_cookie && !desc->eof) {
/* Or that the server has 'lost' a cookie */
res = uncached_readdir(desc);
if (res == 0)
continue;
+ if (res == -EBADCOOKIE || res == -ENOTSYNC)
+ res = 0;
}
break;
}
@@ -940,15 +1131,21 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res < 0)
break;
- res = nfs_do_filldir(desc);
- unlock_page(desc->page);
- cache_page_release(desc);
- if (res < 0)
- break;
+ nfs_do_filldir(desc);
+ nfs_readdir_page_unlock_and_put_cached(desc);
} while (!desc->eof);
+
+ spin_lock(&file->f_lock);
+ dir_ctx->dir_cookie = desc->dir_cookie;
+ dir_ctx->dup_cookie = desc->dup_cookie;
+ dir_ctx->duped = desc->duped;
+ dir_ctx->attr_gencount = desc->attr_gencount;
+ memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
+ spin_unlock(&file->f_lock);
+
+ kfree(desc);
+
out:
- if (res > 0)
- res = 0;
dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
return res;
}
@@ -984,6 +1181,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dir_ctx->dir_cookie = offset;
else
dir_ctx->dir_cookie = 0;
+ if (offset == 0)
+ memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
dir_ctx->duped = 0;
}
spin_unlock(&filp->f_lock);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 4252ce633533..872112bffcab 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -740,16 +740,12 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- bool fail_return = false;
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
- if (idx+1 == fls->mirror_array_cnt)
- fail_return = !check_device;
-
mirror = FF_LAYOUT_COMP(lseg, idx);
- ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
continue;
@@ -1056,7 +1052,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx + 1;
u32 new_idx = 0;
- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
+ if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
ff_layout_send_layouterror(hdr->lseg);
else
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
@@ -2284,7 +2280,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
char portbuf[RPCBIND_MAXUADDRPLEN];
char addrbuf[RPCBIND_MAXUADDRLEN];
- char *netid;
unsigned short port;
int len, netid_len;
__be32 *p;
@@ -2294,18 +2289,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in *)sap)->sin_port);
- netid = "tcp";
- netid_len = 3;
break;
case AF_INET6:
if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
- netid = "tcp6";
- netid_len = 4;
break;
default:
- /* we only support tcp and tcp6 */
WARN_ON_ONCE(1);
return;
}
@@ -2313,8 +2303,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
+ netid_len = strlen(da->da_netid);
p = xdr_reserve_space(xdr, 4 + netid_len);
- xdr_encode_opaque(p, netid, netid_len);
+ xdr_encode_opaque(p, da->da_netid, netid_len);
p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, addrbuf, len);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 29ec8b09a52d..06894bcdea2d 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_tcp:
- ctx->flags |= NFS_MOUNT_TCP;
- ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- break;
case Opt_rdma:
ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
- ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(param->key);
+ ret = xprt_find_transport_ident(param->key);
+ if (ret < 0)
+ goto out_bad_transport;
+ ctx->nfs_server.protocol = ret;
break;
case Opt_acl:
if (result.negated)
@@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
case Opt_xprt_rdma:
/* vector side protocols to TCP */
ctx->flags |= NFS_MOUNT_TCP;
- ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(param->string);
+ ret = xprt_find_transport_ident(param->string);
+ if (ret < 0)
+ goto out_bad_transport;
+ ctx->nfs_server.protocol = ret;
break;
default:
- return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ goto out_bad_transport;
}
ctx->protofamily = protofamily;
@@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_xprt_rdma: /* not used for side protocols */
default:
- return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ goto out_bad_transport;
}
ctx->mountfamily = mountfamily;
break;
@@ -787,6 +788,8 @@ out_invalid_address:
return nfs_invalf(fc, "NFS: Bad IP address specified");
out_of_bounds:
return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+out_bad_transport:
+ return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
}
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index aa6493905bbe..522aa10a1a3e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode)
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
- memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA
@@ -1237,7 +1236,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
- struct nfs_inode *nfsi = NFS_I(inode);
int ret;
if (mapping->nrpages != 0) {
@@ -1250,11 +1248,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (ret < 0)
return ret;
}
- if (S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- spin_unlock(&inode->i_lock);
- }
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
@@ -2180,7 +2173,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6673a77884d9..b840d0a91c9d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
#define NFS_UNSPEC_RETRANS (UINT_MAX)
#define NFS_UNSPEC_TIMEO (UINT_MAX)
-/*
- * Maximum number of pages that readdir can use for creating
- * a vmapped array of pages.
- */
-#define NFS_MAX_READDIR_PAGES 8
-
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname; /* Hostname of the server */
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index f6676af37d5d..7fba7711e6b3 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -34,6 +34,7 @@
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
*/
+#define NFS_pagepad_sz (1) /* Page padding */
#define NFS_fhandle_sz (8)
#define NFS_sattr_sz (8)
#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2))
@@ -56,11 +57,11 @@
#define NFS_attrstat_sz (1+NFS_fattr_sz)
#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
-#define NFS_readlinkres_sz (2+1)
-#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
+#define NFS_readlinkres_sz (2+NFS_pagepad_sz)
+#define NFS_readres_sz (1+NFS_fattr_sz+1+NFS_pagepad_sz)
#define NFS_writeres_sz (NFS_attrstat_sz)
#define NFS_stat_sz (1)
-#define NFS_readdirres_sz (1+1)
+#define NFS_readdirres_sz (1+NFS_pagepad_sz)
#define NFS_statfsres_sz (1+NFS_info_sz)
static int nfs_stat_to_errno(enum nfs_stat);
@@ -592,8 +593,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
const struct nfs_readlinkargs *args = data;
encode_fhandle(xdr, args->fh);
- rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->pglen, NFS_readlinkres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
+ NFS_readlinkres_sz - NFS_pagepad_sz);
}
/*
@@ -628,8 +629,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
const struct nfs_pgio_args *args = data;
encode_readargs(xdr, args);
- rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->count, NFS_readres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count,
+ NFS_readres_sz - NFS_pagepad_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
@@ -786,8 +787,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
const struct nfs_readdirargs *args = data;
encode_readdirargs(xdr, args);
- rpc_prepare_reply_pages(req, args->pages, 0,
- args->count, NFS_readdirres_sz);
+ rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+ NFS_readdirres_sz - NFS_pagepad_sz);
}
/*
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2397ceedba8a..5c4e23abc345 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr,
- struct nfs4_label *label)
+__nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ unsigned short task_flags)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
- .name = dentry->d_name.name,
- .len = dentry->d_name.len
+ .name = name,
+ .len = len
};
struct nfs3_diropres res = {
.fh = fhandle,
@@ -173,17 +173,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
.rpc_resp = &res,
};
int status;
- unsigned short task_flags = 0;
-
- /* Is this is an attribute revalidation, subject to softreval? */
- if (nfs_lookup_is_soft_revalidate(dentry))
- task_flags |= RPC_TASK_TIMEOUT;
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
return -ENOMEM;
- dprintk("NFS call lookup %pd2\n", dentry);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
nfs_refresh_inode(dir, res.dir_attr);
@@ -198,6 +192,37 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
return status;
}
+static int
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
+{
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
+
+ dprintk("NFS call lookup %pd2\n", dentry);
+ return __nfs3_proc_lookup(dir, dentry->d_name.name,
+ dentry->d_name.len, fhandle, fattr,
+ task_flags);
+}
+
+static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+ const char dotdot[] = "..";
+ const size_t len = strlen(dotdot);
+ unsigned short task_flags = 0;
+
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ task_flags |= RPC_TASK_TIMEOUT;
+
+ return __nfs3_proc_lookup(inode, dotdot, len, fhandle, fattr,
+ task_flags);
+}
+
static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs3_accessargs arg = {
@@ -637,37 +662,36 @@ out:
* Also note that this implementation handles both plain readdir and
* readdirplus.
*/
-static int
-nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
- u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg,
+ struct nfs_readdir_res *nr_res)
{
- struct inode *dir = d_inode(dentry);
- __be32 *verf = NFS_I(dir)->cookieverf;
+ struct inode *dir = d_inode(nr_arg->dentry);
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
- .cookie = cookie,
- .verf = {verf[0], verf[1]},
- .plus = plus,
- .count = count,
- .pages = pages
+ .cookie = nr_arg->cookie,
+ .plus = nr_arg->plus,
+ .count = nr_arg->page_len,
+ .pages = nr_arg->pages
};
struct nfs3_readdirres res = {
- .verf = verf,
- .plus = plus
+ .verf = nr_res->verf,
+ .plus = nr_arg->plus,
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = cred,
+ .rpc_cred = nr_arg->cred,
};
int status = -ENOMEM;
- if (plus)
+ if (nr_arg->plus)
msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+ if (arg.cookie)
+ memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf));
- dprintk("NFS call readdir%s %d\n",
- plus? "plus" : "", (unsigned int) cookie);
+ dprintk("NFS call readdir%s %llu\n", nr_arg->plus ? "plus" : "",
+ (unsigned long long)nr_arg->cookie);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
@@ -680,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
nfs_free_fattr(res.dir_attr);
out:
- dprintk("NFS reply readdir%s: %d\n",
- plus? "plus" : "", status);
+ dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "",
+ status);
return status;
}
@@ -1004,6 +1028,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
+ .lookupp = nfs3_proc_lookupp,
.access = nfs3_proc_access,
.readlink = nfs3_proc_readlink,
.create = nfs3_proc_create,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 69971f6c840d..ca10072644ff 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -33,6 +33,7 @@
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
*/
+#define NFS3_pagepad_sz (1) /* Page padding */
#define NFS3_fhandle_sz (1+16)
#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
#define NFS3_sattr_sz (15)
@@ -69,13 +70,13 @@
#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
-#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
-#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
+#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz)
+#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
-#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
+#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz)
#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
@@ -85,7 +86,8 @@
#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
- XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+\
+ NFS3_pagepad_sz)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
static int nfs3_stat_to_errno(enum nfs_stat);
@@ -909,8 +911,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
const struct nfs3_readlinkargs *args = data;
encode_nfs_fh3(xdr, args->fh);
- rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->pglen, NFS3_readlinkres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
+ NFS3_readlinkres_sz - NFS3_pagepad_sz);
}
/*
@@ -939,7 +941,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
const void *data)
{
const struct nfs_pgio_args *args = data;
- unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
+ unsigned int replen = args->replen ? args->replen :
+ NFS3_readres_sz - NFS3_pagepad_sz;
encode_read3args(xdr, args);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
@@ -1239,8 +1242,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdir3args(xdr, args);
- rpc_prepare_reply_pages(req, args->pages, 0,
- args->count, NFS3_readdirres_sz);
+ rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+ NFS3_readdirres_sz - NFS3_pagepad_sz);
}
/*
@@ -1281,8 +1284,8 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdirplus3args(xdr, args);
- rpc_prepare_reply_pages(req, args->pages, 0,
- args->count, NFS3_readdirres_sz);
+ rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+ NFS3_readdirres_sz - NFS3_pagepad_sz);
}
/*
@@ -1328,7 +1331,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
if (args->mask & (NFS_ACL | NFS_DFACL)) {
rpc_prepare_reply_pages(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
- ACL3_getaclres_sz);
+ ACL3_getaclres_sz - NFS3_pagepad_sz);
req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
}
}
@@ -1648,7 +1651,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
- result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
+ result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 4fc61e3d098d..f3fd935620fc 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1173,14 +1173,12 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
}
static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
- void *buf, size_t buflen)
+ void *buf, size_t buflen, struct page **pages,
+ size_t plen)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct page *pages[NFS4XATTR_MAXPAGES] = {};
struct nfs42_getxattrargs arg = {
.fh = NFS_FH(inode),
- .xattr_pages = pages,
- .xattr_len = buflen,
.xattr_name = name,
};
struct nfs42_getxattrres res;
@@ -1189,7 +1187,10 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret, np;
+ ssize_t ret;
+
+ arg.xattr_len = plen;
+ arg.xattr_pages = pages;
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
@@ -1214,10 +1215,6 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
_copy_from_pages(buf, pages, 0, res.xattr_len);
}
- np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE);
- while (--np >= 0)
- __free_page(pages[np]);
-
return res.xattr_len;
}
@@ -1292,16 +1289,45 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
void *buf, size_t buflen)
{
struct nfs4_exception exception = { };
- ssize_t err;
+ ssize_t err, np, i;
+ struct page **pages;
+ np = nfs_page_array_len(0, buflen ?: XATTR_SIZE_MAX);
+ pages = kmalloc_array(np, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ for (i = 0; i < np; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i]) {
+ np = i + 1;
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /*
+ * The GETXATTR op has no length field in the call, and the
+ * xattr data is at the end of the reply.
+ *
+ * There is no downside in using the page-aligned length. It will
+ * allow receiving and caching xattrs that are too large for the
+ * caller but still fit in the page-rounded value.
+ */
do {
- err = _nfs42_proc_getxattr(inode, name, buf, buflen);
+ err = _nfs42_proc_getxattr(inode, name, buf, buflen,
+ pages, np * PAGE_SIZE);
if (err >= 0)
break;
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
+out:
+ while (--np >= 0)
+ __free_page(pages[np]);
+ kfree(pages);
+
return err;
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index ea7dd8cbfac9..c8bad735e4c1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -191,7 +191,7 @@
#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
nfs4_xattr_name_maxsz)
-#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1)
+#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
1 + nfs4_xattr_name_maxsz + 1)
#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
@@ -489,6 +489,12 @@ static int decode_getxattr(struct xdr_stream *xdr,
return -EIO;
len = be32_to_cpup(p);
+
+ /*
+ * Only check against the page length here. The actual
+ * requested length may be smaller, but that is only
+ * checked against after possibly caching a valid reply.
+ */
if (len > req->rq_rcv_buf.page_len)
return -ERANGE;
@@ -1019,56 +1025,80 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
-static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
- uint32_t *eof)
+static int decode_read_plus_data(struct xdr_stream *xdr,
+ struct nfs_pgio_args *args,
+ struct nfs_pgio_res *res)
{
uint32_t count, recvd;
uint64_t offset;
__be32 *p;
p = xdr_inline_decode(xdr, 8 + 4);
- if (unlikely(!p))
- return -EIO;
+ if (!p)
+ return 1;
p = xdr_decode_hyper(p, &offset);
count = be32_to_cpup(p);
- recvd = xdr_align_data(xdr, res->count, count);
- res->count += recvd;
-
- if (count > recvd) {
- dprintk("NFS: server cheating in read reply: "
- "count %u > recvd %u\n", count, recvd);
- *eof = 0;
+ recvd = xdr_align_data(xdr, res->count, xdr_align_size(count));
+ if (recvd > count)
+ recvd = count;
+ if (res->count + recvd > args->count) {
+ if (args->count > res->count)
+ res->count += args->count - res->count;
return 1;
}
-
+ res->count += recvd;
+ if (count > recvd)
+ return 1;
return 0;
}
-static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
- uint32_t *eof)
+static int decode_read_plus_hole(struct xdr_stream *xdr,
+ struct nfs_pgio_args *args,
+ struct nfs_pgio_res *res, uint32_t *eof)
{
uint64_t offset, length, recvd;
__be32 *p;
p = xdr_inline_decode(xdr, 8 + 8);
- if (unlikely(!p))
- return -EIO;
+ if (!p)
+ return 1;
p = xdr_decode_hyper(p, &offset);
p = xdr_decode_hyper(p, &length);
+ if (offset != args->offset + res->count) {
+ /* Server returned an out-of-sequence extent */
+ if (offset > args->offset + res->count ||
+ offset + length < args->offset + res->count) {
+ dprintk("NFS: server returned out of sequence extent: "
+ "offset/size = %llu/%llu != expected %llu\n",
+ (unsigned long long)offset,
+ (unsigned long long)length,
+ (unsigned long long)(args->offset +
+ res->count));
+ return 1;
+ }
+ length -= args->offset + res->count - offset;
+ }
+ if (length + res->count > args->count) {
+ *eof = 0;
+ if (unlikely(res->count >= args->count))
+ return 1;
+ length = args->count - res->count;
+ }
recvd = xdr_expand_hole(xdr, res->count, length);
res->count += recvd;
- if (recvd < length) {
- *eof = 0;
+ if (recvd < length)
return 1;
- }
return 0;
}
static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
+ struct nfs_pgio_header *hdr =
+ container_of(res, struct nfs_pgio_header, res);
+ struct nfs_pgio_args *args = &hdr->args;
uint32_t eof, segments, type;
int status, i;
__be32 *p;
@@ -1081,6 +1111,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
if (unlikely(!p))
return -EIO;
+ res->count = 0;
eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
if (segments == 0)
@@ -1088,26 +1119,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
for (i = 0; i < segments; i++) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
+ if (!p)
+ goto early_out;
type = be32_to_cpup(p++);
if (type == NFS4_CONTENT_DATA)
- status = decode_read_plus_data(xdr, res, &eof);
+ status = decode_read_plus_data(xdr, args, res);
else if (type == NFS4_CONTENT_HOLE)
- status = decode_read_plus_hole(xdr, res, &eof);
+ status = decode_read_plus_hole(xdr, args, res, &eof);
else
return -EINVAL;
if (status < 0)
return status;
if (status > 0)
- break;
+ goto early_out;
}
out:
res->eof = eof;
return 0;
+early_out:
+ if (unlikely(!i))
+ return -EIO;
+ res->eof = 0;
+ return 0;
}
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
@@ -1476,18 +1512,16 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- size_t plen;
+ uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 1;
encode_getxattr(xdr, args->xattr_name, &hdr);
- plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX;
-
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen,
- hdr.replen);
- req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
+ replen);
encode_nops(&hdr);
}
@@ -1520,14 +1554,15 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
+ uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
encode_listxattrs(xdr, args, &hdr);
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
- hdr.replen);
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
encode_nops(&hdr);
}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index be7915c861ce..86acffe7335c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1153,7 +1153,7 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc)
if (!server)
return ERR_PTR(-ENOMEM);
- server->cred = get_cred(current_cred());
+ server->cred = get_cred(fc->cred);
auth_probe = ctx->auth_info.flavor_len < 1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e89468678ae1..0ce04e0e5d82 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -184,6 +184,8 @@ static int nfs4_map_errors(int err)
return -EPROTONOSUPPORT;
case -NFS4ERR_FILE_OPEN:
return -EBUSY;
+ case -NFS4ERR_NOT_SAME:
+ return -ENOTSYNC;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -4397,6 +4399,10 @@ static int _nfs4_proc_lookupp(struct inode *inode,
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned short task_flags = 0;
+
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ task_flags |= RPC_TASK_TIMEOUT;
args.bitmask = nfs4_bitmask(server, label);
@@ -4404,7 +4410,7 @@ static int _nfs4_proc_lookupp(struct inode *inode,
dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino);
status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
- &res.seq_res, 0);
+ &res.seq_res, task_flags);
dprintk("NFS reply lookupp: %d\n", status);
return status;
}
@@ -4957,35 +4963,40 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
return err;
}
-static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
- u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
+ struct nfs_readdir_res *nr_res)
{
- struct inode *dir = d_inode(dentry);
+ struct inode *dir = d_inode(nr_arg->dentry);
+ struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_readdir_arg args = {
.fh = NFS_FH(dir),
- .pages = pages,
+ .pages = nr_arg->pages,
.pgbase = 0,
- .count = count,
- .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
- .plus = plus,
+ .count = nr_arg->page_len,
+ .plus = nr_arg->plus,
};
struct nfs4_readdir_res res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
.rpc_argp = &args,
.rpc_resp = &res,
- .rpc_cred = cred,
+ .rpc_cred = nr_arg->cred,
};
int status;
- dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
- dentry,
- (unsigned long long)cookie);
- nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
+ dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__,
+ nr_arg->dentry, (unsigned long long)nr_arg->cookie);
+ if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+ args.bitmask = server->attr_bitmask_nl;
+ else
+ args.bitmask = server->attr_bitmask;
+
+ nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args);
res.pgbase = args.pgbase;
- status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
if (status >= 0) {
- memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
+ memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
}
@@ -4995,19 +5006,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
return status;
}
-static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
- u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs4_proc_readdir(struct nfs_readdir_arg *arg,
+ struct nfs_readdir_res *res)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_proc_readdir(dentry, cred, cookie,
- pages, count, plus);
- trace_nfs4_readdir(d_inode(dentry), err);
- err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
- &exception);
+ err = _nfs4_proc_readdir(arg, res);
+ trace_nfs4_readdir(d_inode(arg->dentry), err);
+ err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)),
+ err, &exception);
} while (exception.retry);
return err;
}
@@ -5310,17 +5320,17 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
}
#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
-static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- if (server->caps & NFS_CAP_READ_PLUS)
+ /* Note: We don't use READ_PLUS with pNFS yet */
+ if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp)
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
- else
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
#else
-static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
#endif /* CONFIG_NFS_V4_2 */
@@ -5330,7 +5340,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
- nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs42_read_plus_support(hdr, msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@@ -9651,6 +9662,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
if (res.notification != args.notify_types)
pdev->nocache = 1;
+ trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status);
+
dprintk("<-- %s status=%d\n", __func__, status);
return status;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index b996ee23f1ba..3de425f59b3a 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -34,7 +34,7 @@ enum nfs4_slot_tbl_state {
NFS4_SLOT_TBL_DRAINING,
};
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, BITS_PER_LONG)
struct nfs4_slot_table {
struct nfs4_session *session; /* Parent session */
struct nfs4_slot *slots; /* seqid per slot */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 93f5c1678ec2..984cc42ee54d 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode)
nfs_inode_evict_delegation(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
- pnfs_destroy_layout(NFS_I(inode));
+ pnfs_destroy_layout_final(NFS_I(inode));
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
nfs4_xattr_cache_zap(inode);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 484c1da96dea..48d761e593fb 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2189,6 +2189,81 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
+DECLARE_EVENT_CLASS(nfs4_deviceid_event,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const struct nfs4_deviceid *deviceid
+ ),
+
+ TP_ARGS(clp, deviceid),
+
+ TP_STRUCT__entry(
+ __string(dstaddr, clp->cl_hostname)
+ __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dstaddr, clp->cl_hostname);
+ memcpy(__entry->deviceid, deviceid->data,
+ NFS4_DEVICEID4_SIZE);
+ ),
+
+ TP_printk(
+ "deviceid=%s, dstaddr=%s",
+ __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+ __get_str(dstaddr)
+ )
+);
+#define DEFINE_PNFS_DEVICEID_EVENT(name) \
+ DEFINE_EVENT(nfs4_deviceid_event, name, \
+ TP_PROTO(const struct nfs_client *clp, \
+ const struct nfs4_deviceid *deviceid \
+ ), \
+ TP_ARGS(clp, deviceid))
+DEFINE_PNFS_DEVICEID_EVENT(nfs4_deviceid_free);
+
+DECLARE_EVENT_CLASS(nfs4_deviceid_status,
+ TP_PROTO(
+ const struct nfs_server *server,
+ const struct nfs4_deviceid *deviceid,
+ int status
+ ),
+
+ TP_ARGS(server, deviceid, status),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, status)
+ __string(dstaddr, server->nfs_client->cl_hostname)
+ __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = server->s_dev;
+ __entry->status = status;
+ __assign_str(dstaddr, server->nfs_client->cl_hostname);
+ memcpy(__entry->deviceid, deviceid->data,
+ NFS4_DEVICEID4_SIZE);
+ ),
+
+ TP_printk(
+ "dev=%02x:%02x: deviceid=%s, dstaddr=%s, status=%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+ __get_str(dstaddr),
+ __entry->status
+ )
+);
+#define DEFINE_PNFS_DEVICEID_STATUS(name) \
+ DEFINE_EVENT(nfs4_deviceid_status, name, \
+ TP_PROTO(const struct nfs_server *server, \
+ const struct nfs4_deviceid *deviceid, \
+ int status \
+ ), \
+ TP_ARGS(server, deviceid, status))
+DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
+DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
+
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
const struct nfs_pgio_header *hdr
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2eabe5add344..ac6b79ee9355 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -84,6 +84,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
+#define pagepad_maxsz (1)
#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
@@ -215,14 +216,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
nfs4_fattr_bitmap_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
-#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
+#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
#define encode_readdir_maxsz (op_encode_hdr_maxsz + \
2 + encode_verifier_maxsz + 5 + \
nfs4_label_maxsz)
#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
- decode_verifier_maxsz + 1)
+ decode_verifier_maxsz + pagepad_maxsz)
#define encode_readlink_maxsz (op_encode_hdr_maxsz)
-#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
+#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
#define encode_write_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 4)
#define decode_write_maxsz (op_decode_hdr_maxsz + \
@@ -284,14 +285,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
#define encode_getacl_maxsz (encode_getattr_maxsz)
#define decode_getacl_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 1 + 1)
+ nfs4_fattr_bitmap_maxsz + 1 + pagepad_maxsz)
#define encode_setacl_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_setacl_maxsz (decode_setattr_maxsz)
#define encode_fs_locations_maxsz \
(encode_getattr_maxsz)
#define decode_fs_locations_maxsz \
- (1)
+ (pagepad_maxsz)
#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
@@ -393,12 +394,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
/* devaddr4 payload is read into page */ \
1 /* notification bitmap length */ + \
1 /* notification bitmap, word 0 */ + \
- 1 /* possible XDR padding */)
+ pagepad_maxsz /* possible XDR padding */)
#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
encode_stateid_maxsz)
#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
decode_stateid_maxsz + \
- XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
+ XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
+ pagepad_maxsz)
#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
@@ -2342,7 +2344,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_layoutget(xdr, args->lg_args, &hdr);
rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
args->lg_args->layout.pglen,
- hdr.replen);
+ hdr.replen - pagepad_maxsz);
}
encode_nops(&hdr);
}
@@ -2388,7 +2390,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
encode_layoutget(xdr, args->lg_args, &hdr);
rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
args->lg_args->layout.pglen,
- hdr.replen);
+ hdr.replen - pagepad_maxsz);
}
encode_nops(&hdr);
}
@@ -2499,7 +2501,7 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_readlink(xdr, args, req, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->pglen, hdr.replen);
+ args->pglen, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@@ -2520,7 +2522,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_readdir(xdr, args, req, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->count, hdr.replen);
+ args->count, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@@ -2541,7 +2543,7 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_read(xdr, args, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->count, hdr.replen);
+ args->count, hdr.replen - pagepad_maxsz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
encode_nops(&hdr);
}
@@ -2588,7 +2590,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
rpc_prepare_reply_pages(req, args->acl_pages, 0,
- args->acl_len, replen + 1);
+ args->acl_len, replen);
encode_nops(&hdr);
}
@@ -2810,7 +2812,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
}
rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
- PAGE_SIZE, replen + 1);
+ PAGE_SIZE, replen);
encode_nops(&hdr);
}
@@ -3009,15 +3011,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
+ uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
+
+ replen = hdr.replen + op_decode_hdr_maxsz + 2;
+
encode_getdeviceinfo(xdr, args, &hdr);
- /* set up reply kvec. Subtract notification bitmap max size (2)
- * so that notification bitmap is put in xdr_buf tail */
+ /* set up reply kvec. device_addr4 opaque data is read into the
+ * pages */
rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
- args->pdev->pglen, hdr.replen - 2);
+ args->pdev->pglen, replen);
encode_nops(&hdr);
}
@@ -3039,7 +3045,7 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
encode_layoutget(xdr, args, &hdr);
rpc_prepare_reply_pages(req, args->layout.pages, 0,
- args->layout.pglen, hdr.replen);
+ args->layout.pglen, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@@ -5331,11 +5337,11 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
res->acl_len = attrlen;
/* Check for receive buffer overflow */
- if (res->acl_len > (xdr->nwords << 2) ||
+ if (res->acl_len > xdr_stream_remaining(xdr) ||
res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
res->acl_flags |= NFS4_ACL_TRUNC;
- dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
- attrlen, xdr->nwords << 2);
+ dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
+ attrlen, xdr_stream_remaining(xdr));
}
} else
status = -EOPNOTSUPP;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0e50b9d45c32..07f59dc8cb2e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -294,6 +294,7 @@ void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode;
+ unsigned long i_state;
if (!lo)
return;
@@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
+ i_state = inode->i_state;
spin_unlock(&inode->i_lock);
pnfs_free_layout_hdr(lo);
+ /* Notify pnfs_destroy_layout_final() that we're done */
+ if (i_state & (I_FREEING | I_CLEAR))
+ wake_up_var(lo);
}
}
@@ -734,8 +739,7 @@ pnfs_free_lseg_list(struct list_head *free_me)
}
}
-void
-pnfs_destroy_layout(struct nfs_inode *nfsi)
+static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
@@ -753,9 +757,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
pnfs_put_layout_hdr(lo);
} else
spin_unlock(&nfsi->vfs_inode.i_lock);
+ return lo;
+}
+
+void pnfs_destroy_layout(struct nfs_inode *nfsi)
+{
+ __pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
+static bool pnfs_layout_removed(struct nfs_inode *nfsi,
+ struct pnfs_layout_hdr *lo)
+{
+ bool ret;
+
+ spin_lock(&nfsi->vfs_inode.i_lock);
+ ret = nfsi->layout != lo;
+ spin_unlock(&nfsi->vfs_inode.i_lock);
+ return ret;
+}
+
+void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
+{
+ struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);
+
+ if (lo)
+ wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
+}
+
static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
struct list_head *layout_list)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2661c44c62db..bbd3de1025f2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr {
size_t da_addrlen;
struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *da_remotestr; /* human readable addr+port */
+ const char *da_netid;
+ int da_transport;
};
struct nfs4_pnfs_ds {
@@ -266,6 +268,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_layoutget_free(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
+void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
struct nfs_fsid *fsid,
@@ -710,6 +713,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
}
+static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
+{
+}
+
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 537b80d693f1..ddbbf4fcda86 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -34,6 +34,8 @@
#include "internal.h"
#include "pnfs.h"
+#include "nfs4trace.h"
+
#define NFSDBG_FACILITY NFSDBG_PNFS
/*
@@ -192,24 +194,28 @@ nfs4_find_get_deviceid(struct nfs_server *server,
d = __nfs4_find_get_deviceid(server, id, hash);
if (d)
- return d;
+ goto found;
new = nfs4_get_device_info(server, id, cred, gfp_mask);
- if (!new)
+ if (!new) {
+ trace_nfs4_find_deviceid(server, id, -ENOENT);
return new;
+ }
spin_lock(&nfs4_deviceid_lock);
d = __nfs4_find_get_deviceid(server, id, hash);
if (d) {
spin_unlock(&nfs4_deviceid_lock);
server->pnfs_curr_ld->free_deviceid_node(new);
- return d;
+ } else {
+ atomic_inc(&new->ref);
+ hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
+ spin_unlock(&nfs4_deviceid_lock);
+ d = new;
}
- hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
- atomic_inc(&new->ref);
- spin_unlock(&nfs4_deviceid_lock);
-
- return new;
+found:
+ trace_nfs4_find_deviceid(server, id, 0);
+ return d;
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
@@ -278,6 +284,7 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
}
if (!atomic_dec_and_test(&d->ref))
return false;
+ trace_nfs4_deviceid_free(d->nfs_client, &d->deviceid);
d->ld->free_deviceid_node(d);
return true;
}
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 679767ac258d..2efcfdd348a1 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -661,6 +661,21 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
return NULL;
}
+static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
+{
+ struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
+ if (da)
+ INIT_LIST_HEAD(&da->da_node);
+ return da;
+}
+
+static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
+{
+ kfree(da->da_remotestr);
+ kfree(da->da_netid);
+ kfree(da);
+}
+
static void destroy_ds(struct nfs4_pnfs_ds *ds)
{
struct nfs4_pnfs_ds_addr *da;
@@ -676,8 +691,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
- kfree(da->da_remotestr);
- kfree(da);
+ nfs4_pnfs_ds_addr_free(da);
}
kfree(ds->ds_remotestr);
@@ -854,12 +868,17 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
if (!IS_ERR(clp)) {
struct xprt_create xprt_args = {
- .ident = XPRT_TRANSPORT_TCP,
+ .ident = da->da_transport,
.net = clp->cl_net,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
};
+
+ if (da->da_transport != clp->cl_proto)
+ continue;
+ if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+ continue;
/* Add this address as an alias */
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_test_and_add_xprt, NULL);
@@ -867,7 +886,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
}
clp = get_v3_ds_connect(mds_srv,
(struct sockaddr *)&da->da_addr,
- da->da_addrlen, IPPROTO_TCP,
+ da->da_addrlen, da->da_transport,
timeo, retrans);
if (IS_ERR(clp))
continue;
@@ -905,7 +924,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
struct xprt_create xprt_args = {
- .ident = XPRT_TRANSPORT_TCP,
+ .ident = da->da_transport,
.net = clp->cl_net,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
@@ -913,17 +932,21 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
};
struct nfs4_add_xprt_data xprtdata = {
.clp = clp,
- .cred = nfs4_get_clid_cred(clp),
};
struct rpc_add_xprt_test rpcdata = {
.add_xprt_test = clp->cl_mvops->session_trunk,
.data = &xprtdata,
};
+ if (da->da_transport != clp->cl_proto)
+ continue;
+ if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+ continue;
/**
* Test this address for session trunking and
* add as an alias
*/
+ xprtdata.cred = nfs4_get_clid_cred(clp),
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
@@ -932,8 +955,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
} else {
clp = nfs4_set_ds_client(mds_srv,
(struct sockaddr *)&da->da_addr,
- da->da_addrlen, IPPROTO_TCP,
- timeo, retrans, minor_version);
+ da->da_addrlen,
+ da->da_transport, timeo,
+ retrans, minor_version);
if (IS_ERR(clp))
continue;
@@ -1021,55 +1045,26 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
__be16 port;
- int nlen, rlen;
+ ssize_t nlen, rlen;
int tmp[2];
- __be32 *p;
- char *netid, *match_netid;
- size_t len, match_netid_len;
+ char *netid;
+ size_t len;
char *startsep = "";
char *endsep = "";
/* r_netid */
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
+ gfp_flags);
+ if (unlikely(nlen < 0))
goto out_err;
- nlen = be32_to_cpup(p++);
-
- p = xdr_inline_decode(xdr, nlen);
- if (unlikely(!p))
- goto out_err;
-
- netid = kmalloc(nlen+1, gfp_flags);
- if (unlikely(!netid))
- goto out_err;
-
- netid[nlen] = '\0';
- memcpy(netid, p, nlen);
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_free_netid;
- rlen = be32_to_cpup(p);
-
- p = xdr_inline_decode(xdr, rlen);
- if (unlikely(!p))
- goto out_free_netid;
-
/* port is ".ABC.DEF", 8 chars max */
- if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
- dprintk("%s: Invalid address, length %d\n", __func__,
- rlen);
- goto out_free_netid;
- }
- buf = kmalloc(rlen + 1, gfp_flags);
- if (!buf) {
- dprintk("%s: Not enough memory\n", __func__);
+ rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
+ IPV6_SCOPE_ID_LEN + 8, gfp_flags);
+ if (unlikely(rlen < 0))
goto out_free_netid;
- }
- buf[rlen] = '\0';
- memcpy(buf, p, rlen);
/* replace port '.' with '-' */
portstr = strrchr(buf, '.');
@@ -1089,12 +1084,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
}
*portstr = '\0';
- da = kzalloc(sizeof(*da), gfp_flags);
+ da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
if (unlikely(!da))
goto out_free_buf;
- INIT_LIST_HEAD(&da->da_node);
-
if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
sizeof(da->da_addr))) {
dprintk("%s: error parsing address %s\n", __func__, buf);
@@ -1109,15 +1102,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
case AF_INET:
((struct sockaddr_in *)&da->da_addr)->sin_port = port;
da->da_addrlen = sizeof(struct sockaddr_in);
- match_netid = "tcp";
- match_netid_len = 3;
break;
case AF_INET6:
((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
da->da_addrlen = sizeof(struct sockaddr_in6);
- match_netid = "tcp6";
- match_netid_len = 4;
startsep = "[";
endsep = "]";
break;
@@ -1128,12 +1117,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
goto out_free_da;
}
- if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
- dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
- __func__, netid, match_netid);
+ da->da_transport = xprt_find_transport_ident(netid);
+ if (da->da_transport < 0) {
+ dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
+ __func__, netid);
goto out_free_da;
}
+ da->da_netid = netid;
+
/* save human readable address */
len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
da->da_remotestr = kzalloc(len, gfp_flags);
@@ -1145,7 +1137,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
- kfree(netid);
return da;
out_free_da:
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 15c865cc837f..73ab7c59d3a7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
* sure it is syntactically correct; the entries itself are decoded
* from nfs_readdir by calling the decode_entry function directly.
*/
-static int
-nfs_proc_readdir(struct dentry *dentry, const struct cred *cred,
- u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg,
+ struct nfs_readdir_res *nr_res)
{
- struct inode *dir = d_inode(dentry);
+ struct inode *dir = d_inode(nr_arg->dentry);
struct nfs_readdirargs arg = {
.fh = NFS_FH(dir),
- .cookie = cookie,
- .count = count,
- .pages = pages,
+ .cookie = nr_arg->cookie,
+ .count = nr_arg->page_len,
+ .pages = nr_arg->pages,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
- .rpc_cred = cred,
+ .rpc_cred = nr_arg->cred,
};
int status;
- dprintk("NFS call readdir %d\n", (unsigned int)cookie);
+ dprintk("NFS call readdir %llu\n", (unsigned long long)nr_arg->cookie);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nr_res->verf[0] = nr_res->verf[1] = 0;
nfs_invalidate_atime(dir);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a2c6455ea3fa..681ed98e4ba8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -46,6 +46,11 @@
#define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
/*
+ * Size of the NFS directory verifier
+ */
+#define NFS_DIR_VERIFIER_SIZE 2
+
+/*
* NFSv3/v4 Access mode cache entry
*/
struct nfs_access_entry {
@@ -88,8 +93,8 @@ struct nfs_open_context {
struct nfs_open_dir_context {
struct list_head list;
- const struct cred *cred;
unsigned long attr_gencount;
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
__u64 dir_cookie;
__u64 dup_cookie;
signed char duped;
@@ -157,7 +162,7 @@ struct nfs_inode {
* This is the cookie verifier used for NFSv3 readdir
* operations
*/
- __be32 cookieverf[2];
+ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
atomic_long_t nrequests;
struct nfs_mds_commit_info commit_info;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d63cb862d58e..3327239fa2f9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -750,6 +750,20 @@ struct nfs_entry {
struct nfs_server * server;
};
+struct nfs_readdir_arg {
+ struct dentry *dentry;
+ const struct cred *cred;
+ __be32 *verf;
+ u64 cookie;
+ struct page **pages;
+ unsigned int page_len;
+ bool plus;
+};
+
+struct nfs_readdir_res {
+ __be32 *verf;
+};
+
/*
* The following types are for NFSv2 only.
*/
@@ -1744,8 +1758,7 @@ struct nfs_rpc_ops {
unsigned int, struct iattr *);
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
- int (*readdir) (struct dentry *, const struct cred *,
- u64, struct page **, unsigned int, bool);
+ int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
dev_t);
int (*statfs) (struct nfs_server *, struct nfs_fh *,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 9b35ce50cf2b..19b6dea27367 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -128,8 +128,8 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
void xdr_inline_pages(struct xdr_buf *, unsigned int,
struct page **, unsigned int, unsigned int);
-void xdr_terminate_string(struct xdr_buf *, const u32);
-size_t xdr_buf_pagecount(struct xdr_buf *buf);
+void xdr_terminate_string(const struct xdr_buf *, const u32);
+size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
@@ -182,15 +182,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
* XDR buffer helper functions
*/
extern void xdr_shift_buf(struct xdr_buf *, size_t);
-extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
-extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
- unsigned int base, unsigned int len);
+extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *);
+extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
-extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
-extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
+extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int);
+extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int);
-extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
-extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
+extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32);
+extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *);
struct xdr_array2_desc;
typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
@@ -201,9 +200,9 @@ struct xdr_array2_desc {
xdr_xcode_elem_t xcode;
};
-extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base,
struct xdr_array2_desc *desc);
-extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base,
struct xdr_array2_desc *desc);
extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
size_t len);
@@ -251,9 +250,9 @@ extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
-extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
-extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
-extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
+extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
+extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length);
+extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length);
extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
unsigned int len);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a603d48d2b2c..d2e97ee802af 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -330,6 +330,7 @@ struct xprt_class {
struct rpc_xprt * (*setup)(struct xprt_create *);
struct module *owner;
char name[32];
+ const char * netid[];
};
/*
@@ -384,7 +385,7 @@ xprt_disable_swap(struct rpc_xprt *xprt)
*/
int xprt_register_transport(struct xprt_class *type);
int xprt_unregister_transport(struct xprt_class *type);
-int xprt_load_transport(const char *);
+int xprt_find_transport_ident(const char *);
void xprt_wait_for_reply_request_def(struct rpc_task *task);
void xprt_wait_for_reply_request_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 896aafc37b09..76e85e16854b 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
), \
TP_ARGS(wc, cid))
-DECLARE_EVENT_CLASS(xprtrdma_reply_event,
+DECLARE_EVENT_CLASS(xprtrdma_reply_class,
TP_PROTO(
const struct rpcrdma_rep *rep
),
@@ -68,29 +68,30 @@ DECLARE_EVENT_CLASS(xprtrdma_reply_event,
TP_ARGS(rep),
TP_STRUCT__entry(
- __field(const void *, rep)
- __field(const void *, r_xprt)
__field(u32, xid)
__field(u32, version)
__field(u32, proc)
+ __string(addr, rpcrdma_addrstr(rep->rr_rxprt))
+ __string(port, rpcrdma_portstr(rep->rr_rxprt))
),
TP_fast_assign(
- __entry->rep = rep;
- __entry->r_xprt = rep->rr_rxprt;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->version = be32_to_cpu(rep->rr_vers);
__entry->proc = be32_to_cpu(rep->rr_proc);
+ __assign_str(addr, rpcrdma_addrstr(rep->rr_rxprt));
+ __assign_str(port, rpcrdma_portstr(rep->rr_rxprt));
),
- TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u",
- __entry->r_xprt, __entry->xid, __entry->rep,
- __entry->version, __entry->proc
+ TP_printk("peer=[%s]:%s xid=0x%08x version=%u proc=%u",
+ __get_str(addr), __get_str(port),
+ __entry->xid, __entry->version, __entry->proc
)
);
#define DEFINE_REPLY_EVENT(name) \
- DEFINE_EVENT(xprtrdma_reply_event, name, \
+ DEFINE_EVENT(xprtrdma_reply_class, \
+ xprtrdma_reply_##name##_err, \
TP_PROTO( \
const struct rpcrdma_rep *rep \
), \
@@ -261,54 +262,67 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
), \
TP_ARGS(task, mr, nsegs))
-DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
+TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL);
+TRACE_DEFINE_ENUM(DMA_TO_DEVICE);
+TRACE_DEFINE_ENUM(DMA_FROM_DEVICE);
+TRACE_DEFINE_ENUM(DMA_NONE);
+
+#define xprtrdma_show_direction(x) \
+ __print_symbolic(x, \
+ { DMA_BIDIRECTIONAL, "BIDIR" }, \
+ { DMA_TO_DEVICE, "TO_DEVICE" }, \
+ { DMA_FROM_DEVICE, "FROM_DEVICE" }, \
+ { DMA_NONE, "NONE" })
+
+DECLARE_EVENT_CLASS(xprtrdma_mr_class,
TP_PROTO(
- const struct ib_wc *wc,
- const struct rpcrdma_frwr *frwr
+ const struct rpcrdma_mr *mr
),
- TP_ARGS(wc, frwr),
+ TP_ARGS(mr),
TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
__field(u32, mr_id)
- __field(unsigned int, status)
- __field(unsigned int, vendor_err)
+ __field(int, nents)
+ __field(u32, handle)
+ __field(u32, length)
+ __field(u64, offset)
+ __field(u32, dir)
),
TP_fast_assign(
- __entry->mr_id = frwr->fr_mr->res.id;
- __entry->status = wc->status;
- __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+ const struct rpcrdma_req *req = mr->mr_req;
+ const struct rpc_task *task = req->rl_slot.rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->nents = mr->mr_nents;
+ __entry->handle = mr->mr_handle;
+ __entry->length = mr->mr_length;
+ __entry->offset = mr->mr_offset;
+ __entry->dir = mr->mr_dir;
),
- TP_printk(
- "mr.id=%u: %s (%u/0x%x)",
- __entry->mr_id, rdma_show_wc_status(__entry->status),
- __entry->status, __entry->vendor_err
+ TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id,
+ __entry->mr_id, __entry->nents, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle,
+ xprtrdma_show_direction(__entry->dir)
)
);
-#define DEFINE_FRWR_DONE_EVENT(name) \
- DEFINE_EVENT(xprtrdma_frwr_done, name, \
+#define DEFINE_MR_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_mr_class, \
+ xprtrdma_mr_##name, \
TP_PROTO( \
- const struct ib_wc *wc, \
- const struct rpcrdma_frwr *frwr \
+ const struct rpcrdma_mr *mr \
), \
- TP_ARGS(wc, frwr))
-
-TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL);
-TRACE_DEFINE_ENUM(DMA_TO_DEVICE);
-TRACE_DEFINE_ENUM(DMA_FROM_DEVICE);
-TRACE_DEFINE_ENUM(DMA_NONE);
-
-#define xprtrdma_show_direction(x) \
- __print_symbolic(x, \
- { DMA_BIDIRECTIONAL, "BIDIR" }, \
- { DMA_TO_DEVICE, "TO_DEVICE" }, \
- { DMA_FROM_DEVICE, "FROM_DEVICE" }, \
- { DMA_NONE, "NONE" })
+ TP_ARGS(mr))
-DECLARE_EVENT_CLASS(xprtrdma_mr,
+DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class,
TP_PROTO(
const struct rpcrdma_mr *mr
),
@@ -340,45 +354,47 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
)
);
-#define DEFINE_MR_EVENT(name) \
- DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \
- TP_PROTO( \
- const struct rpcrdma_mr *mr \
- ), \
+#define DEFINE_ANON_MR_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_anonymous_mr_class, \
+ xprtrdma_mr_##name, \
+ TP_PROTO( \
+ const struct rpcrdma_mr *mr \
+ ), \
TP_ARGS(mr))
-DECLARE_EVENT_CLASS(xprtrdma_cb_event,
+DECLARE_EVENT_CLASS(xprtrdma_callback_class,
TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
const struct rpc_rqst *rqst
),
- TP_ARGS(rqst),
+ TP_ARGS(r_xprt, rqst),
TP_STRUCT__entry(
- __field(const void *, rqst)
- __field(const void *, rep)
- __field(const void *, req)
__field(u32, xid)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->rqst = rqst;
- __entry->req = rpcr_to_rdmar(rqst);
- __entry->rep = rpcr_to_rdmar(rqst)->rl_reply;
__entry->xid = be32_to_cpu(rqst->rq_xid);
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p",
- __entry->xid, __entry->rqst, __entry->req, __entry->rep
+ TP_printk("peer=[%s]:%s xid=0x%08x",
+ __get_str(addr), __get_str(port), __entry->xid
)
);
-#define DEFINE_CB_EVENT(name) \
- DEFINE_EVENT(xprtrdma_cb_event, name, \
+#define DEFINE_CALLBACK_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_callback_class, \
+ xprtrdma_cb_##name, \
TP_PROTO( \
+ const struct rpcrdma_xprt *r_xprt, \
const struct rpc_rqst *rqst \
), \
- TP_ARGS(rqst))
+ TP_ARGS(r_xprt, rqst))
/**
** Connection events
@@ -549,61 +565,33 @@ TRACE_EVENT(xprtrdma_createmrs,
)
);
-TRACE_EVENT(xprtrdma_mr_get,
- TP_PROTO(
- const struct rpcrdma_req *req
- ),
-
- TP_ARGS(req),
-
- TP_STRUCT__entry(
- __field(const void *, req)
- __field(unsigned int, task_id)
- __field(unsigned int, client_id)
- __field(u32, xid)
- ),
-
- TP_fast_assign(
- const struct rpc_rqst *rqst = &req->rl_slot;
-
- __entry->req = req;
- __entry->task_id = rqst->rq_task->tk_pid;
- __entry->client_id = rqst->rq_task->tk_client->cl_clid;
- __entry->xid = be32_to_cpu(rqst->rq_xid);
- ),
-
- TP_printk("task:%u@%u xid=0x%08x req=%p",
- __entry->task_id, __entry->client_id, __entry->xid,
- __entry->req
- )
-);
-
-TRACE_EVENT(xprtrdma_nomrs,
+TRACE_EVENT(xprtrdma_nomrs_err,
TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
const struct rpcrdma_req *req
),
- TP_ARGS(req),
+ TP_ARGS(r_xprt, req),
TP_STRUCT__entry(
- __field(const void *, req)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
- __field(u32, xid)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
- __entry->req = req;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
- __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("task:%u@%u xid=0x%08x req=%p",
- __entry->task_id, __entry->client_id, __entry->xid,
- __entry->req
+ TP_printk("peer=[%s]:%s task:%u@%u",
+ __get_str(addr), __get_str(port),
+ __entry->task_id, __entry->client_id
)
);
@@ -735,8 +723,8 @@ TRACE_EVENT(xprtrdma_post_send,
TP_ARGS(req),
TP_STRUCT__entry(
- __field(const void *, req)
- __field(const void *, sc)
+ __field(u32, cq_id)
+ __field(int, completion_id)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, num_sge)
@@ -745,20 +733,21 @@ TRACE_EVENT(xprtrdma_post_send,
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
+ const struct rpcrdma_sendctx *sc = req->rl_sendctx;
+ __entry->cq_id = sc->sc_cid.ci_queue_id;
+ __entry->completion_id = sc->sc_cid.ci_completion_id;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client ?
rqst->rq_task->tk_client->cl_clid : -1;
- __entry->req = req;
- __entry->sc = req->rl_sendctx;
__entry->num_sge = req->rl_wr.num_sge;
__entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED;
),
- TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %s",
+ TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s",
__entry->task_id, __entry->client_id,
- __entry->req, __entry->sc, __entry->num_sge,
- (__entry->num_sge == 1 ? "" : "s"),
+ __entry->cq_id, __entry->completion_id,
+ __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"),
(__entry->signaled ? "signaled" : "")
)
);
@@ -771,15 +760,17 @@ TRACE_EVENT(xprtrdma_post_recv,
TP_ARGS(rep),
TP_STRUCT__entry(
- __field(const void *, rep)
+ __field(u32, cq_id)
+ __field(int, completion_id)
),
TP_fast_assign(
- __entry->rep = rep;
+ __entry->cq_id = rep->rr_cid.ci_queue_id;
+ __entry->completion_id = rep->rr_cid.ci_completion_id;
),
- TP_printk("rep=%p",
- __entry->rep
+ TP_printk("cq.id=%d cid=%d",
+ __entry->cq_id, __entry->completion_id
)
);
@@ -816,7 +807,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
)
);
-TRACE_EVENT(xprtrdma_post_linv,
+TRACE_EVENT(xprtrdma_post_linv_err,
TP_PROTO(
const struct rpcrdma_req *req,
int status
@@ -825,19 +816,21 @@ TRACE_EVENT(xprtrdma_post_linv,
TP_ARGS(req, status),
TP_STRUCT__entry(
- __field(const void *, req)
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
__field(int, status)
- __field(u32, xid)
),
TP_fast_assign(
- __entry->req = req;
+ const struct rpc_task *task = req->rl_slot.rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
__entry->status = status;
- __entry->xid = be32_to_cpu(req->rl_slot.rq_xid);
),
- TP_printk("req=%p xid=0x%08x status=%d",
- __entry->req, __entry->xid, __entry->status
+ TP_printk("task:%u@%u status=%d",
+ __entry->task_id, __entry->client_id, __entry->status
)
);
@@ -845,75 +838,12 @@ TRACE_EVENT(xprtrdma_post_linv,
** Completion events
**/
-TRACE_EVENT(xprtrdma_wc_send,
- TP_PROTO(
- const struct rpcrdma_sendctx *sc,
- const struct ib_wc *wc
- ),
-
- TP_ARGS(sc, wc),
-
- TP_STRUCT__entry(
- __field(const void *, req)
- __field(const void *, sc)
- __field(unsigned int, unmap_count)
- __field(unsigned int, status)
- __field(unsigned int, vendor_err)
- ),
-
- TP_fast_assign(
- __entry->req = sc->sc_req;
- __entry->sc = sc;
- __entry->unmap_count = sc->sc_unmap_count;
- __entry->status = wc->status;
- __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
- ),
-
- TP_printk("req=%p sc=%p unmapped=%u: %s (%u/0x%x)",
- __entry->req, __entry->sc, __entry->unmap_count,
- rdma_show_wc_status(__entry->status),
- __entry->status, __entry->vendor_err
- )
-);
-
-TRACE_EVENT(xprtrdma_wc_receive,
- TP_PROTO(
- const struct ib_wc *wc
- ),
-
- TP_ARGS(wc),
-
- TP_STRUCT__entry(
- __field(const void *, rep)
- __field(u32, byte_len)
- __field(unsigned int, status)
- __field(u32, vendor_err)
- ),
-
- TP_fast_assign(
- __entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
- rr_cqe);
- __entry->status = wc->status;
- if (wc->status) {
- __entry->byte_len = 0;
- __entry->vendor_err = wc->vendor_err;
- } else {
- __entry->byte_len = wc->byte_len;
- __entry->vendor_err = 0;
- }
- ),
-
- TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
- __entry->rep, __entry->byte_len,
- rdma_show_wc_status(__entry->status),
- __entry->status, __entry->vendor_err
- )
-);
-
-DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
-DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
-DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
-DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_done);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_receive);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
TRACE_EVENT(xprtrdma_frwr_alloc,
TP_PROTO(
@@ -1036,9 +966,9 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
DEFINE_MR_EVENT(localinv);
DEFINE_MR_EVENT(map);
-DEFINE_MR_EVENT(unmap);
-DEFINE_MR_EVENT(reminv);
-DEFINE_MR_EVENT(recycle);
+
+DEFINE_ANON_MR_EVENT(unmap);
+DEFINE_ANON_MR_EVENT(recycle);
TRACE_EVENT(xprtrdma_dma_maperr,
TP_PROTO(
@@ -1066,17 +996,14 @@ TRACE_EVENT(xprtrdma_reply,
TP_PROTO(
const struct rpc_task *task,
const struct rpcrdma_rep *rep,
- const struct rpcrdma_req *req,
unsigned int credits
),
- TP_ARGS(task, rep, req, credits),
+ TP_ARGS(task, rep, credits),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
- __field(const void *, rep)
- __field(const void *, req)
__field(u32, xid)
__field(unsigned int, credits)
),
@@ -1084,49 +1011,102 @@ TRACE_EVENT(xprtrdma_reply,
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
- __entry->rep = rep;
- __entry->req = req;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->credits = credits;
),
- TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p",
+ TP_printk("task:%u@%u xid=0x%08x credits=%u",
__entry->task_id, __entry->client_id, __entry->xid,
- __entry->credits, __entry->rep, __entry->req
+ __entry->credits
)
);
-TRACE_EVENT(xprtrdma_defer_cmp,
+DEFINE_REPLY_EVENT(vers);
+DEFINE_REPLY_EVENT(rqst);
+DEFINE_REPLY_EVENT(short);
+DEFINE_REPLY_EVENT(hdr);
+
+TRACE_EVENT(xprtrdma_err_vers,
TP_PROTO(
- const struct rpcrdma_rep *rep
+ const struct rpc_rqst *rqst,
+ __be32 *min,
+ __be32 *max
),
- TP_ARGS(rep),
+ TP_ARGS(rqst, min, max),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
- __field(const void *, rep)
__field(u32, xid)
+ __field(u32, min)
+ __field(u32, max)
),
TP_fast_assign(
- __entry->task_id = rep->rr_rqst->rq_task->tk_pid;
- __entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid;
- __entry->rep = rep;
- __entry->xid = be32_to_cpu(rep->rr_xid);
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->min = be32_to_cpup(min);
+ __entry->max = be32_to_cpup(max);
),
- TP_printk("task:%u@%u xid=0x%08x rep=%p",
+ TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]",
__entry->task_id, __entry->client_id, __entry->xid,
- __entry->rep
+ __entry->min, __entry->max
+ )
+);
+
+TRACE_EVENT(xprtrdma_err_chunk,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x",
+ __entry->task_id, __entry->client_id, __entry->xid
)
);
-DEFINE_REPLY_EVENT(xprtrdma_reply_vers);
-DEFINE_REPLY_EVENT(xprtrdma_reply_rqst);
-DEFINE_REPLY_EVENT(xprtrdma_reply_short);
-DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
+TRACE_EVENT(xprtrdma_err_unrecognized,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ __be32 *procedure
+ ),
+
+ TP_ARGS(rqst, procedure),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, procedure)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->procedure = be32_to_cpup(procedure);
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x procedure=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->procedure
+ )
+);
TRACE_EVENT(xprtrdma_fixup,
TP_PROTO(
@@ -1187,6 +1167,28 @@ TRACE_EVENT(xprtrdma_decode_seg,
)
);
+TRACE_EVENT(xprtrdma_mrs_zap,
+ TP_PROTO(
+ const struct rpc_task *task
+ ),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+ TP_printk("task:%u@%u",
+ __entry->task_id, __entry->client_id
+ )
+);
+
/**
** Callback events
**/
@@ -1219,36 +1221,8 @@ TRACE_EVENT(xprtrdma_cb_setup,
)
);
-DEFINE_CB_EVENT(xprtrdma_cb_call);
-DEFINE_CB_EVENT(xprtrdma_cb_reply);
-
-TRACE_EVENT(xprtrdma_leaked_rep,
- TP_PROTO(
- const struct rpc_rqst *rqst,
- const struct rpcrdma_rep *rep
- ),
-
- TP_ARGS(rqst, rep),
-
- TP_STRUCT__entry(
- __field(unsigned int, task_id)
- __field(unsigned int, client_id)
- __field(u32, xid)
- __field(const void *, rep)
- ),
-
- TP_fast_assign(
- __entry->task_id = rqst->rq_task->tk_pid;
- __entry->client_id = rqst->rq_task->tk_client->cl_clid;
- __entry->xid = be32_to_cpu(rqst->rq_xid);
- __entry->rep = rep;
- ),
-
- TP_printk("task:%u@%u xid=0x%08x rep=%p",
- __entry->task_id, __entry->client_id, __entry->xid,
- __entry->rep
- )
-);
+DEFINE_CALLBACK_EVENT(call);
+DEFINE_CALLBACK_EVENT(reply);
/**
** Server-side RPC/RDMA events
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3259120462ed..612f0a641f4c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1251,10 +1251,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int hdrsize)
{
- /* Subtract one to force an extra word of buffer space for the
- * payload's XDR pad to fall into the rcv_buf's tail iovec.
- */
- hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
+ hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign;
xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index fd9bca242724..56029e3af6ff 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -128,13 +128,13 @@ static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *n
return 0;
len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
xprt->debugfs->d_name.name);
- if (len > sizeof(name))
+ if (len >= sizeof(name))
return -1;
if (*nump == 0)
strcpy(link, "xprt");
else {
len = snprintf(link, sizeof(link), "xprt%d", *nump);
- if (len > sizeof(link))
+ if (len >= sizeof(link))
return -1;
}
debugfs_create_symlink(link, clnt->cl_debugfs, name);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f06d7c315017..cf702a5f7fe5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -676,6 +676,23 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
+ * rpc_wake_up_locked - wake up all rpc_tasks
+ * @queue: rpc_wait_queue on which the tasks are sleeping
+ *
+ */
+static void rpc_wake_up_locked(struct rpc_wait_queue *queue)
+{
+ struct rpc_task *task;
+
+ for (;;) {
+ task = __rpc_find_next_queued(queue);
+ if (task == NULL)
+ break;
+ rpc_wake_up_task_queue_locked(queue, task);
+ }
+}
+
+/**
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
@@ -683,25 +700,28 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
- struct list_head *head;
-
spin_lock(&queue->lock);
- head = &queue->tasks[queue->maxpriority];
+ rpc_wake_up_locked(queue);
+ spin_unlock(&queue->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_wake_up);
+
+/**
+ * rpc_wake_up_status_locked - wake up all rpc_tasks and set their status value.
+ * @queue: rpc_wait_queue on which the tasks are sleeping
+ * @status: status value to set
+ */
+static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status)
+{
+ struct rpc_task *task;
+
for (;;) {
- while (!list_empty(head)) {
- struct rpc_task *task;
- task = list_first_entry(head,
- struct rpc_task,
- u.tk_wait.list);
- rpc_wake_up_task_queue_locked(queue, task);
- }
- if (head == &queue->tasks[0])
+ task = __rpc_find_next_queued(queue);
+ if (task == NULL)
break;
- head--;
+ rpc_wake_up_task_queue_set_status_locked(queue, task, status);
}
- spin_unlock(&queue->lock);
}
-EXPORT_SYMBOL_GPL(rpc_wake_up);
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
@@ -712,23 +732,8 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
- struct list_head *head;
-
spin_lock(&queue->lock);
- head = &queue->tasks[queue->maxpriority];
- for (;;) {
- while (!list_empty(head)) {
- struct rpc_task *task;
- task = list_first_entry(head,
- struct rpc_task,
- u.tk_wait.list);
- task->tk_status = status;
- rpc_wake_up_task_queue_locked(queue, task);
- }
- if (head == &queue->tasks[0])
- break;
- head--;
- }
+ rpc_wake_up_status_locked(queue, status);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 757560a3b06b..3964ff74ee51 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -123,8 +123,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
* @len: length of string, in bytes
*
*/
-void
-xdr_terminate_string(struct xdr_buf *buf, const u32 len)
+void xdr_terminate_string(const struct xdr_buf *buf, const u32 len)
{
char *kaddr;
@@ -134,8 +133,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
}
EXPORT_SYMBOL_GPL(xdr_terminate_string);
-size_t
-xdr_buf_pagecount(struct xdr_buf *buf)
+size_t xdr_buf_pagecount(const struct xdr_buf *buf)
{
if (!buf->page_len)
return 0;
@@ -193,9 +191,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
tail->iov_base = buf + offset;
tail->iov_len = buflen - offset;
- if ((xdr->page_len & 3) == 0)
- tail->iov_len -= sizeof(__be32);
-
xdr->buflen += len;
}
EXPORT_SYMBOL_GPL(xdr_inline_pages);
@@ -228,6 +223,9 @@ _shift_data_left_pages(struct page **pages, size_t pgto_base,
BUG_ON(pgfrom_base <= pgto_base);
+ if (!len)
+ return;
+
pgto = pages + (pgto_base >> PAGE_SHIFT);
pgfrom = pages + (pgfrom_base >> PAGE_SHIFT);
@@ -266,26 +264,6 @@ _shift_data_left_pages(struct page **pages, size_t pgto_base,
} while ((len -= copy) != 0);
}
-static void
-_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len)
-{
- struct kvec *tail = buf->tail;
-
- if (len > tail->iov_len)
- len = tail->iov_len;
-
- _copy_to_pages(buf->pages,
- buf->page_base + pgto,
- (char *)tail->iov_base,
- len);
- tail->iov_len -= len;
-
- if (tail->iov_len > 0)
- memmove((char *)tail->iov_base,
- tail->iov_base + len,
- tail->iov_len);
-}
-
/**
* _shift_data_right_pages
* @pages: vector of pages containing both the source and dest memory area.
@@ -310,6 +288,9 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
BUG_ON(pgto_base <= pgfrom_base);
+ if (!len)
+ return;
+
pgto_base += len;
pgfrom_base += len;
@@ -351,46 +332,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
} while ((len -= copy) != 0);
}
-static unsigned int
-_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len)
-{
- struct kvec *tail = buf->tail;
- unsigned int tailbuf_len;
- unsigned int result = 0;
- size_t copy;
-
- tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
-
- /* Shift the tail first */
- if (tailbuf_len != 0) {
- unsigned int free_space = tailbuf_len - tail->iov_len;
-
- if (len < free_space)
- free_space = len;
- if (len > free_space)
- len = free_space;
-
- tail->iov_len += free_space;
- copy = len;
-
- if (tail->iov_len > len) {
- char *p = (char *)tail->iov_base + len;
- memmove(p, tail->iov_base, tail->iov_len - free_space);
- result += tail->iov_len - free_space;
- } else
- copy = tail->iov_len;
-
- /* Copy from the inlined pages into the tail */
- _copy_from_pages((char *)tail->iov_base,
- buf->pages,
- buf->page_base + pgfrom,
- copy);
- result += copy;
- }
-
- return result;
-}
-
/**
* _copy_to_pages
* @pages: array of pages
@@ -408,6 +349,9 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
char *vto;
size_t copy;
+ if (!len)
+ return;
+
pgto = pages + (pgbase >> PAGE_SHIFT);
pgbase &= ~PAGE_MASK;
@@ -452,6 +396,9 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
char *vfrom;
size_t copy;
+ if (!len)
+ return;
+
pgfrom = pages + (pgbase >> PAGE_SHIFT);
pgbase &= ~PAGE_MASK;
@@ -475,18 +422,42 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
}
EXPORT_SYMBOL_GPL(_copy_from_pages);
+static void xdr_buf_iov_zero(const struct kvec *iov, unsigned int base,
+ unsigned int len)
+{
+ if (base >= iov->iov_len)
+ return;
+ if (len > iov->iov_len - base)
+ len = iov->iov_len - base;
+ memset(iov->iov_base + base, 0, len);
+}
+
/**
- * _zero_pages
- * @pages: array of pages
- * @pgbase: beginning page vector address
+ * xdr_buf_pages_zero
+ * @buf: xdr_buf
+ * @pgbase: beginning offset
* @len: length
*/
-static void
-_zero_pages(struct page **pages, size_t pgbase, size_t len)
+static void xdr_buf_pages_zero(const struct xdr_buf *buf, unsigned int pgbase,
+ unsigned int len)
{
+ struct page **pages = buf->pages;
struct page **page;
char *vpage;
- size_t zero;
+ unsigned int zero;
+
+ if (!len)
+ return;
+ if (pgbase >= buf->page_len) {
+ xdr_buf_iov_zero(buf->tail, pgbase - buf->page_len, len);
+ return;
+ }
+ if (pgbase + len > buf->page_len) {
+ xdr_buf_iov_zero(buf->tail, 0, pgbase + len - buf->page_len);
+ len = buf->page_len - pgbase;
+ }
+
+ pgbase += buf->page_base;
page = pages + (pgbase >> PAGE_SHIFT);
pgbase &= ~PAGE_MASK;
@@ -507,122 +478,367 @@ _zero_pages(struct page **pages, size_t pgbase, size_t len)
} while ((len -= zero) != 0);
}
+static unsigned int xdr_buf_pages_fill_sparse(const struct xdr_buf *buf,
+ unsigned int buflen, gfp_t gfp)
+{
+ unsigned int i, npages, pagelen;
+
+ if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ return buflen;
+ if (buflen <= buf->head->iov_len)
+ return buflen;
+ pagelen = buflen - buf->head->iov_len;
+ if (pagelen > buf->page_len)
+ pagelen = buf->page_len;
+ npages = (pagelen + buf->page_base + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ if (!buf->pages[i])
+ continue;
+ buf->pages[i] = alloc_page(gfp);
+ if (likely(buf->pages[i]))
+ continue;
+ buflen -= pagelen;
+ pagelen = i << PAGE_SHIFT;
+ if (pagelen > buf->page_base)
+ buflen += pagelen - buf->page_base;
+ break;
+ }
+ return buflen;
+}
+
+static void xdr_buf_try_expand(struct xdr_buf *buf, unsigned int len)
+{
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ unsigned int sum = head->iov_len + buf->page_len + tail->iov_len;
+ unsigned int free_space, newlen;
+
+ if (sum > buf->len) {
+ free_space = min_t(unsigned int, sum - buf->len, len);
+ newlen = xdr_buf_pages_fill_sparse(buf, buf->len + free_space,
+ GFP_KERNEL);
+ free_space = newlen - buf->len;
+ buf->len = newlen;
+ len -= free_space;
+ if (!len)
+ return;
+ }
+
+ if (buf->buflen > sum) {
+ /* Expand the tail buffer */
+ free_space = min_t(unsigned int, buf->buflen - sum, len);
+ tail->iov_len += free_space;
+ buf->len += free_space;
+ }
+}
+
+static void xdr_buf_tail_copy_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *tail = buf->tail;
+ unsigned int to = base + shift;
+
+ if (to >= tail->iov_len)
+ return;
+ if (len + to > tail->iov_len)
+ len = tail->iov_len - to;
+ memmove(tail->iov_base + to, tail->iov_base + base, len);
+}
+
+static void xdr_buf_pages_copy_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *tail = buf->tail;
+ unsigned int to = base + shift;
+ unsigned int pglen = 0;
+ unsigned int talen = 0, tato = 0;
+
+ if (base >= buf->page_len)
+ return;
+ if (len > buf->page_len - base)
+ len = buf->page_len - base;
+ if (to >= buf->page_len) {
+ tato = to - buf->page_len;
+ if (tail->iov_len >= len + tato)
+ talen = len;
+ else if (tail->iov_len > tato)
+ talen = tail->iov_len - tato;
+ } else if (len + to >= buf->page_len) {
+ pglen = buf->page_len - to;
+ talen = len - pglen;
+ if (talen > tail->iov_len)
+ talen = tail->iov_len;
+ } else
+ pglen = len;
+
+ _copy_from_pages(tail->iov_base + tato, buf->pages,
+ buf->page_base + base + pglen, talen);
+ _shift_data_right_pages(buf->pages, buf->page_base + to,
+ buf->page_base + base, pglen);
+}
+
+static void xdr_buf_head_copy_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *head = buf->head;
+ const struct kvec *tail = buf->tail;
+ unsigned int to = base + shift;
+ unsigned int pglen = 0, pgto = 0;
+ unsigned int talen = 0, tato = 0;
+
+ if (base >= head->iov_len)
+ return;
+ if (len > head->iov_len - base)
+ len = head->iov_len - base;
+ if (to >= buf->page_len + head->iov_len) {
+ tato = to - buf->page_len - head->iov_len;
+ talen = len;
+ } else if (to >= head->iov_len) {
+ pgto = to - head->iov_len;
+ pglen = len;
+ if (pgto + pglen > buf->page_len) {
+ talen = pgto + pglen - buf->page_len;
+ pglen -= talen;
+ }
+ } else {
+ pglen = len - to;
+ if (pglen > buf->page_len) {
+ talen = pglen - buf->page_len;
+ pglen = buf->page_len;
+ }
+ }
+
+ len -= talen;
+ base += len;
+ if (talen + tato > tail->iov_len)
+ talen = tail->iov_len > tato ? tail->iov_len - tato : 0;
+ memcpy(tail->iov_base + tato, head->iov_base + base, talen);
+
+ len -= pglen;
+ base -= pglen;
+ _copy_to_pages(buf->pages, buf->page_base + pgto, head->iov_base + base,
+ pglen);
+
+ base -= len;
+ memmove(head->iov_base + to, head->iov_base + base, len);
+}
+
+static void xdr_buf_tail_shift_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *tail = buf->tail;
+
+ if (base >= tail->iov_len || !shift || !len)
+ return;
+ xdr_buf_tail_copy_right(buf, base, len, shift);
+}
+
+static void xdr_buf_pages_shift_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ if (!shift || !len)
+ return;
+ if (base >= buf->page_len) {
+ xdr_buf_tail_shift_right(buf, base - buf->page_len, len, shift);
+ return;
+ }
+ if (base + len > buf->page_len)
+ xdr_buf_tail_shift_right(buf, 0, base + len - buf->page_len,
+ shift);
+ xdr_buf_pages_copy_right(buf, base, len, shift);
+}
+
+static void xdr_buf_head_shift_right(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *head = buf->head;
+
+ if (!shift)
+ return;
+ if (base >= head->iov_len) {
+ xdr_buf_pages_shift_right(buf, head->iov_len - base, len,
+ shift);
+ return;
+ }
+ if (base + len > head->iov_len)
+ xdr_buf_pages_shift_right(buf, 0, base + len - head->iov_len,
+ shift);
+ xdr_buf_head_copy_right(buf, base, len, shift);
+}
+
+static void xdr_buf_tail_copy_left(const struct xdr_buf *buf, unsigned int base,
+ unsigned int len, unsigned int shift)
+{
+ const struct kvec *tail = buf->tail;
+
+ if (base >= tail->iov_len)
+ return;
+ if (len > tail->iov_len - base)
+ len = tail->iov_len - base;
+ /* Shift data into head */
+ if (shift > buf->page_len + base) {
+ const struct kvec *head = buf->head;
+ unsigned int hdto =
+ head->iov_len + buf->page_len + base - shift;
+ unsigned int hdlen = len;
+
+ if (WARN_ONCE(shift > head->iov_len + buf->page_len + base,
+ "SUNRPC: Misaligned data.\n"))
+ return;
+ if (hdto + hdlen > head->iov_len)
+ hdlen = head->iov_len - hdto;
+ memcpy(head->iov_base + hdto, tail->iov_base + base, hdlen);
+ base += hdlen;
+ len -= hdlen;
+ if (!len)
+ return;
+ }
+ /* Shift data into pages */
+ if (shift > base) {
+ unsigned int pgto = buf->page_len + base - shift;
+ unsigned int pglen = len;
+
+ if (pgto + pglen > buf->page_len)
+ pglen = buf->page_len - pgto;
+ _copy_to_pages(buf->pages, buf->page_base + pgto,
+ tail->iov_base + base, pglen);
+ base += pglen;
+ len -= pglen;
+ if (!len)
+ return;
+ }
+ memmove(tail->iov_base + base - shift, tail->iov_base + base, len);
+}
+
+static void xdr_buf_pages_copy_left(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ unsigned int pgto;
+
+ if (base >= buf->page_len)
+ return;
+ if (len > buf->page_len - base)
+ len = buf->page_len - base;
+ /* Shift data into head */
+ if (shift > base) {
+ const struct kvec *head = buf->head;
+ unsigned int hdto = head->iov_len + base - shift;
+ unsigned int hdlen = len;
+
+ if (WARN_ONCE(shift > head->iov_len + base,
+ "SUNRPC: Misaligned data.\n"))
+ return;
+ if (hdto + hdlen > head->iov_len)
+ hdlen = head->iov_len - hdto;
+ _copy_from_pages(head->iov_base + hdto, buf->pages,
+ buf->page_base + base, hdlen);
+ base += hdlen;
+ len -= hdlen;
+ if (!len)
+ return;
+ }
+ pgto = base - shift;
+ _shift_data_left_pages(buf->pages, buf->page_base + pgto,
+ buf->page_base + base, len);
+}
+
+static void xdr_buf_tail_shift_left(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ if (!shift || !len)
+ return;
+ xdr_buf_tail_copy_left(buf, base, len, shift);
+}
+
+static void xdr_buf_pages_shift_left(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ if (!shift || !len)
+ return;
+ if (base >= buf->page_len) {
+ xdr_buf_tail_shift_left(buf, base - buf->page_len, len, shift);
+ return;
+ }
+ xdr_buf_pages_copy_left(buf, base, len, shift);
+ len += base;
+ if (len <= buf->page_len)
+ return;
+ xdr_buf_tail_copy_left(buf, 0, len - buf->page_len, shift);
+}
+
/**
* xdr_shrink_bufhead
* @buf: xdr_buf
- * @len: bytes to remove from buf->head[0]
+ * @len: new length of buf->head[0]
*
- * Shrinks XDR buffer's header kvec buf->head[0] by
+ * Shrinks XDR buffer's header kvec buf->head[0], setting it to
* 'len' bytes. The extra data is not lost, but is instead
* moved into the inlined pages and/or the tail.
*/
-static unsigned int
-xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
+static unsigned int xdr_shrink_bufhead(struct xdr_buf *buf, unsigned int len)
{
- struct kvec *head, *tail;
- size_t copy, offs;
- unsigned int pglen = buf->page_len;
- unsigned int result;
-
- result = 0;
- tail = buf->tail;
- head = buf->head;
+ struct kvec *head = buf->head;
+ unsigned int shift, buflen = max(buf->len, len);
WARN_ON_ONCE(len > head->iov_len);
- if (len > head->iov_len)
- len = head->iov_len;
-
- /* Shift the tail first */
- if (tail->iov_len != 0) {
- if (tail->iov_len > len) {
- copy = tail->iov_len - len;
- memmove((char *)tail->iov_base + len,
- tail->iov_base, copy);
- result += copy;
- }
- /* Copy from the inlined pages into the tail */
- copy = len;
- if (copy > pglen)
- copy = pglen;
- offs = len - copy;
- if (offs >= tail->iov_len)
- copy = 0;
- else if (copy > tail->iov_len - offs)
- copy = tail->iov_len - offs;
- if (copy != 0) {
- _copy_from_pages((char *)tail->iov_base + offs,
- buf->pages,
- buf->page_base + pglen + offs - len,
- copy);
- result += copy;
- }
- /* Do we also need to copy data from the head into the tail ? */
- if (len > pglen) {
- offs = copy = len - pglen;
- if (copy > tail->iov_len)
- copy = tail->iov_len;
- memcpy(tail->iov_base,
- (char *)head->iov_base +
- head->iov_len - offs,
- copy);
- result += copy;
- }
- }
- /* Now handle pages */
- if (pglen != 0) {
- if (pglen > len)
- _shift_data_right_pages(buf->pages,
- buf->page_base + len,
- buf->page_base,
- pglen - len);
- copy = len;
- if (len > pglen)
- copy = pglen;
- _copy_to_pages(buf->pages, buf->page_base,
- (char *)head->iov_base + head->iov_len - len,
- copy);
- result += copy;
+ if (head->iov_len > buflen) {
+ buf->buflen -= head->iov_len - buflen;
+ head->iov_len = buflen;
}
- head->iov_len -= len;
- buf->buflen -= len;
- /* Have we truncated the message? */
- if (buf->len > buf->buflen)
- buf->len = buf->buflen;
-
- return result;
+ if (len >= head->iov_len)
+ return 0;
+ shift = head->iov_len - len;
+ xdr_buf_try_expand(buf, shift);
+ xdr_buf_head_shift_right(buf, len, buflen - len, shift);
+ head->iov_len = len;
+ buf->buflen -= shift;
+ buf->len -= shift;
+ return shift;
}
/**
- * xdr_shrink_pagelen - shrinks buf->pages by up to @len bytes
+ * xdr_shrink_pagelen - shrinks buf->pages to @len bytes
* @buf: xdr_buf
- * @len: bytes to remove from buf->pages
+ * @len: new page buffer length
*
* The extra data is not lost, but is instead moved into buf->tail.
* Returns the actual number of bytes moved.
*/
-static unsigned int
-xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
+static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, unsigned int len)
{
- unsigned int pglen = buf->page_len;
- unsigned int result;
+ unsigned int shift, buflen = buf->len - buf->head->iov_len;
- if (len > buf->page_len)
- len = buf-> page_len;
-
- result = _shift_data_right_tail(buf, pglen - len, len);
- buf->page_len -= len;
- buf->buflen -= len;
- /* Have we truncated the message? */
- if (buf->len > buf->buflen)
- buf->len = buf->buflen;
-
- return result;
+ WARN_ON_ONCE(len > buf->page_len);
+ if (buf->head->iov_len >= buf->len || len > buflen)
+ buflen = len;
+ if (buf->page_len > buflen) {
+ buf->buflen -= buf->page_len - buflen;
+ buf->page_len = buflen;
+ }
+ if (len >= buf->page_len)
+ return 0;
+ shift = buf->page_len - len;
+ xdr_buf_try_expand(buf, shift);
+ xdr_buf_pages_shift_right(buf, len, buflen - len, shift);
+ buf->page_len = len;
+ buf->len -= shift;
+ buf->buflen -= shift;
+ return shift;
}
void
xdr_shift_buf(struct xdr_buf *buf, size_t len)
{
- xdr_shrink_bufhead(buf, len);
+ xdr_shrink_bufhead(buf, buf->head->iov_len - len);
}
EXPORT_SYMBOL_GPL(xdr_shift_buf);
@@ -636,6 +852,18 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
}
EXPORT_SYMBOL_GPL(xdr_stream_pos);
+static void xdr_stream_set_pos(struct xdr_stream *xdr, unsigned int pos)
+{
+ unsigned int blen = xdr->buf->len;
+
+ xdr->nwords = blen > pos ? XDR_QUADLEN(blen) - XDR_QUADLEN(pos) : 0;
+}
+
+static void xdr_stream_page_set_pos(struct xdr_stream *xdr, unsigned int pos)
+{
+ xdr_stream_set_pos(xdr, pos + xdr->buf->head[0].iov_len);
+}
+
/**
* xdr_page_pos - Return the current offset from the start of the xdr pages
* @xdr: pointer to struct xdr_stream
@@ -969,19 +1197,31 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
}
EXPORT_SYMBOL_GPL(xdr_write_pages);
-static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
- unsigned int len)
+static unsigned int xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+ unsigned int base, unsigned int len)
{
if (len > iov->iov_len)
len = iov->iov_len;
- xdr->p = (__be32*)iov->iov_base;
+ if (unlikely(base > len))
+ base = len;
+ xdr->p = (__be32*)(iov->iov_base + base);
xdr->end = (__be32*)(iov->iov_base + len);
xdr->iov = iov;
xdr->page_ptr = NULL;
+ return len - base;
+}
+
+static unsigned int xdr_set_tail_base(struct xdr_stream *xdr,
+ unsigned int base, unsigned int len)
+{
+ struct xdr_buf *buf = xdr->buf;
+
+ xdr_stream_set_pos(xdr, base + buf->page_len + buf->head->iov_len);
+ return xdr_set_iov(xdr, buf->tail, base, len);
}
-static int xdr_set_page_base(struct xdr_stream *xdr,
- unsigned int base, unsigned int len)
+static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
+ unsigned int base, unsigned int len)
{
unsigned int pgnr;
unsigned int maxlen;
@@ -990,12 +1230,15 @@ static int xdr_set_page_base(struct xdr_stream *xdr,
void *kaddr;
maxlen = xdr->buf->page_len;
- if (base >= maxlen)
- return -EINVAL;
- maxlen -= base;
+ if (base >= maxlen) {
+ base = maxlen;
+ maxlen = 0;
+ } else
+ maxlen -= base;
if (len > maxlen)
len = maxlen;
+ xdr_stream_page_set_pos(xdr, base);
base += xdr->buf->page_base;
pgnr = base >> PAGE_SHIFT;
@@ -1010,14 +1253,16 @@ static int xdr_set_page_base(struct xdr_stream *xdr,
pgend = PAGE_SIZE;
xdr->end = (__be32*)(kaddr + pgend);
xdr->iov = NULL;
- return 0;
+ return len;
}
static void xdr_set_page(struct xdr_stream *xdr, unsigned int base,
unsigned int len)
{
- if (xdr_set_page_base(xdr, base, len) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+ if (xdr_set_page_base(xdr, base, len) == 0) {
+ base -= xdr->buf->page_len;
+ xdr_set_tail_base(xdr, base, len);
+ }
}
static void xdr_set_next_page(struct xdr_stream *xdr)
@@ -1026,17 +1271,18 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
newbase -= xdr->buf->page_base;
-
- xdr_set_page(xdr, newbase, PAGE_SIZE);
+ if (newbase < xdr->buf->page_len)
+ xdr_set_page_base(xdr, newbase, xdr_stream_remaining(xdr));
+ else
+ xdr_set_tail_base(xdr, 0, xdr_stream_remaining(xdr));
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
{
if (xdr->page_ptr != NULL)
xdr_set_next_page(xdr);
- else if (xdr->iov == xdr->buf->head) {
- xdr_set_page(xdr, 0, PAGE_SIZE);
- }
+ else if (xdr->iov == xdr->buf->head)
+ xdr_set_page(xdr, 0, xdr_stream_remaining(xdr));
return xdr->p != xdr->end;
}
@@ -1053,12 +1299,9 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
xdr->buf = buf;
xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
- if (buf->head[0].iov_len != 0)
- xdr_set_iov(xdr, buf->head, buf->len);
- else if (buf->page_len != 0)
- xdr_set_page_base(xdr, 0, buf->len);
- else
- xdr_set_iov(xdr, buf->head, buf->len);
+ if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
+ xdr_set_page_base(xdr, 0, buf->len) == 0)
+ xdr_set_iov(xdr, buf->tail, 0, buf->len);
if (p != NULL && p > xdr->p && xdr->end >= p) {
xdr->nwords -= p - xdr->p;
xdr->p = p;
@@ -1158,14 +1401,13 @@ static void xdr_realign_pages(struct xdr_stream *xdr)
struct xdr_buf *buf = xdr->buf;
struct kvec *iov = buf->head;
unsigned int cur = xdr_stream_pos(xdr);
- unsigned int copied, offset;
+ unsigned int copied;
/* Realign pages to current pointer position */
if (iov->iov_len > cur) {
- offset = iov->iov_len - cur;
- copied = xdr_shrink_bufhead(buf, offset);
- trace_rpc_xdr_alignment(xdr, offset, copied);
- xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ copied = xdr_shrink_bufhead(buf, cur);
+ trace_rpc_xdr_alignment(xdr, cur, copied);
+ xdr_set_page(xdr, 0, buf->page_len);
}
}
@@ -1173,8 +1415,7 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
{
struct xdr_buf *buf = xdr->buf;
unsigned int nwords = XDR_QUADLEN(len);
- unsigned int cur = xdr_stream_pos(xdr);
- unsigned int copied, offset;
+ unsigned int copied;
if (xdr->nwords == 0)
return 0;
@@ -1188,125 +1429,103 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
len = buf->page_len;
else if (nwords < xdr->nwords) {
/* Truncate page data and move it into the tail */
- offset = buf->page_len - len;
- copied = xdr_shrink_pagelen(buf, offset);
- trace_rpc_xdr_alignment(xdr, offset, copied);
- xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ copied = xdr_shrink_pagelen(buf, len);
+ trace_rpc_xdr_alignment(xdr, len, copied);
}
return len;
}
/**
- * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
+ * xdr_read_pages - align page-based XDR data to current pointer position
* @xdr: pointer to xdr_stream struct
* @len: number of bytes of page data
*
* Moves data beyond the current pointer position from the XDR head[] buffer
- * into the page list. Any data that lies beyond current position + "len"
- * bytes is moved into the XDR tail[].
+ * into the page list. Any data that lies beyond current position + @len
+ * bytes is moved into the XDR tail[]. The xdr_stream current position is
+ * then advanced past that data to align to the next XDR object in the tail.
*
* Returns the number of XDR encoded bytes now contained in the pages
*/
unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
{
- struct xdr_buf *buf = xdr->buf;
- struct kvec *iov;
- unsigned int nwords;
- unsigned int end;
- unsigned int padding;
+ unsigned int nwords = XDR_QUADLEN(len);
+ unsigned int base, end, pglen;
- len = xdr_align_pages(xdr, len);
- if (len == 0)
+ pglen = xdr_align_pages(xdr, nwords << 2);
+ if (pglen == 0)
return 0;
- nwords = XDR_QUADLEN(len);
- padding = (nwords << 2) - len;
- xdr->iov = iov = buf->tail;
- /* Compute remaining message length. */
- end = ((xdr->nwords - nwords) << 2) + padding;
- if (end > iov->iov_len)
- end = iov->iov_len;
- /*
- * Position current pointer at beginning of tail, and
- * set remaining message length.
- */
- xdr->p = (__be32 *)((char *)iov->iov_base + padding);
- xdr->end = (__be32 *)((char *)iov->iov_base + end);
- xdr->page_ptr = NULL;
- xdr->nwords = XDR_QUADLEN(end - padding);
- return len;
+ base = (nwords << 2) - pglen;
+ end = xdr_stream_remaining(xdr) - pglen;
+
+ xdr_set_tail_base(xdr, base, end);
+ return len <= pglen ? len : pglen;
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
-uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length)
+unsigned int xdr_align_data(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int length)
{
struct xdr_buf *buf = xdr->buf;
- unsigned int from, bytes;
- unsigned int shift = 0;
-
- if ((offset + length) < offset ||
- (offset + length) > buf->page_len)
- length = buf->page_len - offset;
+ unsigned int from, bytes, len;
+ unsigned int shift;
xdr_realign_pages(xdr);
from = xdr_page_pos(xdr);
- bytes = xdr->nwords << 2;
- if (length < bytes)
- bytes = length;
+
+ if (from >= buf->page_len + buf->tail->iov_len)
+ return 0;
+ if (from + buf->head->iov_len >= buf->len)
+ return 0;
+
+ len = buf->len - buf->head->iov_len;
+
+ /* We only shift data left! */
+ if (WARN_ONCE(from < offset, "SUNRPC: misaligned data src=%u dst=%u\n",
+ from, offset))
+ return 0;
+ if (WARN_ONCE(offset > buf->page_len,
+ "SUNRPC: buffer overflow. offset=%u, page_len=%u\n",
+ offset, buf->page_len))
+ return 0;
/* Move page data to the left */
- if (from > offset) {
- shift = min_t(unsigned int, bytes, buf->page_len - from);
- _shift_data_left_pages(buf->pages,
- buf->page_base + offset,
- buf->page_base + from,
- shift);
- bytes -= shift;
-
- /* Move tail data into the pages, if necessary */
- if (bytes > 0)
- _shift_data_left_tail(buf, offset + shift, bytes);
- }
+ shift = from - offset;
+ xdr_buf_pages_shift_left(buf, from, len, shift);
+
+ bytes = xdr_stream_remaining(xdr);
+ if (length > bytes)
+ length = bytes;
+ bytes -= length;
- xdr->nwords -= XDR_QUADLEN(length);
- xdr_set_page(xdr, from + length, PAGE_SIZE);
+ xdr->buf->len -= shift;
+ xdr_set_page(xdr, offset + length, bytes);
return length;
}
EXPORT_SYMBOL_GPL(xdr_align_data);
-uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length)
+unsigned int xdr_expand_hole(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int length)
{
struct xdr_buf *buf = xdr->buf;
- unsigned int bytes;
- unsigned int from;
- unsigned int truncated = 0;
-
- if ((offset + length) < offset ||
- (offset + length) > buf->page_len)
- length = buf->page_len - offset;
+ unsigned int from, to, shift;
xdr_realign_pages(xdr);
from = xdr_page_pos(xdr);
- bytes = xdr->nwords << 2;
-
- if (offset + length + bytes > buf->page_len) {
- unsigned int shift = (offset + length + bytes) - buf->page_len;
- unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift);
- truncated = shift - res;
- xdr->nwords -= XDR_QUADLEN(truncated);
- bytes -= shift;
- }
-
- /* Now move the page data over and zero pages */
- if (bytes > 0)
- _shift_data_right_pages(buf->pages,
- buf->page_base + offset + length,
- buf->page_base + from,
- bytes);
- _zero_pages(buf->pages, buf->page_base + offset, length);
+ to = xdr_align_size(offset + length);
+
+ /* Could the hole be behind us? */
+ if (to > from) {
+ unsigned int buflen = buf->len - buf->head->iov_len;
+ shift = to - from;
+ xdr_buf_try_expand(buf, shift);
+ xdr_buf_pages_shift_right(buf, from, buflen, shift);
+ xdr_set_page(xdr, to, xdr_stream_remaining(xdr));
+ } else if (to != from)
+ xdr_align_data(xdr, to, 0);
+ xdr_buf_pages_zero(buf, offset, length);
- buf->len += length - (from - offset) - truncated;
- xdr_set_page(xdr, offset + length, PAGE_SIZE);
return length;
}
EXPORT_SYMBOL_GPL(xdr_expand_hole);
@@ -1335,8 +1554,7 @@ EXPORT_SYMBOL_GPL(xdr_enter_page);
static const struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
-void
-xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
+void xdr_buf_from_iov(const struct kvec *iov, struct xdr_buf *buf)
{
buf->head[0] = *iov;
buf->tail[0] = empty_iov;
@@ -1493,7 +1711,8 @@ fix_len:
}
EXPORT_SYMBOL_GPL(xdr_buf_trim);
-static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
+static void __read_bytes_from_xdr_buf(const struct xdr_buf *subbuf,
+ void *obj, unsigned int len)
{
unsigned int this_len;
@@ -1502,8 +1721,7 @@ static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigne
len -= this_len;
obj += this_len;
this_len = min_t(unsigned int, len, subbuf->page_len);
- if (this_len)
- _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len);
+ _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len);
len -= this_len;
obj += this_len;
this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
@@ -1511,7 +1729,8 @@ static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigne
}
/* obj is assumed to point to allocated memory of size at least len: */
-int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
+int read_bytes_from_xdr_buf(const struct xdr_buf *buf, unsigned int base,
+ void *obj, unsigned int len)
{
struct xdr_buf subbuf;
int status;
@@ -1524,7 +1743,8 @@ int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, u
}
EXPORT_SYMBOL_GPL(read_bytes_from_xdr_buf);
-static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
+static void __write_bytes_to_xdr_buf(const struct xdr_buf *subbuf,
+ void *obj, unsigned int len)
{
unsigned int this_len;
@@ -1533,8 +1753,7 @@ static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned
len -= this_len;
obj += this_len;
this_len = min_t(unsigned int, len, subbuf->page_len);
- if (this_len)
- _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len);
+ _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len);
len -= this_len;
obj += this_len;
this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
@@ -1542,7 +1761,8 @@ static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned
}
/* obj is assumed to point to allocated memory of size at least len: */
-int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
+int write_bytes_to_xdr_buf(const struct xdr_buf *buf, unsigned int base,
+ void *obj, unsigned int len)
{
struct xdr_buf subbuf;
int status;
@@ -1555,8 +1775,7 @@ int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, un
}
EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf);
-int
-xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
+int xdr_decode_word(const struct xdr_buf *buf, unsigned int base, u32 *obj)
{
__be32 raw;
int status;
@@ -1569,8 +1788,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
}
EXPORT_SYMBOL_GPL(xdr_decode_word);
-int
-xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
+int xdr_encode_word(const struct xdr_buf *buf, unsigned int base, u32 obj)
{
__be32 raw = cpu_to_be32(obj);
@@ -1579,9 +1797,8 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
EXPORT_SYMBOL_GPL(xdr_encode_word);
/* Returns 0 on success, or else a negative error code. */
-static int
-xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
- struct xdr_array2_desc *desc, int encode)
+static int xdr_xcode_array2(const struct xdr_buf *buf, unsigned int base,
+ struct xdr_array2_desc *desc, int encode)
{
char *elem = NULL, *c;
unsigned int copied = 0, todo, avail_here;
@@ -1773,9 +1990,8 @@ out:
return err;
}
-int
-xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
- struct xdr_array2_desc *desc)
+int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base,
+ struct xdr_array2_desc *desc)
{
if (base >= buf->len)
return -EINVAL;
@@ -1784,9 +2000,8 @@ xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
}
EXPORT_SYMBOL_GPL(xdr_decode_array2);
-int
-xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
- struct xdr_array2_desc *desc)
+int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base,
+ struct xdr_array2_desc *desc)
{
if ((unsigned long) base + 4 + desc->array_len * desc->elem_size >
buf->head->iov_len + buf->page_len + buf->tail->iov_len)
@@ -1796,9 +2011,9 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
}
EXPORT_SYMBOL_GPL(xdr_encode_array2);
-int
-xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
- int (*actor)(struct scatterlist *, void *), void *data)
+int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset,
+ unsigned int len,
+ int (*actor)(struct scatterlist *, void *), void *data)
{
int i, ret = 0;
unsigned int page_len, thislen, page_offset;
@@ -1966,10 +2181,8 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
if (ret > 0) {
- char *s = kmalloc(ret + 1, gfp_flags);
+ char *s = kmemdup_nul(p, ret, gfp_flags);
if (s != NULL) {
- memcpy(s, p, ret);
- s[ret] = '\0';
*str = s;
return strlen(s);
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f6c17e75f20e..691ccf8049a4 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -151,33 +151,94 @@ out:
}
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
-/**
- * xprt_load_transport - load a transport implementation
- * @transport_name: transport to load
- *
- * Returns:
- * 0: transport successfully loaded
- * -ENOENT: transport module not available
- */
-int xprt_load_transport(const char *transport_name)
+static void
+xprt_class_release(const struct xprt_class *t)
{
- struct xprt_class *t;
- int result;
+ module_put(t->owner);
+}
+
+static const struct xprt_class *
+xprt_class_find_by_ident_locked(int ident)
+{
+ const struct xprt_class *t;
+
+ list_for_each_entry(t, &xprt_list, list) {
+ if (t->ident != ident)
+ continue;
+ if (!try_module_get(t->owner))
+ continue;
+ return t;
+ }
+ return NULL;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_ident(int ident)
+{
+ const struct xprt_class *t;
- result = 0;
spin_lock(&xprt_list_lock);
+ t = xprt_class_find_by_ident_locked(ident);
+ spin_unlock(&xprt_list_lock);
+ return t;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid_locked(const char *netid)
+{
+ const struct xprt_class *t;
+ unsigned int i;
+
list_for_each_entry(t, &xprt_list, list) {
- if (strcmp(t->name, transport_name) == 0) {
- spin_unlock(&xprt_list_lock);
- goto out;
+ for (i = 0; t->netid[i][0] != '\0'; i++) {
+ if (strcmp(t->netid[i], netid) != 0)
+ continue;
+ if (!try_module_get(t->owner))
+ continue;
+ return t;
}
}
+ return NULL;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid(const char *netid)
+{
+ const struct xprt_class *t;
+
+ spin_lock(&xprt_list_lock);
+ t = xprt_class_find_by_netid_locked(netid);
+ if (!t) {
+ spin_unlock(&xprt_list_lock);
+ request_module("rpc%s", netid);
+ spin_lock(&xprt_list_lock);
+ t = xprt_class_find_by_netid_locked(netid);
+ }
spin_unlock(&xprt_list_lock);
- result = request_module("xprt%s", transport_name);
-out:
- return result;
+ return t;
+}
+
+/**
+ * xprt_find_transport_ident - convert a netid into a transport identifier
+ * @netid: transport to load
+ *
+ * Returns:
+ * > 0: transport identifier
+ * -ENOENT: transport module not available
+ */
+int xprt_find_transport_ident(const char *netid)
+{
+ const struct xprt_class *t;
+ int ret;
+
+ t = xprt_class_find_by_netid(netid);
+ if (!t)
+ return -ENOENT;
+ ret = t->ident;
+ xprt_class_release(t);
+ return ret;
}
-EXPORT_SYMBOL_GPL(xprt_load_transport);
+EXPORT_SYMBOL_GPL(xprt_find_transport_ident);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
@@ -1896,21 +1957,17 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
- struct xprt_class *t;
+ const struct xprt_class *t;
- spin_lock(&xprt_list_lock);
- list_for_each_entry(t, &xprt_list, list) {
- if (t->ident == args->ident) {
- spin_unlock(&xprt_list_lock);
- goto found;
- }
+ t = xprt_class_find_by_ident(args->ident);
+ if (!t) {
+ dprintk("RPC: transport (%d) not supported\n", args->ident);
+ return ERR_PTR(-EIO);
}
- spin_unlock(&xprt_list_lock);
- dprintk("RPC: transport (%d) not supported\n", args->ident);
- return ERR_PTR(-EIO);
-found:
xprt = t->setup(args);
+ xprt_class_release(t);
+
if (IS_ERR(xprt))
goto out;
if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index c92c1aac270a..946edf2db646 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
* Support for backward direction RPCs on RPC/RDMA.
*/
@@ -82,7 +82,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
&rqst->rq_snd_buf, rpcrdma_noch_pullup))
return -EIO;
- trace_xprtrdma_cb_reply(rqst);
+ trace_xprtrdma_cb_reply(r_xprt, rqst);
return 0;
}
@@ -260,7 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
*/
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
- trace_xprtrdma_cb_call(rqst);
+ trace_xprtrdma_cb_call(r_xprt, rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 44888f5badef..baca49fe83af 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -65,18 +65,23 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
kfree(mr);
}
+static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
+{
+ if (mr->mr_device) {
+ trace_xprtrdma_mr_unmap(mr);
+ ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents,
+ mr->mr_dir);
+ mr->mr_device = NULL;
+ }
+}
+
static void frwr_mr_recycle(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
trace_xprtrdma_mr_recycle(mr);
- if (mr->mr_dir != DMA_NONE) {
- trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- mr->mr_dir = DMA_NONE;
- }
+ frwr_mr_unmap(r_xprt, mr);
spin_lock(&r_xprt->rx_buf.rb_lock);
list_del(&mr->mr_all);
@@ -86,6 +91,16 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)
frwr_release_mr(mr);
}
+static void frwr_mr_put(struct rpcrdma_mr *mr)
+{
+ frwr_mr_unmap(mr->mr_xprt, mr);
+
+ /* The MR is returned to the req's MR free list instead
+ * of to the xprt's MR free list. No spinlock is needed.
+ */
+ rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
/* frwr_reset - Place MRs back on the free list
* @req: request to reset
*
@@ -101,7 +116,7 @@ void frwr_reset(struct rpcrdma_req *req)
struct rpcrdma_mr *mr;
while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
- rpcrdma_mr_put(mr);
+ frwr_mr_put(mr);
}
/**
@@ -130,7 +145,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
mr->mr_xprt = r_xprt;
mr->frwr.fr_mr = frmr;
- mr->mr_dir = DMA_NONE;
+ mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
init_completion(&mr->frwr.fr_linv_done);
@@ -315,6 +330,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
mr->mr_dir);
if (!dma_nents)
goto out_dmamap_err;
+ mr->mr_device = ep->re_id->device;
ibmr = mr->frwr.fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
@@ -341,7 +357,6 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
return seg;
out_dmamap_err:
- mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
return ERR_PTR(-EIO);
@@ -363,12 +378,21 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_fastreg(wc, frwr);
+ trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
/* The MR will get recycled when the associated req is retransmitted */
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+ struct rpcrdma_frwr *frwr)
+{
+ struct rpc_rdma_cid *cid = &frwr->fr_cid;
+
+ cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+ cid->ci_completion_id = frwr->fr_mr->res.id;
+}
+
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
@@ -385,6 +409,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
@@ -395,6 +420,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_fastreg;
+ frwr_cid_init(ep, frwr);
frwr->fr_regwr.wr.next = post_wr;
frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
frwr->fr_regwr.wr.num_sge = 0;
@@ -404,7 +430,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
post_wr = &frwr->fr_regwr.wr;
}
- return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
+ return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
/**
@@ -420,18 +446,17 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
- trace_xprtrdma_mr_reminv(mr);
- rpcrdma_mr_put(mr);
+ frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
}
-static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
+static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
if (wc->status != IB_WC_SUCCESS)
frwr_mr_recycle(mr);
else
- rpcrdma_mr_put(mr);
+ frwr_mr_put(mr);
}
/**
@@ -448,8 +473,8 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li(wc, frwr);
- __frwr_release_mr(wc, mr);
+ trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+ frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
@@ -469,8 +494,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_wake(wc, frwr);
- __frwr_release_mr(wc, mr);
+ trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+ frwr_mr_done(wc, mr);
complete(&frwr->fr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
@@ -490,6 +515,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, **prev, *last;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
@@ -509,6 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
+ frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
@@ -534,7 +561,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
- rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
/* The final LOCAL_INV WR in the chain is supposed to
* do the wake. If it was never posted, the wake will
@@ -547,7 +574,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
- trace_xprtrdma_post_linv(req, rc);
+ trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
@@ -574,10 +601,10 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_done(wc, frwr);
- __frwr_release_mr(wc, mr);
+ trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
+ frwr_mr_done(wc, mr);
- /* Ensure @rep is generated before __frwr_release_mr */
+ /* Ensure @rep is generated before frwr_mr_done */
smp_rmb();
rpcrdma_complete_rqst(rep);
@@ -597,6 +624,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, *last, **prev;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
@@ -614,6 +642,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
+ frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
@@ -639,13 +668,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
- rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
if (!rc)
return;
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
- trace_xprtrdma_post_linv(req, rc);
+ trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 620327c01302..45c5b41ac8dc 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -24,6 +24,7 @@ MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("svcrdma");
MODULE_ALIAS("xprtrdma");
+MODULE_ALIAS("rpcrdma6");
static void __exit rpc_rdma_cleanup(void)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0f5120c7668f..8f5d0cb68360 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (c) 2014-2017 Oracle. All rights reserved.
+ * Copyright (c) 2014-2020, Oracle and/or its affiliates.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -179,6 +179,31 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
r_xprt->rx_ep->re_max_inline_recv;
}
+/* ACL likes to be lazy in allocating pages. For TCP, these
+ * pages can be allocated during receive processing. Not true
+ * for RDMA, which must always provision receive buffers
+ * up front.
+ */
+static noinline int
+rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
+{
+ struct page **ppages;
+ int len;
+
+ len = buf->page_len;
+ ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
+ while (len > 0) {
+ if (!*ppages)
+ *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (!*ppages)
+ return -ENOBUFS;
+ ppages++;
+ len -= PAGE_SIZE;
+ }
+
+ return 0;
+}
+
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
* a byte range. Other modes coalesce these SGEs into a single MR
* when they can.
@@ -233,15 +258,6 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
- /* ACL likes to be lazy in allocating pages - ACLs
- * are small by default but can get huge.
- */
- if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
- if (!*ppages)
- *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
- if (!*ppages)
- return -ENOBUFS;
- }
seg->mr_page = *ppages;
seg->mr_offset = (char *)page_base;
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
@@ -315,7 +331,6 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
*mr = rpcrdma_mr_get(r_xprt);
if (!*mr)
goto out_getmr_err;
- trace_xprtrdma_mr_get(req);
(*mr)->mr_req = req;
}
@@ -323,7 +338,7 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
out_getmr_err:
- trace_xprtrdma_nomrs(req);
+ trace_xprtrdma_nomrs_err(r_xprt, req);
xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
rpcrdma_mrs_refresh(r_xprt);
return ERR_PTR(-EAGAIN);
@@ -867,6 +882,12 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
+ if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
+ ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
+ if (ret)
+ return ret;
+ }
+
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
rqst);
@@ -1322,20 +1343,13 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
break;
- dprintk("RPC: %s: server reports "
- "version error (%u-%u), xid %08x\n", __func__,
- be32_to_cpup(p), be32_to_cpu(*(p + 1)),
- be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_err_vers(rqst, p, p + 1);
break;
case err_chunk:
- dprintk("RPC: %s: server reports "
- "header decoding error, xid %08x\n", __func__,
- be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_err_chunk(rqst);
break;
default:
- dprintk("RPC: %s: server reports "
- "unrecognized error %d, xid %08x\n", __func__,
- be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_err_unrecognized(rqst, p);
}
return -EIO;
@@ -1376,7 +1390,7 @@ out:
return;
out_badheader:
- trace_xprtrdma_reply_hdr(rep);
+ trace_xprtrdma_reply_hdr_err(rep);
r_xprt->rx_stats.bad_reply_count++;
rqst->rq_task->tk_status = status;
status = 0;
@@ -1450,14 +1464,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
- if (req->rl_reply) {
- trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
+ if (unlikely(req->rl_reply))
rpcrdma_recv_buffer_put(req->rl_reply);
- }
req->rl_reply = rep;
rep->rr_rqst = rqst;
- trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
+ trace_xprtrdma_reply(rqst->rq_task, rep, credits);
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
frwr_reminv(rep, &req->rl_registered);
@@ -1469,16 +1481,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
return;
out_badversion:
- trace_xprtrdma_reply_vers(rep);
+ trace_xprtrdma_reply_vers_err(rep);
goto out;
out_norqst:
spin_unlock(&xprt->queue_lock);
- trace_xprtrdma_reply_rqst(rep);
+ trace_xprtrdma_reply_rqst_err(rep);
goto out;
out_shortreply:
- trace_xprtrdma_reply_short(rep);
+ trace_xprtrdma_reply_short_err(rep);
out:
rpcrdma_recv_buffer_put(rep);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8915e42240d3..78d29d1bcc20 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -599,11 +599,12 @@ static void
xprt_rdma_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (!list_empty(&req->rl_registered))
- frwr_unmap_sync(r_xprt, req);
+ if (unlikely(!list_empty(&req->rl_registered))) {
+ trace_xprtrdma_mrs_zap(task);
+ frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req);
+ }
/* XXX: If the RPC is completing because of a signal and
* not because a reply was received, we ought to ensure
@@ -768,6 +769,7 @@ static struct xprt_class xprt_rdma = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_RDMA,
.setup = xprt_setup_rdma,
+ .netid = { "rdma", "rdma6", "" },
};
void xprt_rdma_cleanup(void)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ad6e2e4994ce..ec912cf9c618 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -167,7 +167,7 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_send(sc, wc);
+ trace_xprtrdma_wc_send(wc, &sc->sc_cid);
rpcrdma_sendctx_put_locked(r_xprt, sc);
rpcrdma_flush_disconnect(r_xprt, wc);
}
@@ -186,7 +186,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_receive(wc);
+ trace_xprtrdma_wc_receive(wc, &rep->rr_cid);
--r_xprt->rx_ep->re_receive_count;
if (wc->status != IB_WC_SUCCESS)
goto out_flushed;
@@ -643,6 +643,9 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
return NULL;
sc->sc_cqe.done = rpcrdma_wc_send;
+ sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id;
+ sc->sc_cid.ci_completion_id =
+ atomic_inc_return(&ep->re_completion_ids);
return sc;
}
@@ -972,6 +975,9 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
goto out_free_regbuf;
+ rep->rr_cid.ci_completion_id =
+ atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
+
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
@@ -1179,25 +1185,6 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
}
/**
- * rpcrdma_mr_put - DMA unmap an MR and release it
- * @mr: MR to release
- *
- */
-void rpcrdma_mr_put(struct rpcrdma_mr *mr)
-{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
- if (mr->mr_dir != DMA_NONE) {
- trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- mr->mr_dir = DMA_NONE;
- }
-
- rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
-}
-
-/**
* rpcrdma_buffer_get - Get a request buffer
* @buffers: Buffer pool from which to obtain a buffer
*
@@ -1411,6 +1398,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!rep)
break;
+ rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 43974ef39a50..94b28657aeeb 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -53,6 +53,7 @@
#include <rdma/ib_verbs.h> /* RDMA verbs api */
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
+#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
@@ -93,6 +94,8 @@ struct rpcrdma_ep {
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
unsigned int re_inline_recv; /* negotiated */
+
+ atomic_t re_completion_ids;
};
/* Pre-allocate extra Work Requests for handling backward receives
@@ -180,6 +183,8 @@ enum {
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
+ struct rpc_rdma_cid rr_cid;
+
__be32 rr_xid;
__be32 rr_vers;
__be32 rr_proc;
@@ -211,6 +216,7 @@ enum {
struct rpcrdma_req;
struct rpcrdma_sendctx {
struct ib_cqe sc_cqe;
+ struct rpc_rdma_cid sc_cid;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
@@ -225,6 +231,7 @@ struct rpcrdma_sendctx {
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
+ struct rpc_rdma_cid fr_cid;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@@ -236,6 +243,7 @@ struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
+ struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
@@ -466,7 +474,6 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
-void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7090bbee0ec5..c56a66cdf4ac 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -433,7 +433,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (ret <= 0)
goto sock_err;
xs_flush_bvec(buf->bvec, ret, seek + buf->page_base);
- offset += ret - buf->page_base;
+ ret -= buf->page_base;
+ offset += ret;
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
@@ -3059,6 +3060,7 @@ static struct xprt_class xs_local_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_LOCAL,
.setup = xs_setup_local,
+ .netid = { "" },
};
static struct xprt_class xs_udp_transport = {
@@ -3067,6 +3069,7 @@ static struct xprt_class xs_udp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_UDP,
.setup = xs_setup_udp,
+ .netid = { "udp", "udp6", "" },
};
static struct xprt_class xs_tcp_transport = {
@@ -3075,6 +3078,7 @@ static struct xprt_class xs_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_TCP,
.setup = xs_setup_tcp,
+ .netid = { "tcp", "tcp6", "" },
};
static struct xprt_class xs_bc_tcp_transport = {
@@ -3083,6 +3087,7 @@ static struct xprt_class xs_bc_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_BC_TCP,
.setup = xs_setup_bc_tcp,
+ .netid = { "" },
};
/**